From ebcebf1dee7f8314976b1e0cabd62b4cf893d765 Mon Sep 17 00:00:00 2001
From: Michalis Spyrou
Date: Wed, 21 Oct 2020 00:04:14 +0100
Subject: COMPMID-3638: Move NEON kernels

Signed-off-by: Michalis Spyrou
Change-Id: Ieed3e4bc8be7fef80c90c5094599b477a56fc473
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4285
Comments-Addressed: Arm Jenkins
Reviewed-by: Georgios Pinitas
Tested-by: Arm Jenkins
---
 .../CPPBoxWithNonMaximaSuppressionLimitKernel.h | 2 +-
 .../core/CPP/kernels/CPPCornerCandidatesKernel.h | 4 +-
 .../CPPDetectionWindowNonMaximaSuppressionKernel.h | 6 +-
 arm_compute/core/NEON/INEKernel.h | 34 ---
 arm_compute/core/NEON/INESimpleKernel.h | 34 ---
 arm_compute/core/NEON/NEKernels.h | 151 ------------
 .../core/NEON/kernels/NEAbsoluteDifferenceKernel.h | 86 -------
 arm_compute/core/NEON/kernels/NEAccumulateKernel.h | 139 -----------
 .../core/NEON/kernels/NEActivationLayerKernel.h | 85 -------
 .../core/NEON/kernels/NEArithmeticAdditionKernel.h | 106 --------
 .../NEON/kernels/NEArithmeticSubtractionKernel.h | 118 ---------
 .../NEON/kernels/NEBatchConcatenateLayerKernel.h | 89 -------
 .../NEON/kernels/NEBatchNormalizationLayerKernel.h | 139 -----------
 .../core/NEON/kernels/NEBatchToSpaceLayerKernel.h | 101 --------
 arm_compute/core/NEON/kernels/NEBitwiseAndKernel.h | 72 ------
 arm_compute/core/NEON/kernels/NEBitwiseNotKernel.h | 70 ------
 arm_compute/core/NEON/kernels/NEBitwiseOrKernel.h | 72 ------
 arm_compute/core/NEON/kernels/NEBitwiseXorKernel.h | 72 ------
 .../NEON/kernels/NEBoundingBoxTransformKernel.h | 95 --------
 arm_compute/core/NEON/kernels/NEBox3x3Kernel.h | 71 ------
 arm_compute/core/NEON/kernels/NECannyEdgeKernel.h | 189 ---------------
 .../core/NEON/kernels/NEChannelCombineKernel.h | 129 ----------
 .../core/NEON/kernels/NEChannelExtractKernel.h | 111 ---------
 .../NEON/kernels/NEChannelShuffleLayerKernel.h | 80 ------
 arm_compute/core/NEON/kernels/NECol2ImKernel.h | 115 ---------
 .../core/NEON/kernels/NEColorConvertKernel.h | 93 -------
 .../kernels/NEConvertFullyConnectedWeightsKernel.h | 95 --------
 .../kernels/NEConvertQuantizedSignednessKernel.h | 76 ------
 .../core/NEON/kernels/NEConvolutionKernel.h | 267 ---------------------
 arm_compute/core/NEON/kernels/NECopyKernel.h | 78 ------
 arm_compute/core/NEON/kernels/NECropKernel.h | 114 ---------
 .../NEON/kernels/NECumulativeDistributionKernel.h | 83 -------
 .../NEON/kernels/NEDepthConcatenateLayerKernel.h | 89 -------
 .../core/NEON/kernels/NEDepthConvertLayerKernel.h | 94 --------
 .../core/NEON/kernels/NEDepthToSpaceLayerKernel.h | 81 -------
 .../NEDepthwiseConvolutionLayerNativeKernel.h | 129 ----------
 .../NEON/kernels/NEDequantizationLayerKernel.h | 76 ------
 arm_compute/core/NEON/kernels/NEDerivativeKernel.h | 98 --------
 arm_compute/core/NEON/kernels/NEDilateKernel.h | 53 ----
 .../NEON/kernels/NEDirectConvolutionLayerKernel.h | 108 ---------
 .../NEDirectConvolutionLayerOutputStageKernel.h | 102 --------
 .../NEON/kernels/NEElementwiseOperationKernel.h | 214 -----------------
 .../core/NEON/kernels/NEElementwiseUnaryKernel.h | 103 --------
 arm_compute/core/NEON/kernels/NEErodeKernel.h | 53 ----
 .../core/NEON/kernels/NEFFTDigitReverseKernel.h | 93 -------
 .../core/NEON/kernels/NEFFTRadixStageKernel.h | 103 --------
 arm_compute/core/NEON/kernels/NEFFTScaleKernel.h | 84 -------
 .../core/NEON/kernels/NEFastCornersKernel.h | 76 ------
 arm_compute/core/NEON/kernels/NEFillArrayKernel.h | 77 ------
 arm_compute/core/NEON/kernels/NEFillBorderKernel.h | 82 -------
 .../core/NEON/kernels/NEFlattenLayerKernel.h | 81
------- arm_compute/core/NEON/kernels/NEFloorKernel.h | 60 ----- .../NEON/kernels/NEFuseBatchNormalizationKernel.h | 116 --------- .../core/NEON/kernels/NEGEMMAssemblyBaseKernel.h | 89 ------- .../core/NEON/kernels/NEGEMMInterleave4x4Kernel.h | 102 -------- .../NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h | 90 ------- .../kernels/NEGEMMLowpOffsetContributionKernel.h | 103 -------- ...NEGEMMLowpOffsetContributionOutputStageKernel.h | 133 ---------- .../NEGEMMLowpQuantizeDownInt32ScaleKernel.h | 112 --------- ...antizeDownInt32ToInt16ScaleByFixedPointKernel.h | 116 --------- ...uantizeDownInt32ToInt8ScaleByFixedPointKernel.h | 119 --------- ...antizeDownInt32ToUint8ScaleByFixedPointKernel.h | 119 --------- .../core/NEON/kernels/NEGEMMLowpReductionKernel.h | 170 ------------- .../core/NEON/kernels/NEGEMMMatrixAdditionKernel.h | 96 -------- .../core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h | 94 -------- .../core/NEON/kernels/NEGEMMTranspose1xWKernel.h | 95 -------- arm_compute/core/NEON/kernels/NEGatherKernel.h | 113 --------- .../core/NEON/kernels/NEGaussian3x3Kernel.h | 54 ----- .../core/NEON/kernels/NEGaussian5x5Kernel.h | 81 ------- .../core/NEON/kernels/NEGaussianPyramidKernel.h | 105 -------- .../NEON/kernels/NEGenerateProposalsLayerKernel.h | 85 ------- .../core/NEON/kernels/NEHOGDescriptorKernel.h | 149 ------------ .../core/NEON/kernels/NEHOGDetectorKernel.h | 89 ------- .../core/NEON/kernels/NEHarrisCornersKernel.h | 105 -------- .../NEON/kernels/NEHeightConcatenateLayerKernel.h | 83 ------- arm_compute/core/NEON/kernels/NEHistogramKernel.h | 135 ----------- arm_compute/core/NEON/kernels/NEIm2ColKernel.h | 139 ----------- .../kernels/NEInstanceNormalizationLayerKernel.h | 96 -------- .../core/NEON/kernels/NEIntegralImageKernel.h | 54 ----- .../core/NEON/kernels/NEL2NormalizeLayerKernel.h | 90 ------- arm_compute/core/NEON/kernels/NELKTrackerKernel.h | 149 ------------ .../NELocallyConnectedMatrixMultiplyKernel.h | 77 ------ .../core/NEON/kernels/NEMagnitudePhaseKernel.h | 101 -------- .../core/NEON/kernels/NEMaxUnpoolingLayerKernel.h | 97 -------- arm_compute/core/NEON/kernels/NEMeanStdDevKernel.h | 83 ------- .../NEON/kernels/NEMeanStdDevNormalizationKernel.h | 98 -------- arm_compute/core/NEON/kernels/NEMedian3x3Kernel.h | 54 ----- arm_compute/core/NEON/kernels/NEMemsetKernel.h | 71 ------ .../core/NEON/kernels/NEMinMaxLayerKernel.h | 90 ------- .../core/NEON/kernels/NEMinMaxLocationKernel.h | 171 ------------- .../core/NEON/kernels/NENonLinearFilterKernel.h | 151 ------------ .../NEON/kernels/NENonMaximaSuppression3x3Kernel.h | 108 --------- .../core/NEON/kernels/NENormalizationLayerKernel.h | 108 --------- arm_compute/core/NEON/kernels/NEPadLayerKernel.h | 113 --------- arm_compute/core/NEON/kernels/NEPermuteKernel.h | 102 -------- .../NEON/kernels/NEPixelWiseMultiplicationKernel.h | 186 -------------- .../core/NEON/kernels/NEPoolingLayerKernel.h | 229 ------------------ .../core/NEON/kernels/NEPriorBoxLayerKernel.h | 98 -------- .../NEON/kernels/NEQLSTMLayerNormalizationKernel.h | 137 ----------- .../core/NEON/kernels/NEQuantizationLayerKernel.h | 102 -------- .../core/NEON/kernels/NEROIAlignLayerKernel.h | 101 -------- .../core/NEON/kernels/NEROIPoolingLayerKernel.h | 81 ------- arm_compute/core/NEON/kernels/NERangeKernel.h | 90 ------- .../core/NEON/kernels/NEReductionOperationKernel.h | 92 ------- arm_compute/core/NEON/kernels/NERemapKernel.h | 83 ------- arm_compute/core/NEON/kernels/NEReorgLayerKernel.h | 83 ------- .../core/NEON/kernels/NEReshapeLayerKernel.h | 63 ----- 
arm_compute/core/NEON/kernels/NEReverseKernel.h | 80 ------ arm_compute/core/NEON/kernels/NEScaleKernel.h | 127 ---------- arm_compute/core/NEON/kernels/NEScharr3x3Kernel.h | 86 ------- arm_compute/core/NEON/kernels/NESelectKernel.h | 103 -------- arm_compute/core/NEON/kernels/NESobel3x3Kernel.h | 86 ------- arm_compute/core/NEON/kernels/NESobel5x5Kernel.h | 126 ---------- arm_compute/core/NEON/kernels/NESobel7x7Kernel.h | 130 ---------- .../core/NEON/kernels/NESoftmaxLayerKernel.h | 139 ----------- .../core/NEON/kernels/NESpaceToBatchLayerKernel.h | 111 --------- .../core/NEON/kernels/NESpaceToDepthLayerKernel.h | 81 ------- arm_compute/core/NEON/kernels/NEStackLayerKernel.h | 93 ------- .../core/NEON/kernels/NEStridedSliceKernel.h | 102 -------- .../core/NEON/kernels/NETableLookupKernel.h | 80 ------ arm_compute/core/NEON/kernels/NEThresholdKernel.h | 84 ------- arm_compute/core/NEON/kernels/NETileKernel.h | 76 ------ arm_compute/core/NEON/kernels/NETransposeKernel.h | 90 ------- .../core/NEON/kernels/NEUpsampleLayerKernel.h | 99 -------- arm_compute/core/NEON/kernels/NEWarpKernel.h | 129 ---------- .../core/NEON/kernels/NEWeightsReshapeKernel.h | 109 --------- .../NEON/kernels/NEWidthConcatenateLayerKernel.h | 82 ------- arm_compute/core/NEON/kernels/NEYOLOLayerKernel.h | 106 -------- .../core/NEON/kernels/assembly/arm_gemm_local.hpp | 37 --- arm_compute/core/Types.h | 9 + arm_compute/core/utils/misc/Traits.h | 3 +- arm_compute/runtime/CL/functions/CLHarrisCorners.h | 4 +- arm_compute/runtime/IOperator.h | 2 + arm_compute/runtime/ITransformWeights.h | 5 +- arm_compute/runtime/NEON/INEOperator.h | 5 +- arm_compute/runtime/NEON/INESimpleFunction.h | 21 +- .../runtime/NEON/INESimpleFunctionNoBorder.h | 7 +- .../runtime/NEON/functions/NEAbsoluteDifference.h | 18 +- arm_compute/runtime/NEON/functions/NEAccumulate.h | 38 ++- .../runtime/NEON/functions/NEActivationLayer.h | 17 +- .../runtime/NEON/functions/NEArgMinMaxLayer.h | 10 + .../runtime/NEON/functions/NEArithmeticAddition.h | 12 + .../NEON/functions/NEBatchNormalizationLayer.h | 20 +- .../runtime/NEON/functions/NEBatchToSpaceLayer.h | 16 +- arm_compute/runtime/NEON/functions/NEBitwiseAnd.h | 14 +- arm_compute/runtime/NEON/functions/NEBitwiseNot.h | 2 +- arm_compute/runtime/NEON/functions/NEBitwiseOr.h | 2 +- arm_compute/runtime/NEON/functions/NEBitwiseXor.h | 2 +- .../NEON/functions/NEBoundingBoxTransform.h | 7 +- arm_compute/runtime/NEON/functions/NEBox3x3.h | 2 +- arm_compute/runtime/NEON/functions/NECannyEdge.h | 36 +-- arm_compute/runtime/NEON/functions/NECast.h | 5 +- .../runtime/NEON/functions/NEChannelCombine.h | 2 +- .../runtime/NEON/functions/NEChannelExtract.h | 2 +- .../runtime/NEON/functions/NEChannelShuffleLayer.h | 4 +- arm_compute/runtime/NEON/functions/NECol2Im.h | 5 +- .../runtime/NEON/functions/NEColorConvert.h | 2 +- .../runtime/NEON/functions/NEComputeAllAnchors.h | 9 +- .../runtime/NEON/functions/NEConcatenateLayer.h | 19 +- .../functions/NEConvertFullyConnectedWeights.h | 17 +- arm_compute/runtime/NEON/functions/NEConvolution.h | 59 ++++- .../runtime/NEON/functions/NEConvolutionLayer.h | 13 +- arm_compute/runtime/NEON/functions/NECopy.h | 15 +- arm_compute/runtime/NEON/functions/NECropResize.h | 4 +- .../runtime/NEON/functions/NEDepthConvertLayer.h | 3 + .../runtime/NEON/functions/NEDepthToSpaceLayer.h | 16 +- .../NEON/functions/NEDepthwiseConvolutionLayer.h | 71 +++--- .../runtime/NEON/functions/NEDequantizationLayer.h | 1 + arm_compute/runtime/NEON/functions/NEDerivative.h | 22 +- 
.../NEON/functions/NEDetectionPostProcessLayer.h | 2 + arm_compute/runtime/NEON/functions/NEDilate.h | 2 +- .../NEON/functions/NEDirectConvolutionLayer.h | 37 ++- .../NEON/functions/NEElementwiseUnaryLayer.h | 2 + .../runtime/NEON/functions/NEEqualizeHistogram.h | 34 ++- arm_compute/runtime/NEON/functions/NEErode.h | 2 +- arm_compute/runtime/NEON/functions/NEFFT1D.h | 38 ++- arm_compute/runtime/NEON/functions/NEFFT2D.h | 12 +- .../runtime/NEON/functions/NEFFTConvolutionLayer.h | 4 +- arm_compute/runtime/NEON/functions/NEFastCorners.h | 36 ++- arm_compute/runtime/NEON/functions/NEFill.h | 3 +- arm_compute/runtime/NEON/functions/NEFillBorder.h | 7 +- .../runtime/NEON/functions/NEFlattenLayer.h | 3 +- arm_compute/runtime/NEON/functions/NEFloor.h | 3 +- .../runtime/NEON/functions/NEFullyConnectedLayer.h | 23 +- .../NEON/functions/NEFuseBatchNormalization.h | 8 +- arm_compute/runtime/NEON/functions/NEGEMM.h | 33 +-- .../NEON/functions/NEGEMMConvolutionLayer.h | 29 ++- .../runtime/NEON/functions/NEGEMMInterleave4x4.h | 2 +- .../NEGEMMLowpAssemblyMatrixMultiplyCore.h | 23 +- .../NEON/functions/NEGEMMLowpMatrixMultiplyCore.h | 46 ++-- .../runtime/NEON/functions/NEGEMMLowpOutputStage.h | 50 ++++ .../runtime/NEON/functions/NEGEMMTranspose1xW.h | 16 +- arm_compute/runtime/NEON/functions/NEGather.h | 3 +- arm_compute/runtime/NEON/functions/NEGaussian3x3.h | 2 +- arm_compute/runtime/NEON/functions/NEGaussian5x5.h | 27 ++- .../runtime/NEON/functions/NEGaussianPyramid.h | 34 ++- .../NEON/functions/NEGenerateProposalsLayer.h | 34 +-- .../runtime/NEON/functions/NEHOGDescriptor.h | 30 ++- arm_compute/runtime/NEON/functions/NEHOGDetector.h | 18 +- arm_compute/runtime/NEON/functions/NEHOGGradient.h | 25 +- .../runtime/NEON/functions/NEHOGMultiDetection.h | 12 +- .../runtime/NEON/functions/NEHarrisCorners.h | 20 +- arm_compute/runtime/NEON/functions/NEHistogram.h | 25 +- arm_compute/runtime/NEON/functions/NEIm2Col.h | 18 +- .../NEON/functions/NEInstanceNormalizationLayer.h | 28 ++- .../runtime/NEON/functions/NEIntegralImage.h | 14 +- .../runtime/NEON/functions/NEL2NormalizeLayer.h | 20 +- arm_compute/runtime/NEON/functions/NELSTMLayer.h | 21 +- .../runtime/NEON/functions/NELSTMLayerQuantized.h | 4 +- .../runtime/NEON/functions/NELaplacianPyramid.h | 12 +- .../NEON/functions/NELaplacianReconstruct.h | 12 +- .../NEON/functions/NELocallyConnectedLayer.h | 30 +-- arm_compute/runtime/NEON/functions/NEMagnitude.h | 15 +- .../runtime/NEON/functions/NEMaxUnpoolingLayer.h | 22 +- arm_compute/runtime/NEON/functions/NEMeanStdDev.h | 27 ++- .../functions/NEMeanStdDevNormalizationLayer.h | 15 +- arm_compute/runtime/NEON/functions/NEMedian3x3.h | 2 +- .../runtime/NEON/functions/NEMinMaxLocation.h | 20 +- .../runtime/NEON/functions/NENonLinearFilter.h | 2 +- .../NEON/functions/NENonMaximaSuppression3x3.h | 2 +- .../runtime/NEON/functions/NENormalizationLayer.h | 21 +- arm_compute/runtime/NEON/functions/NEOpticalFlow.h | 29 ++- arm_compute/runtime/NEON/functions/NEPReluLayer.h | 1 + arm_compute/runtime/NEON/functions/NEPadLayer.h | 38 ++- arm_compute/runtime/NEON/functions/NEPermute.h | 3 +- arm_compute/runtime/NEON/functions/NEPhase.h | 4 +- .../NEON/functions/NEPixelWiseMultiplication.h | 1 + .../runtime/NEON/functions/NEPoolingLayer.h | 24 +- .../runtime/NEON/functions/NEPriorBoxLayer.h | 4 +- arm_compute/runtime/NEON/functions/NEQLSTMLayer.h | 186 +++++++------- .../runtime/NEON/functions/NEQuantizationLayer.h | 4 +- arm_compute/runtime/NEON/functions/NERNNLayer.h | 25 +- .../runtime/NEON/functions/NEROIAlignLayer.h | 6 
+- .../runtime/NEON/functions/NEROIPoolingLayer.h | 18 +- arm_compute/runtime/NEON/functions/NERange.h | 18 +- arm_compute/runtime/NEON/functions/NEReduceMean.h | 11 +- .../runtime/NEON/functions/NEReductionOperation.h | 27 ++- arm_compute/runtime/NEON/functions/NERemap.h | 2 +- arm_compute/runtime/NEON/functions/NEReorgLayer.h | 3 +- .../runtime/NEON/functions/NEReshapeLayer.h | 13 +- arm_compute/runtime/NEON/functions/NEReverse.h | 3 +- arm_compute/runtime/NEON/functions/NEScale.h | 2 +- arm_compute/runtime/NEON/functions/NESelect.h | 7 +- arm_compute/runtime/NEON/functions/NESobel3x3.h | 2 +- arm_compute/runtime/NEON/functions/NESobel5x5.h | 29 ++- arm_compute/runtime/NEON/functions/NESobel7x7.h | 29 ++- .../runtime/NEON/functions/NESoftmaxLayer.h | 31 ++- .../runtime/NEON/functions/NESpaceToBatchLayer.h | 16 +- .../runtime/NEON/functions/NESpaceToDepthLayer.h | 13 +- arm_compute/runtime/NEON/functions/NEStackLayer.h | 22 +- arm_compute/runtime/NEON/functions/NETableLookup.h | 2 +- arm_compute/runtime/NEON/functions/NEThreshold.h | 1 + arm_compute/runtime/NEON/functions/NETile.h | 3 +- arm_compute/runtime/NEON/functions/NETranspose.h | 3 +- arm_compute/runtime/NEON/functions/NEUnstack.h | 10 + .../runtime/NEON/functions/NEUpsampleLayer.h | 18 +- arm_compute/runtime/NEON/functions/NEWarpAffine.h | 2 +- .../runtime/NEON/functions/NEWarpPerspective.h | 2 +- .../NEON/functions/NEWinogradConvolutionLayer.h | 20 +- arm_compute/runtime/NEON/functions/NEYOLOLayer.h | 4 +- 260 files changed, 1468 insertions(+), 13457 deletions(-) delete mode 100644 arm_compute/core/NEON/INEKernel.h delete mode 100644 arm_compute/core/NEON/INESimpleKernel.h delete mode 100644 arm_compute/core/NEON/NEKernels.h delete mode 100644 arm_compute/core/NEON/kernels/NEAbsoluteDifferenceKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEAccumulateKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEActivationLayerKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEBatchConcatenateLayerKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEBatchNormalizationLayerKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEBatchToSpaceLayerKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEBitwiseAndKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEBitwiseNotKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEBitwiseOrKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEBitwiseXorKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEBoundingBoxTransformKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEBox3x3Kernel.h delete mode 100644 arm_compute/core/NEON/kernels/NECannyEdgeKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEChannelCombineKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEChannelExtractKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEChannelShuffleLayerKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NECol2ImKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEColorConvertKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEConvolutionKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NECopyKernel.h delete mode 100644 
arm_compute/core/NEON/kernels/NECropKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NECumulativeDistributionKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEDepthConcatenateLayerKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEDepthConvertLayerKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEDepthToSpaceLayerKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEDequantizationLayerKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEDerivativeKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEDilateKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEElementwiseOperationKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEElementwiseUnaryKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEErodeKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEFFTDigitReverseKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEFFTRadixStageKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEFFTScaleKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEFastCornersKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEFillArrayKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEFillBorderKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEFlattenLayerKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEFloorKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEFuseBatchNormalizationKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEGEMMAssemblyBaseKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ScaleKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEGEMMLowpReductionKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEGatherKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEGaussian3x3Kernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEGaussian5x5Kernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEGaussianPyramidKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEGenerateProposalsLayerKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEHOGDescriptorKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEHOGDetectorKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEHeightConcatenateLayerKernel.h delete mode 100644 
arm_compute/core/NEON/kernels/NEHistogramKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEIm2ColKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEInstanceNormalizationLayerKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEIntegralImageKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEL2NormalizeLayerKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NELKTrackerKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEMagnitudePhaseKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEMaxUnpoolingLayerKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEMeanStdDevKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEMeanStdDevNormalizationKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEMedian3x3Kernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEMemsetKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEMinMaxLayerKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEMinMaxLocationKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NENonLinearFilterKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h delete mode 100644 arm_compute/core/NEON/kernels/NENormalizationLayerKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEPadLayerKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEPermuteKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEPriorBoxLayerKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEQLSTMLayerNormalizationKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEQuantizationLayerKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEROIAlignLayerKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEROIPoolingLayerKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NERangeKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEReductionOperationKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NERemapKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEReorgLayerKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEReshapeLayerKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEReverseKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEScaleKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEScharr3x3Kernel.h delete mode 100644 arm_compute/core/NEON/kernels/NESelectKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NESobel3x3Kernel.h delete mode 100644 arm_compute/core/NEON/kernels/NESobel5x5Kernel.h delete mode 100644 arm_compute/core/NEON/kernels/NESobel7x7Kernel.h delete mode 100644 arm_compute/core/NEON/kernels/NESoftmaxLayerKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NESpaceToBatchLayerKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NESpaceToDepthLayerKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEStackLayerKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEStridedSliceKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NETableLookupKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEThresholdKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NETileKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NETransposeKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEUpsampleLayerKernel.h delete mode 100644 
arm_compute/core/NEON/kernels/NEWarpKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEWidthConcatenateLayerKernel.h delete mode 100644 arm_compute/core/NEON/kernels/NEYOLOLayerKernel.h delete mode 100644 arm_compute/core/NEON/kernels/assembly/arm_gemm_local.hpp diff --git a/arm_compute/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.h b/arm_compute/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.h index 1a3f2ba679..068b37d80c 100644 --- a/arm_compute/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.h +++ b/arm_compute/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.h @@ -24,7 +24,7 @@ #ifndef ARM_COMPUTE_CPPBOXWITHNONMAXIMASUPPRESSIONLIMITKERNEL_H #define ARM_COMPUTE_CPPBOXWITHNONMAXIMASUPPRESSIONLIMITKERNEL_H -#include "arm_compute/core/NEON/INEKernel.h" +#include "arm_compute/core/CPP/ICPPKernel.h" #include "arm_compute/core/Types.h" namespace arm_compute diff --git a/arm_compute/core/CPP/kernels/CPPCornerCandidatesKernel.h b/arm_compute/core/CPP/kernels/CPPCornerCandidatesKernel.h index ddb346dfc2..e4fd250a61 100644 --- a/arm_compute/core/CPP/kernels/CPPCornerCandidatesKernel.h +++ b/arm_compute/core/CPP/kernels/CPPCornerCandidatesKernel.h @@ -24,8 +24,8 @@ #ifndef ARM_COMPUTE_CPPCORNERCANDIDATESKERNEL_H #define ARM_COMPUTE_CPPCORNERCANDIDATESKERNEL_H +#include "arm_compute/core/CPP/ICPPKernel.h" #include "arm_compute/core/IArray.h" -#include "arm_compute/core/NEON/INEKernel.h" #include "support/Mutex.h" @@ -39,7 +39,7 @@ using IImage = ITensor; /** CPP kernel to perform corner candidates */ -class CPPCornerCandidatesKernel : public INEKernel +class CPPCornerCandidatesKernel : public ICPPKernel { public: const char *name() const override diff --git a/arm_compute/core/CPP/kernels/CPPDetectionWindowNonMaximaSuppressionKernel.h b/arm_compute/core/CPP/kernels/CPPDetectionWindowNonMaximaSuppressionKernel.h index dd6bbd56e0..5275a357b3 100644 --- a/arm_compute/core/CPP/kernels/CPPDetectionWindowNonMaximaSuppressionKernel.h +++ b/arm_compute/core/CPP/kernels/CPPDetectionWindowNonMaximaSuppressionKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2017-2020 Arm Limited.
* * SPDX-License-Identifier: MIT * @@ -24,9 +24,9 @@ #ifndef ARM_COMPUTE_CPPDETECTIONWINDOWNONMAXIMASUPPRESSIONKERNEL_H #define ARM_COMPUTE_CPPDETECTIONWINDOWNONMAXIMASUPPRESSIONKERNEL_H +#include "arm_compute/core/CPP/ICPPKernel.h" #include "arm_compute/core/IArray.h" #include "arm_compute/core/IHOG.h" -#include "arm_compute/core/NEON/INEKernel.h" #include "arm_compute/core/Types.h" namespace arm_compute @@ -53,6 +53,8 @@ public: CPPDetectionWindowNonMaximaSuppressionKernel(CPPDetectionWindowNonMaximaSuppressionKernel &&) = default; /** Allow instances of this class to be moved */ CPPDetectionWindowNonMaximaSuppressionKernel &operator=(CPPDetectionWindowNonMaximaSuppressionKernel &&) = default; + /** Default destructor */ + ~CPPDetectionWindowNonMaximaSuppressionKernel() = default; /** Initialise the kernel's input, output and the euclidean minimum distance * * @attention: If @ref IDetectionWindowArray is passed to the kernel, the map() and unmap() methods @ref IDetectionWindowArray must be called respectively before and after diff --git a/arm_compute/core/NEON/INEKernel.h b/arm_compute/core/NEON/INEKernel.h deleted file mode 100644 index 87e17c80b4..0000000000 --- a/arm_compute/core/NEON/INEKernel.h +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Copyright (c) 2017-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_INEKERNEL_H -#define ARM_COMPUTE_INEKERNEL_H - -#include "arm_compute/core/CPP/ICPPKernel.h" - -namespace arm_compute -{ -/** Common interface for all kernels implemented in NEON. */ -using INEKernel = ICPPKernel; -} // namespace arm_compute -#endif /*ARM_COMPUTE_INEKERNEL_H */ diff --git a/arm_compute/core/NEON/INESimpleKernel.h b/arm_compute/core/NEON/INESimpleKernel.h deleted file mode 100644 index abe15c15c3..0000000000 --- a/arm_compute/core/NEON/INESimpleKernel.h +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Copyright (c) 2016-2019 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_INESIMPLEKERNEL_H -#define ARM_COMPUTE_INESIMPLEKERNEL_H - -#include "arm_compute/core/CPP/ICPPSimpleKernel.h" - -namespace arm_compute -{ -/** Interface for simple NEON kernels having 1 tensor input and 1 tensor output */ -using INESimpleKernel = ICPPSimpleKernel; -} // namespace arm_compute -#endif /*ARM_COMPUTE_INESIMPLEKERNEL_H */ diff --git a/arm_compute/core/NEON/NEKernels.h b/arm_compute/core/NEON/NEKernels.h deleted file mode 100644 index 4d3e6633c9..0000000000 --- a/arm_compute/core/NEON/NEKernels.h +++ /dev/null @@ -1,151 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEKERNELS_H -#define ARM_COMPUTE_NEKERNELS_H - -/* Header regrouping all the NEON kernels */ -#include "arm_compute/core/NEON/kernels/NEAbsoluteDifferenceKernel.h" -#include "arm_compute/core/NEON/kernels/NEAccumulateKernel.h" -#include "arm_compute/core/NEON/kernels/NEActivationLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h" -#include "arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h" -#include "arm_compute/core/NEON/kernels/NEBatchConcatenateLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEBatchNormalizationLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEBatchToSpaceLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEBitwiseAndKernel.h" -#include "arm_compute/core/NEON/kernels/NEBitwiseNotKernel.h" -#include "arm_compute/core/NEON/kernels/NEBitwiseOrKernel.h" -#include "arm_compute/core/NEON/kernels/NEBitwiseXorKernel.h" -#include "arm_compute/core/NEON/kernels/NEBoundingBoxTransformKernel.h" -#include "arm_compute/core/NEON/kernels/NEBox3x3Kernel.h" -#include "arm_compute/core/NEON/kernels/NECannyEdgeKernel.h" -#include "arm_compute/core/NEON/kernels/NEChannelCombineKernel.h" -#include "arm_compute/core/NEON/kernels/NEChannelExtractKernel.h" -#include "arm_compute/core/NEON/kernels/NEChannelShuffleLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NECol2ImKernel.h" -#include "arm_compute/core/NEON/kernels/NEColorConvertKernel.h" -#include "arm_compute/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.h" -#include "arm_compute/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h" -#include "arm_compute/core/NEON/kernels/NEConvolutionKernel.h" -#include "arm_compute/core/NEON/kernels/NECopyKernel.h" -#include "arm_compute/core/NEON/kernels/NECropKernel.h" -#include "arm_compute/core/NEON/kernels/NECumulativeDistributionKernel.h" -#include "arm_compute/core/NEON/kernels/NEDepthConcatenateLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEDepthConvertLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEDepthToSpaceLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h" -#include "arm_compute/core/NEON/kernels/NEDequantizationLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEDerivativeKernel.h" -#include "arm_compute/core/NEON/kernels/NEDilateKernel.h" -#include "arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h" -#include "arm_compute/core/NEON/kernels/NEElementwiseOperationKernel.h" -#include "arm_compute/core/NEON/kernels/NEElementwiseUnaryKernel.h" -#include "arm_compute/core/NEON/kernels/NEErodeKernel.h" -#include "arm_compute/core/NEON/kernels/NEFFTDigitReverseKernel.h" -#include "arm_compute/core/NEON/kernels/NEFFTRadixStageKernel.h" -#include "arm_compute/core/NEON/kernels/NEFFTScaleKernel.h" -#include "arm_compute/core/NEON/kernels/NEFastCornersKernel.h" -#include "arm_compute/core/NEON/kernels/NEFillArrayKernel.h" -#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" -#include "arm_compute/core/NEON/kernels/NEFlattenLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEFloorKernel.h" -#include "arm_compute/core/NEON/kernels/NEFuseBatchNormalizationKernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMAssemblyBaseKernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h" -#include 
"arm_compute/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ScaleKernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMLowpReductionKernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h" -#include "arm_compute/core/NEON/kernels/NEGatherKernel.h" -#include "arm_compute/core/NEON/kernels/NEGaussian3x3Kernel.h" -#include "arm_compute/core/NEON/kernels/NEGaussian5x5Kernel.h" -#include "arm_compute/core/NEON/kernels/NEGaussianPyramidKernel.h" -#include "arm_compute/core/NEON/kernels/NEGenerateProposalsLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEHOGDescriptorKernel.h" -#include "arm_compute/core/NEON/kernels/NEHOGDetectorKernel.h" -#include "arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h" -#include "arm_compute/core/NEON/kernels/NEHeightConcatenateLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEHistogramKernel.h" -#include "arm_compute/core/NEON/kernels/NEIm2ColKernel.h" -#include "arm_compute/core/NEON/kernels/NEInstanceNormalizationLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEIntegralImageKernel.h" -#include "arm_compute/core/NEON/kernels/NEL2NormalizeLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NELKTrackerKernel.h" -#include "arm_compute/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.h" -#include "arm_compute/core/NEON/kernels/NEMagnitudePhaseKernel.h" -#include "arm_compute/core/NEON/kernels/NEMaxUnpoolingLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEMeanStdDevKernel.h" -#include "arm_compute/core/NEON/kernels/NEMeanStdDevNormalizationKernel.h" -#include "arm_compute/core/NEON/kernels/NEMedian3x3Kernel.h" -#include "arm_compute/core/NEON/kernels/NEMemsetKernel.h" -#include "arm_compute/core/NEON/kernels/NEMinMaxLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEMinMaxLocationKernel.h" -#include "arm_compute/core/NEON/kernels/NENonLinearFilterKernel.h" -#include "arm_compute/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h" -#include "arm_compute/core/NEON/kernels/NENormalizationLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEPadLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEPermuteKernel.h" -#include "arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h" -#include "arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEPriorBoxLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEQLSTMLayerNormalizationKernel.h" -#include "arm_compute/core/NEON/kernels/NEQuantizationLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEROIAlignLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEROIPoolingLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NERangeKernel.h" -#include "arm_compute/core/NEON/kernels/NEReductionOperationKernel.h" -#include "arm_compute/core/NEON/kernels/NERemapKernel.h" -#include "arm_compute/core/NEON/kernels/NEReorgLayerKernel.h" -#include 
"arm_compute/core/NEON/kernels/NEReshapeLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEReverseKernel.h" -#include "arm_compute/core/NEON/kernels/NEScaleKernel.h" -#include "arm_compute/core/NEON/kernels/NEScharr3x3Kernel.h" -#include "arm_compute/core/NEON/kernels/NESelectKernel.h" -#include "arm_compute/core/NEON/kernels/NESobel3x3Kernel.h" -#include "arm_compute/core/NEON/kernels/NESobel5x5Kernel.h" -#include "arm_compute/core/NEON/kernels/NESobel7x7Kernel.h" -#include "arm_compute/core/NEON/kernels/NESoftmaxLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NESpaceToBatchLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NESpaceToDepthLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEStackLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEStridedSliceKernel.h" -#include "arm_compute/core/NEON/kernels/NETableLookupKernel.h" -#include "arm_compute/core/NEON/kernels/NEThresholdKernel.h" -#include "arm_compute/core/NEON/kernels/NETileKernel.h" -#include "arm_compute/core/NEON/kernels/NETransposeKernel.h" -#include "arm_compute/core/NEON/kernels/NEUpsampleLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEWarpKernel.h" -#include "arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h" -#include "arm_compute/core/NEON/kernels/NEWidthConcatenateLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEYOLOLayerKernel.h" - -#endif /* ARM_COMPUTE_NEKERNELS_H */ diff --git a/arm_compute/core/NEON/kernels/NEAbsoluteDifferenceKernel.h b/arm_compute/core/NEON/kernels/NEAbsoluteDifferenceKernel.h deleted file mode 100644 index 894e9277c7..0000000000 --- a/arm_compute/core/NEON/kernels/NEAbsoluteDifferenceKernel.h +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright (c) 2016-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEABSOLUTEDIFFERENCEKERNEL_H -#define ARM_COMPUTE_NEABSOLUTEDIFFERENCEKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the absolute difference kernel - * - * Absolute difference is computed by: - * @f[ output(x,y) = | input1(x,y) - input2(x,y) | @f] - */ -class NEAbsoluteDifferenceKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEAbsoluteDifferenceKernel"; - } - /** Default constructor */ - NEAbsoluteDifferenceKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEAbsoluteDifferenceKernel(const NEAbsoluteDifferenceKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEAbsoluteDifferenceKernel &operator=(const NEAbsoluteDifferenceKernel &) = delete; - /** Allow instances of this class to be moved */ - NEAbsoluteDifferenceKernel(NEAbsoluteDifferenceKernel &&) = default; - /** Allow instances of this class to be moved */ - NEAbsoluteDifferenceKernel &operator=(NEAbsoluteDifferenceKernel &&) = default; - /** Default destructor */ - ~NEAbsoluteDifferenceKernel() = default; - - /** Set the inputs and output tensors - * - * @param[in] input1 Source tensor. Data types supported: U8/S16 - * @param[in] input2 Source tensor. Data types supported: U8/S16 - * @param[out] output Destination tensor, Data types supported: U8/S16 - */ - void configure(const ITensor *input1, const ITensor *input2, ITensor *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Common signature for all the specialised absolute difference functions - * - * @param[in] input1 An input tensor. Data types supported: U8/S16. - * @param[in] input2 An input tensor. Data types supported: U8/S16. - * @param[out] output The output tensor, Data types supported: U8 (Only if both inputs are U8), S16. - * @param[in] window Region on which to execute the kernel. - */ - using AbsDiffFunction = void(const ITensor *input1, const ITensor *input2, ITensor *output, const Window &window); - - /** Absolute difference function to use for the particular tensor formats passed to configure() */ - AbsDiffFunction *_func; - const ITensor *_input1; - const ITensor *_input2; - ITensor *_output; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEABSOLUTEDIFFERENCEKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEAccumulateKernel.h b/arm_compute/core/NEON/kernels/NEAccumulateKernel.h deleted file mode 100644 index 2e9935cd79..0000000000 --- a/arm_compute/core/NEON/kernels/NEAccumulateKernel.h +++ /dev/null @@ -1,139 +0,0 @@ -/* - * Copyright (c) 2016-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEACCUMULATEKERNEL_H -#define ARM_COMPUTE_NEACCUMULATEKERNEL_H - -#include "arm_compute/core/NEON/INESimpleKernel.h" - -#include - -namespace arm_compute -{ -class ITensor; - -/** Interface for the accumulate kernel - * - * Accumulation is computed by: - * @f[ accum(x,y) = accum(x,y) + input(x,y) @f] - */ -class NEAccumulateKernel : public INESimpleKernel -{ -public: - const char *name() const override - { - return "NEAccumulateKernel"; - } - /** Set the input and accumulation tensors - * - * @param[in] input Source tensor. Data type supported: U8. - * @param[out] accum Destination tensor. Data type supported: S16. - */ - void configure(const ITensor *input, ITensor *accum); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; -}; - -/** Interface for the accumulate weighted kernel - * - * Weighted accumulation is computed: - * @f[ accum(x,y) = (1 - \alpha)*accum(x,y) + \alpha*input(x,y) @f] - * - * Where @f$ 0 \le \alpha \le 1 @f$ - * Conceptually, the rounding for this is defined as: - * @f[ output(x,y)= uint8( (1 - \alpha) * float32( int32( output(x,y) ) ) + \alpha * float32( int32( input(x,y) ) ) ) @f] -*/ -class NEAccumulateWeightedKernel : public INESimpleKernel -{ -public: - const char *name() const override - { - return "NEAccumulateWeightedKernel"; - } - /** Default constructor */ - NEAccumulateWeightedKernel(); - /** Set the input and accumulation tensors, and the scale value - * - * @param[in] input Source tensor. Data type supported: U8. - * @param[in] alpha Scalar value in the range [0.0f, 1.0f] - * @param[in,out] accum Accumulated tensor. Data type supported: U8. - */ - void configure(const ITensor *input, float alpha, ITensor *accum); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -protected: - float _alpha; -}; - -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -/** Interface for the accumulate weighted kernel using F16 */ -class NEAccumulateWeightedFP16Kernel : public NEAccumulateWeightedKernel -{ -public: - const char *name() const override - { - return "NEAccumulateWeightedFP16Kernel"; - } - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; -}; -#else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ -/** Interface for the accumulate weighted kernel using F16 */ -using NEAccumulateWeightedFP16Kernel = NEAccumulateWeightedKernel; -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ - -/** Interface for the accumulate squared kernel - * - * The accumulation of squares is computed: - * @f[ accum(x,y) = saturate_{int16} ( (uint16) accum(x,y) + (((uint16)(input(x,y)^2)) >> (shift)) ) @f] - * - * Where @f$ 0 \le shift \le 15 @f$ -*/ -class NEAccumulateSquaredKernel : public INESimpleKernel -{ -public: - const char *name() const override - { - return "NEAccumulateSquaredKernel"; - } - /** Default constructor */ - NEAccumulateSquaredKernel(); - /** Set the input and accumulation tensors and the shift value. 
- * - * @param[in] input Source tensor. Data type supported: U8. - * @param[in] shift Shift value in the range of [0, 15] - * @param[in,out] accum Accumulated tensor. Data type supported: S16. - */ - void configure(const ITensor *input, uint32_t shift, ITensor *accum); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - uint32_t _shift; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEACCUMULATEKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h b/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h deleted file mode 100644 index a62f34cd58..0000000000 --- a/arm_compute/core/NEON/kernels/NEActivationLayerKernel.h +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEACTIVATIONLAYERKERNEL_H -#define ARM_COMPUTE_NEACTIVATIONLAYERKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/utils/misc/Traits.h" - -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -#include -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ - -namespace arm_compute -{ -// Forward declarations -class ITensor; - -/** Interface for the activation layer kernel. */ -class NEActivationLayerKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEActivationLayerKernel"; - } - /** Constructor */ - NEActivationLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEActivationLayerKernel(const NEActivationLayerKernel &) = delete; - /** Default move constructor */ - NEActivationLayerKernel(NEActivationLayerKernel &&) = default; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEActivationLayerKernel &operator=(const NEActivationLayerKernel &) = delete; - /** Default move assignment operator */ - NEActivationLayerKernel &operator=(NEActivationLayerKernel &&) = default; - /** Set the input and output tensor. - * - * @note If the output tensor is a nullptr, the activation function will be performed in-place - * - * @param[in, out] input Source tensor info. In case of @p output tensor = nullptr, this tensor will store the result - * of the activation function. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM16/F16/F32. - * @param[out] output Destination tensor info. 
Data type supported: same as @p input - * @param[in] activation_info Activation layer information. - */ - void configure(const ITensorInfo *input, ITensorInfo *output, ActivationLayerInfo activation_info); - /** Static function to check if given info will lead to a valid configuration of @ref NEActivationLayerKernel - * - * @param[in] input Source tensor info. In case of @p output tensor info = nullptr, this tensor will store the result - * of the activation function. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM16/F16/F32. - * @param[in] output Destination tensor info. Data type supported: same as @p input - * @param[in] act_info Activation layer information. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info); - - // Inherited methods overridden: - void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override; - -private: - ActivationLayerInfo _act_info; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEACTIVATIONLAYERKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h b/arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h deleted file mode 100644 index eece5708e8..0000000000 --- a/arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h +++ /dev/null @@ -1,106 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEARITHMETICADDITIONKERNEL_H -#define ARM_COMPUTE_NEARITHMETICADDITIONKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the kernel to perform addition between two tensors */ -class NEArithmeticAdditionKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEArithmeticAdditionKernel"; - } - /** Default constructor */ - NEArithmeticAdditionKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEArithmeticAdditionKernel(const NEArithmeticAdditionKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEArithmeticAdditionKernel &operator=(const NEArithmeticAdditionKernel &) = delete; - /** Allow instances of this class to be moved */ - NEArithmeticAdditionKernel(NEArithmeticAdditionKernel &&) = default; - /** Allow instances of this class to be moved */ - NEArithmeticAdditionKernel &operator=(NEArithmeticAdditionKernel &&) = default; - /** Default destructor */ - ~NEArithmeticAdditionKernel() = default; - - /** Initialise the kernel's input, output and border mode. - * - * Valid configurations (Input1,Input2) -> Output : - * - * - (U8,U8) -> U8 - * - (U8,U8) -> S16 - * - (S16,U8) -> S16 - * - (U8,S16) -> S16 - * - (S16,S16) -> S16 - * - (S32,S32) -> S32 - * - (F16,F16) -> F16 - * - (F32,F32) -> F32 - * - (QASYMM8,QASYMM8) -> QASYMM8 - * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED - * - (QSYMM16,QSYMM16) -> QSYMM16 - * - * @param[in] input1 First input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32 - * @param[in] input2 Second input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32 - * @param[out] output The output tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32. - * @param[in] policy Overflow policy. - */ - void configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output, ConvertPolicy policy); - /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticAdditionKernel - * - * @param[in] input1 First input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32 - * @param[in] input2 Second input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32 - * @param[in] output The output tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32. - * @param[in] policy Overflow policy. - * - * @return a status - */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy); - - // Inherited methods overridden: - void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override; - -private: - /** Common signature for all the specialised add functions - * - * @param[in] input1 First input tensor. Data types supported: U8/QASYMM8/S16/QSYMM16/F16/S32/F32 - * @param[in] input2 Second input tensor. Data types supported: U8/QASYMM8/S16/QSYMM16/F16/S32/F32 - * @param[out] output The output tensor. Data types supported: U8/QASYMM8/S16/QSYMM16/F16/S32/F32. - * @param[in] policy Overflow policy. - * @param[in] window Region on which to execute the kernel. 
- */ - using AddFunction = void(const ITensor *input1, const ITensor *input2, ITensor *output, ConvertPolicy policy, const Window &window); - /** Add function to use for the particular tensor types passed to configure() */ - AddFunction *_func; - ConvertPolicy _policy; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEARITHMETICADDITIONKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h b/arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h deleted file mode 100644 index 7d00d1f7d0..0000000000 --- a/arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h +++ /dev/null @@ -1,118 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEARITHMETICSUBTRACTIONKERNEL_H -#define ARM_COMPUTE_NEARITHMETICSUBTRACTIONKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the kernel to perform subtraction between two tensors */ -class NEArithmeticSubtractionKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEArithmeticSubtractionKernel"; - } - /** Default constructor */ - NEArithmeticSubtractionKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEArithmeticSubtractionKernel(const NEArithmeticSubtractionKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEArithmeticSubtractionKernel &operator=(const NEArithmeticSubtractionKernel &) = delete; - /** Allow instances of this class to be moved */ - NEArithmeticSubtractionKernel(NEArithmeticSubtractionKernel &&) = default; - /** Allow instances of this class to be moved */ - NEArithmeticSubtractionKernel &operator=(NEArithmeticSubtractionKernel &&) = default; - /** Default destructor */ - ~NEArithmeticSubtractionKernel() = default; - - /** Initialise the kernel's input and output. - * - * Valid configurations (Input1,Input2) -> Output : - * - * - (U8,U8) -> U8 - * - (U8,U8) -> S16 - * - (QASYMM8, QASYMM8) -> QASYMM8 - * - (QASYMM8_SIGNED, QASYMM8_SIGNED) -> QASYMM8_SIGNED - * - (S16,U8) -> S16 - * - (U8,S16) -> S16 - * - (S16,S16) -> S16 - * - (S32,S32) -> S32 - * - (F16,F16) -> F16 - * - (F32,F32) -> F32 - * - * @param[in] input1 An input tensor. 
Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/S32/F16/F32 - * @param[in] input2 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/S32/F16/F32 - * @param[out] output The output tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/S32/F16/F32. - * @param[in] policy Overflow policy. Convert policy cannot be WRAP if datatype is quantized. - */ - void configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output, ConvertPolicy policy); - /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticSubtractionKernel - * - * Valid configurations (Input1,Input2) -> Output : - * - * - (U8,U8) -> U8 - * - (U8,U8) -> S16 - * - (QASYMM8, QASYMM8) -> QASYMM8 - * - (QASYMM8_SIGNED, QASYMM8_SIGNED) -> QASYMM8_SIGNED - * - (S16,U8) -> S16 - * - (U8,S16) -> S16 - * - (S16,S16) -> S16 - * - (S32,S32) -> S32 - * - (F16,F16) -> F16 - * - (F32,F32) -> F32 - * - * @param[in] input1 An input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/S32/F16/F32 - * @param[in] input2 An input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/S32/F16/F32 - * @param[in] output The output tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/S32/F16/F32. - * @param[in] policy Policy to use to handle overflow. Convert policy cannot be WRAP if datatype is quantized. - * - * @return a status - */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, ConvertPolicy policy); - - // Inherited methods overridden: - void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override; - -private: - /** Common signature for all the specialised sub functions - * - * @param[in] input1 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/S32/F16/F32 - * @param[in] input2 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/S32/F16/F32 - * @param[out] output The output tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/S32/F16/F32. - * @param[in] window Region on which to execute the kernel. - * @param[in] is_sat Flag to indicate if the policy is SATURATE. - */ - using SubFunction = void(const ITensor *input1, const ITensor *input2, ITensor *output, const Window &window, bool is_sat); - /** Sub function to use for the particular tensor types passed to configure() */ - SubFunction *_func; - ConvertPolicy _policy; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEARITHMETICSUBTRACTIONKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEBatchConcatenateLayerKernel.h b/arm_compute/core/NEON/kernels/NEBatchConcatenateLayerKernel.h deleted file mode 100644 index 478890925b..0000000000 --- a/arm_compute/core/NEON/kernels/NEBatchConcatenateLayerKernel.h +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Copyright (c) 2019-2020 Arm Limited. 
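
A small sketch of the configure/validate flow shown above for NEArithmeticAdditionKernel (the subtraction kernel follows the same pattern); it uses the pre-move include path from this patch, and the shapes and overflow policy are illustrative assumptions:

    #include "arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h"
    #include "arm_compute/core/TensorInfo.h"

    arm_compute::Status addition_validate_and_configure()
    {
        using namespace arm_compute;
        const TensorInfo in0(TensorShape(16U, 16U), 1, DataType::F32);
        const TensorInfo in1(TensorShape(16U, 16U), 1, DataType::F32);
        TensorInfo       out(TensorShape(16U, 16U), 1, DataType::F32);

        // validate() works on tensor metadata only, before any allocation happens.
        const Status status = NEArithmeticAdditionKernel::validate(&in0, &in1, &out, ConvertPolicy::SATURATE);

        NEArithmeticAdditionKernel kernel{};
        kernel.configure(&in0, &in1, &out, ConvertPolicy::SATURATE);
        // At run time the actual tensors are bound through an ITensorPack handed to run_op().
        return status;
    }
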
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef ARM_COMPUTE_NEBATCHCONCATENATEKERNEL_H -#define ARM_COMPUTE_NEBATCHCONCATENATEKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -// Forward declarations -class ITensor; - -/** Interface for the batch concatenate kernel. - * The input tensor will be concatenated into the output tensor. - */ -class NEBatchConcatenateLayerKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEBatchConcatenateLayerKernel"; - } - /** Default constructor */ - NEBatchConcatenateLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEBatchConcatenateLayerKernel(const NEBatchConcatenateLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEBatchConcatenateLayerKernel &operator=(const NEBatchConcatenateLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - NEBatchConcatenateLayerKernel(NEBatchConcatenateLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - NEBatchConcatenateLayerKernel &operator=(NEBatchConcatenateLayerKernel &&) = default; - /** Default destructor */ - ~NEBatchConcatenateLayerKernel() = default; - /** Initialise the kernel's inputs and output - * - * @param[in] input Input tensor info. Data types supported: All. - * @param[in] batch_offset The offset on axis # 3. - * @param[in,out] output Output tensor info. Data types supported: Same as @p input. - * - * @note: The output tensor's low two dimensions can't be smaller than the input one's. - * @note: The gaps between the two lowest dimensions of input and output need to be divisible by 2. - * - */ - void configure(const ITensorInfo *input, unsigned int batch_offset, ITensorInfo *output); - /** Static function to check if given info will lead to a valid configuration of @ref NEBatchConcatenateLayerKernel - * - * @param[in] input Input tensor info. Data types supported: All. - * @param[in] batch_offset The offset on axis # 3. - * @param[in] output Output tensor info. Data types supported: Same as @p input. 
- * - * @return a status - */ - static Status validate(const ITensorInfo *input, unsigned int batch_offset, const ITensorInfo *output); - - // Inherited methods overridden: - void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override; - -private: - using BatchConcatFunction = void(const ITensor *in, ITensor *out, unsigned int batch_offset, const Window &window); - -private: - BatchConcatFunction *_func; - unsigned int _batch_offset; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEBATCHCONCATENATEKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEBatchNormalizationLayerKernel.h b/arm_compute/core/NEON/kernels/NEBatchNormalizationLayerKernel.h deleted file mode 100644 index 962d2565c0..0000000000 --- a/arm_compute/core/NEON/kernels/NEBatchNormalizationLayerKernel.h +++ /dev/null @@ -1,139 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEBATCHNORMALIZATIONLAYERKERNEL_H -#define ARM_COMPUTE_NEBATCHNORMALIZATIONLAYERKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -// Forward declarations -class ITensor; - -/** Interface for the batch normalization layer kernel. - */ -class NEBatchNormalizationLayerKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEBatchNormalizationLayerKernel"; - } - /** Default constructor */ - NEBatchNormalizationLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEBatchNormalizationLayerKernel(const NEBatchNormalizationLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEBatchNormalizationLayerKernel &operator=(const NEBatchNormalizationLayerKernel &) = delete; - /** Default Move Constructor. */ - NEBatchNormalizationLayerKernel(NEBatchNormalizationLayerKernel &&) = default; - /** Default move assignment operator */ - NEBatchNormalizationLayerKernel &operator=(NEBatchNormalizationLayerKernel &&) = default; - /** Default destructor */ - ~NEBatchNormalizationLayerKernel() = default; - /** Set the input and output tensors. - * - * @note If the output tensor is a nullptr, the batch normalization function will be performed in-place - * - * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result. 
- * 3 lower dimensions represent a single input with dimensions [width, height, FM]. - * The rest are optional and used for representing batches. Data types supported: F16/F32. - * @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data type supported: same as @p input - * @param[in] mean Mean values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input - * @param[in] var Variance values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input - * @param[in] beta (Optional) Beta values tensor info. 1 dimension with size equal to the feature maps [FM]. If not provided, default value for beta is 0. Data types supported: Same as @p input - * @param[in] gamma (Optional) Gamma values tensor info. 1 dimension with size equal to the feature maps [FM]. If not provided, default value for gamma is 1. Data types supported: Same as @p input - * @param[in] epsilon (Optional) Small value to avoid division with zero. Default value is 0.001f. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported. - */ - void configure(ITensor *input, ITensor *output, const ITensor *mean, const ITensor *var, const ITensor *beta = nullptr, const ITensor *gamma = nullptr, float epsilon = 0.001f, - ActivationLayerInfo act_info = ActivationLayerInfo()); - /** Static function to check if given info will lead to a valid configuration of @ref NEBatchNormalizationLayerKernel - * - * @param[in] input Source tensor info. In case of @p output tensor = nullptr, this tensor will store the result. - * 3 lower dimensions represent a single input with dimensions [width, height, FM]. - * The rest are optional and used for representing batches. Data types supported: F16/F32. - * @param[in] output Destination tensor info. Output will have the same number of dimensions as input. Data type supported: same as @p input - * @param[in] mean Mean values tensor info. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input - * @param[in] var Variance values tensor info. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input - * @param[in] beta (Optional) Beta values tensor info. 1 dimension with size equal to the feature maps [FM]. If not provided, default value for beta is 0. Data types supported: Same as @p input - * @param[in] gamma (Optional) Gamma values tensor info. 1 dimension with size equal to the feature maps [FM]. If not provided, default value for gamma is 1. Data types supported: Same as @p input - * @param[in] epsilon (Optional) Small value to avoid division with zero. Default value is 0.001f. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported. 
- * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, - const ITensorInfo *mean, const ITensorInfo *var, - const ITensorInfo *beta = nullptr, const ITensorInfo *gamma = nullptr, - float epsilon = 0.001f, ActivationLayerInfo act_info = ActivationLayerInfo()); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Configure execution function in case of non-fused activation **/ - void configure_non_fused(); - /** Configure execution function in case of fused activation **/ - void configure_fused(); - - /** Template function to run batch normalization on fp32 - * - * @tparam T Specialization data type - * @tparam fused_activation Boolean that flags if its a fused activation or not - * @tparam F Activation function functor to run - * - * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). - */ - template - void batch_normalization_nchw(const Window &window); - /** Template function to run batch normalization on fp32 on tensors with NHWC format - * - * @tparam T Specialization data type - * @tparam fused_activation Boolean that flags if its a fused activation or not - * @tparam F Activation function functor to run - * - * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). - */ - template - void batch_normalization_nhwc(const Window &window); - /** Common signature for all the batch normalization functions - * - * @param[in] window Region on which to execute the kernel. - */ - using BatchNormFunctionPtr = void (NEBatchNormalizationLayerKernel::*)(const Window &window); - -private: - BatchNormFunctionPtr _func; - ITensor *_input; - ITensor *_output; - const ITensor *_mean; - const ITensor *_var; - const ITensor *_gamma; - const ITensor *_beta; - float _epsilon; - ActivationLayerInfo _act_info; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEBATCHNORMALIZATIONLAYERKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEBatchToSpaceLayerKernel.h b/arm_compute/core/NEON/kernels/NEBatchToSpaceLayerKernel.h deleted file mode 100644 index 943577d879..0000000000 --- a/arm_compute/core/NEON/kernels/NEBatchToSpaceLayerKernel.h +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Copyright (c) 2019-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
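
A brief sketch, assuming the runtime wrapper NEBatchNormalizationLayer, of the per-feature-map mean/var/beta/gamma layout described above; the shapes, epsilon and the fused RELU are illustrative assumptions:

    #include <initializer_list>

    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h"
    #include "arm_compute/runtime/Tensor.h"

    void batchnorm_sketch()
    {
        using namespace arm_compute;
        constexpr unsigned int fm = 64U; // number of feature maps (illustrative)

        Tensor src{}, dst{}, mean{}, var{}, beta{}, gamma{};
        src.allocator()->init(TensorInfo(TensorShape(28U, 28U, fm), 1, DataType::F32));
        dst.allocator()->init(TensorInfo(TensorShape(28U, 28U, fm), 1, DataType::F32));
        for(Tensor *t : { &mean, &var, &beta, &gamma })
        {
            t->allocator()->init(TensorInfo(TensorShape(fm), 1, DataType::F32)); // one value per feature map
        }

        NEBatchNormalizationLayer bn{};
        // beta/gamma may also be left as nullptr to use the documented defaults of 0 and 1.
        bn.configure(&src, &dst, &mean, &var, &beta, &gamma, 0.001f,
                     ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));

        for(Tensor *t : { &src, &dst, &mean, &var, &beta, &gamma })
        {
            t->allocator()->allocate();
        }
        bn.run();
    }
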
- */ -#ifndef ARM_COMPUTE_NEBATCHTOSPACELAYERKERNEL_H -#define ARM_COMPUTE_NEBATCHTOSPACELAYERKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the batch to space kernel */ -class NEBatchToSpaceLayerKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEBatchToSpaceLayerKernel"; - } - /** Default constructor */ - NEBatchToSpaceLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEBatchToSpaceLayerKernel(const NEBatchToSpaceLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEBatchToSpaceLayerKernel &operator=(const NEBatchToSpaceLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - NEBatchToSpaceLayerKernel(NEBatchToSpaceLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - NEBatchToSpaceLayerKernel &operator=(NEBatchToSpaceLayerKernel &&) = default; - /** Default destructor */ - ~NEBatchToSpaceLayerKernel() = default; - /** Initialise the kernel's inputs and output. - * - * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. - * @param[in] block_shape 1-D tensor with shape [M]. Data types supported: S32 - * @param[out] output Tensor output. Data types supported: same as @p input - */ - void configure(const ITensor *input, const ITensor *block_shape, ITensor *output); - /** Initialise the kernel's inputs and output (Static block shape). - * - * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. - * @param[in] block_shape_x Block shape x value. - * @param[in] block_shape_y Block shape y value. - * @param[out] output Tensor output. Data types supported: same as @p input - */ - void configure(const ITensor *input, const int32_t block_shape_x, const int32_t block_shape_y, ITensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref NEBatchToSpaceLayerKernel - * - * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. - * @param[in] block_shape 1-D tensor with shape [M]. Data types supported: S32 - * @param[in] output Tensor output. Data types supported: same as @p input - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *block_shape, const ITensorInfo *output); - /** Static function to check if given info will lead to a valid configuration of @ref NEBatchToSpaceLayerKernel (Static block shape). - * - * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. - * @param[in] block_shape_x Block shape x value. - * @param[in] block_shape_y Block shape y value. - * @param[in] output Tensor output. 
Data types supported: same as @p input - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const int32_t block_shape_x, const int32_t block_shape_y, const ITensorInfo *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - const ITensor *_input; /**< Source tensor */ - const ITensor *_block_shape; /**< Block shape tensor */ - ITensor *_output; /**< Destination tensor */ - DataLayout _data_layout; /**< Data layout to be used at run-time */ - - int32_t _block_shape_x; - int32_t _block_shape_y; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEBATCHTOSPACELAYERKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEBitwiseAndKernel.h b/arm_compute/core/NEON/kernels/NEBitwiseAndKernel.h deleted file mode 100644 index 0e4c886d34..0000000000 --- a/arm_compute/core/NEON/kernels/NEBitwiseAndKernel.h +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2016-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEBITWISEANDKERNEL_H -#define ARM_COMPUTE_NEBITWISEANDKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the kernel to perform bitwise AND between XY-planes of two tensors - * - * Result is computed by: - * @f[ output(x,y) = input1(x,y) \land input2(x,y) @f] - */ -class NEBitwiseAndKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEBitwiseAndKernel"; - } - /** Default constructor */ - NEBitwiseAndKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEBitwiseAndKernel(const NEBitwiseAndKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEBitwiseAndKernel &operator=(const NEBitwiseAndKernel &) = delete; - /** Allow instances of this class to be moved */ - NEBitwiseAndKernel(NEBitwiseAndKernel &&) = default; - /** Allow instances of this class to be moved */ - NEBitwiseAndKernel &operator=(NEBitwiseAndKernel &&) = default; - /** Initialise the kernel's inputs and output - * - * @param[in] input1 An input tensor. Data type supported: U8. - * @param[in] input2 An input tensor. Data type supported: U8 - * @param[out] output Output tensor. Data type supported: U8. 
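
A short sketch of the static-block-shape overload described above, assuming the runtime wrapper NEBatchToSpaceLayer; the shapes and the 2x2 block are illustrative assumptions:

    #include "arm_compute/runtime/NEON/functions/NEBatchToSpaceLayer.h"
    #include "arm_compute/runtime/Tensor.h"

    void batch_to_space_sketch()
    {
        using namespace arm_compute;
        // 4D shapes [width, height, channels, batch]; a 2x2 block folds the batch
        // of 4 back into the spatial dimensions (values are illustrative).
        Tensor src{}, dst{};
        src.allocator()->init(TensorInfo(TensorShape(8U, 8U, 16U, 4U), 1, DataType::F32));
        dst.allocator()->init(TensorInfo(TensorShape(16U, 16U, 16U, 1U), 1, DataType::F32));

        NEBatchToSpaceLayer b2s{};
        b2s.configure(&src, 2, 2, &dst); // static block-shape overload

        src.allocator()->allocate();
        dst.allocator()->allocate();
        b2s.run();
    }
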
- */ - void configure(const ITensor *input1, const ITensor *input2, ITensor *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - const ITensor *_input1; /**< Source tensor 1 */ - const ITensor *_input2; /**< Source tensor 2 */ - ITensor *_output; /**< Destination tensor */ -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEBITWISEANDKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEBitwiseNotKernel.h b/arm_compute/core/NEON/kernels/NEBitwiseNotKernel.h deleted file mode 100644 index a20fdaec93..0000000000 --- a/arm_compute/core/NEON/kernels/NEBitwiseNotKernel.h +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Copyright (c) 2016-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEBITWISENOTKERNEL_H -#define ARM_COMPUTE_NEBITWISENOTKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the kernel to perform bitwise NOT operation - * - * Result is computed by: - * @f[ output(x,y) = \lnot input(x,y) @f] - */ -class NEBitwiseNotKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEBitwiseNotKernel"; - } - /** Default constructor */ - NEBitwiseNotKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEBitwiseNotKernel(const NEBitwiseNotKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEBitwiseNotKernel &operator=(const NEBitwiseNotKernel &) = delete; - /** Allow instances of this class to be moved */ - NEBitwiseNotKernel(NEBitwiseNotKernel &&) = default; - /** Allow instances of this class to be moved */ - NEBitwiseNotKernel &operator=(NEBitwiseNotKernel &&) = default; - /** Initialise the kernel's input and output - * - * @param[in] input An input tensor. Data type supported: U8. - * @param[out] output The output tensor. Data type supported: U8. 
- */ - void configure(const ITensor *input, ITensor *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - const ITensor *_input; /**< Source tensor */ - ITensor *_output; /**< Destination tensor */ -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEBITWISENOTKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEBitwiseOrKernel.h b/arm_compute/core/NEON/kernels/NEBitwiseOrKernel.h deleted file mode 100644 index 70db5fbeb6..0000000000 --- a/arm_compute/core/NEON/kernels/NEBitwiseOrKernel.h +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2016-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEBITWISEORKERNEL_H -#define ARM_COMPUTE_NEBITWISEORKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the kernel to perform bitwise inclusive OR between two tensors - * - * Result is computed by: - * @f[ output(x,y) = input1(x,y) \lor input2(x,y) @f] - */ -class NEBitwiseOrKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEBitwiseOrKernel"; - } - /** Default constructor */ - NEBitwiseOrKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEBitwiseOrKernel(const NEBitwiseOrKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEBitwiseOrKernel &operator=(const NEBitwiseOrKernel &) = delete; - /** Allow instances of this class to be moved */ - NEBitwiseOrKernel(NEBitwiseOrKernel &&) = default; - /** Allow instances of this class to be moved */ - NEBitwiseOrKernel &operator=(NEBitwiseOrKernel &&) = default; - /** Initialise the kernel's inputs and output. - * - * @param[in] input1 An input tensor. Data type supported: U8. - * @param[in] input2 An input tensor. Data type supported: U8 - * @param[out] output Output tensor. Data type supported: U8. 
- */ - void configure(const ITensor *input1, const ITensor *input2, ITensor *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - const ITensor *_input1; /**< Source tensor 1 */ - const ITensor *_input2; /**< Source tensor 2 */ - ITensor *_output; /**< Destination tensor */ -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEBITWISEORKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEBitwiseXorKernel.h b/arm_compute/core/NEON/kernels/NEBitwiseXorKernel.h deleted file mode 100644 index 91f24f1c82..0000000000 --- a/arm_compute/core/NEON/kernels/NEBitwiseXorKernel.h +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2016-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEBITWISEXORKERNEL_H -#define ARM_COMPUTE_NEBITWISEXORKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the kernel to perform bitwise exclusive OR (XOR) between two tensors - * - * Result is computed by: - * @f[ output(x,y) = input1(x,y) \oplus input2(x,y) @f] - */ -class NEBitwiseXorKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEBitwiseXorKernel"; - } - /** Default constructor */ - NEBitwiseXorKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEBitwiseXorKernel(const NEBitwiseXorKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEBitwiseXorKernel &operator=(const NEBitwiseXorKernel &) = delete; - /** Allow instances of this class to be moved */ - NEBitwiseXorKernel(NEBitwiseXorKernel &&) = default; - /** Allow instances of this class to be moved */ - NEBitwiseXorKernel &operator=(NEBitwiseXorKernel &&) = default; - /** Initialise the kernel's input and output. - * - * @param[in] input1 An input tensor. Data type supported: U8. - * @param[in] input2 An input tensor. Data type supported: U8 - * @param[out] output The output tensor. Data type supported: U8. 
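
A compact sketch of the U8-only bitwise interfaces above, assuming the runtime wrappers NEBitwiseAnd and NEBitwiseNot; shapes are illustrative and the NOT of the AND result is just an example chain:

    #include <initializer_list>

    #include "arm_compute/runtime/NEON/functions/NEBitwiseAnd.h"
    #include "arm_compute/runtime/NEON/functions/NEBitwiseNot.h"
    #include "arm_compute/runtime/Tensor.h"

    void bitwise_sketch()
    {
        using namespace arm_compute;
        Tensor a{}, b{}, anded{}, nand_out{};
        for(Tensor *t : { &a, &b, &anded, &nand_out })
        {
            t->allocator()->init(TensorInfo(TensorShape(64U, 64U), 1, DataType::U8));
        }

        NEBitwiseAnd band{};
        NEBitwiseNot bnot{};
        band.configure(&a, &b, &anded);    // anded = a & b
        bnot.configure(&anded, &nand_out); // nand_out = ~(a & b)

        for(Tensor *t : { &a, &b, &anded, &nand_out })
        {
            t->allocator()->allocate();
        }
        band.run();
        bnot.run();
    }
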
- */ - void configure(const ITensor *input1, const ITensor *input2, ITensor *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - const ITensor *_input1; /**< Source tensor 1 */ - const ITensor *_input2; /**< Source tensor 2 */ - ITensor *_output; /**< Destination tensor */ -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEBITWISEXORKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEBoundingBoxTransformKernel.h b/arm_compute/core/NEON/kernels/NEBoundingBoxTransformKernel.h deleted file mode 100644 index 8b3953a53a..0000000000 --- a/arm_compute/core/NEON/kernels/NEBoundingBoxTransformKernel.h +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Copyright (c) 2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEBOUNDINGBOXTRANSFORMKERNEL_H -#define ARM_COMPUTE_NEBOUNDINGBOXTRANSFORMKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the bounding box kernel */ -class NEBoundingBoxTransformKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEBoundingBoxTransformKernel"; - } - - /** Default constructor */ - NEBoundingBoxTransformKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEBoundingBoxTransformKernel(const NEBoundingBoxTransformKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEBoundingBoxTransformKernel &operator=(const NEBoundingBoxTransformKernel &) = delete; - /** Allow instances of this class to be moved */ - NEBoundingBoxTransformKernel(NEBoundingBoxTransformKernel &&) = default; - /** Allow instances of this class to be moved */ - NEBoundingBoxTransformKernel &operator=(NEBoundingBoxTransformKernel &&) = default; - /** Default destructor */ - ~NEBoundingBoxTransformKernel() = default; - - /** Set the input and output tensors. - * - * @param[in] boxes Source tensor. Bounding box proposals in pixel coordinates. Size(M, 4), format [x1, y1, x2, y2]. Data types supported: QASYMM16/F16/F32. - * @param[out] pred_boxes Destination tensor. Pixel coordinates of the transformed bounding boxes. Size (M, 4*K), format [x1, y1, x2, y2]. Data types supported: Same as @p input - * @param[in] deltas Bounding box translations and scales. Size (M, 4*K), format [dx, dy, dw, dh], K is the number of classes. 
- * Data types supported: QASYMM8 if @p input is QASYMM16, otherwise same as @p input. - * @param[in] info Contains BoundingBox operation information described in @ref BoundingBoxTransformInfo. - * - * @note Only single image prediction is supported. Height and Width (and scale) of the image will be contained in the BoundingBoxTransformInfo struct. - * - */ - void configure(const ITensor *boxes, ITensor *pred_boxes, const ITensor *deltas, const BoundingBoxTransformInfo &info); - - /** Static function to check if given info will lead to a valid configuration of @ref CLBoundingBoxTransform - * - * @param[in] boxes Source tensor info. Bounding box proposals in pixel coordinates. Size(M, 4), format [x1, y1, x2, y2]. Data types supported: QASYMM16/F16/F32. - * @param[in] pred_boxes Destination tensor info. Pixel coordinates of the transformed bounding boxes. Size (M, 4*K), format [x1, y1, x2, y2]. Data types supported: Same as @p input - * @param[in] deltas Bounding box translations and scales. Size (M, 4*K), format [dx, dy, dw, dh], K is the number of classes. - * Data types supported: QASYMM8 if @p input is QASYMM16, otherwise same as @p input. - * @param[in] info Contains BoundingBox operation information described in @ref BoundingBoxTransformInfo. - * - * @note Only single image prediction is supported. Height and Width (and scale) of the image will be contained in the BoundingBoxTransformInfo struct. - * - * @return a Status - */ - static Status validate(const ITensorInfo *boxes, const ITensorInfo *pred_boxes, const ITensorInfo *deltas, const BoundingBoxTransformInfo &info); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - template - void internal_run(const Window &window); - - const ITensor *_boxes; - ITensor *_pred_boxes; - const ITensor *_deltas; - BoundingBoxTransformInfo _bbinfo; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEBOUNDINGBOXTRANSFORMKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEBox3x3Kernel.h b/arm_compute/core/NEON/kernels/NEBox3x3Kernel.h deleted file mode 100644 index 32e991e217..0000000000 --- a/arm_compute/core/NEON/kernels/NEBox3x3Kernel.h +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2016-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
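
A sketch of the bounding-box transform interface above, assuming the runtime wrapper NEBoundingBoxTransform; the proposal count, class count and the image dimensions passed through BoundingBoxTransformInfo are illustrative assumptions:

    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/NEON/functions/NEBoundingBoxTransform.h"
    #include "arm_compute/runtime/Tensor.h"

    void bbox_transform_sketch()
    {
        using namespace arm_compute;
        constexpr unsigned int num_boxes   = 128U; // M, illustrative
        constexpr unsigned int num_classes = 4U;   // K, illustrative

        Tensor boxes{}, deltas{}, pred_boxes{};
        boxes.allocator()->init(TensorInfo(TensorShape(4U, num_boxes), 1, DataType::F32));
        deltas.allocator()->init(TensorInfo(TensorShape(4U * num_classes, num_boxes), 1, DataType::F32));
        pred_boxes.allocator()->init(TensorInfo(TensorShape(4U * num_classes, num_boxes), 1, DataType::F32));

        NEBoundingBoxTransform transform{};
        // BoundingBoxTransformInfo carries the image width, height and scale.
        transform.configure(&boxes, &pred_boxes, &deltas, BoundingBoxTransformInfo(800.0f, 600.0f, 1.0f));

        boxes.allocator()->allocate();
        deltas.allocator()->allocate();
        pred_boxes.allocator()->allocate();
        transform.run();
    }
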
- */ -#ifndef ARM_COMPUTE_NEBOX3x3KERNEL_H -#define ARM_COMPUTE_NEBOX3x3KERNEL_H - -#include "arm_compute/core/NEON/INESimpleKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** NEON kernel to perform a Box 3x3 filter */ -class NEBox3x3Kernel : public INESimpleKernel -{ -public: - const char *name() const override - { - return "NEBox3x3Kernel"; - } - /** Set the source, destination and border mode of the kernel - * - * @param[in] input Source tensor. Data type supported: U8. - * @param[out] output Destination tensor. Data type supported: U8. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ITensor *input, ITensor *output, bool border_undefined); - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; -}; - -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -/** NEON kernel to perform a Box 3x3 filter for FP16 datatype - */ -class NEBox3x3FP16Kernel : public NEBox3x3Kernel -{ -public: - const char *name() const override - { - return "NEBox3x3FP16Kernel"; - } - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; -}; -#else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ -/** NEON kernel to perform a Box 3x3 filter for FP16 datatype */ -using NEBox3x3FP16Kernel = NEBox3x3Kernel; -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEBOX3x3KERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NECannyEdgeKernel.h b/arm_compute/core/NEON/kernels/NECannyEdgeKernel.h deleted file mode 100644 index c4e1f3ec3a..0000000000 --- a/arm_compute/core/NEON/kernels/NECannyEdgeKernel.h +++ /dev/null @@ -1,189 +0,0 @@ -/* - * Copyright (c) 2016-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NECANNYEDGEKERNEL_H -#define ARM_COMPUTE_NECANNYEDGEKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -#include - -namespace arm_compute -{ -class ITensor; - -/** Computes magnitude and quantised phase from inputs gradients. 
*/ -class NEGradientKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEGradientKernel"; - } - /** Default constructor */ - NEGradientKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEGradientKernel(const NEGradientKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEGradientKernel &operator=(const NEGradientKernel &) = delete; - /** Allow instances of this class to be moved */ - NEGradientKernel(NEGradientKernel &&) = default; - /** Allow instances of this class to be moved */ - NEGradientKernel &operator=(NEGradientKernel &&) = default; - /** Default destructor */ - virtual ~NEGradientKernel() = default; - - /** Initialise the kernel's sources, destinations and border mode. - * - * @note gx, gy and magnitude must all be the same size (either 16 or 32) - * - * @param[in] gx Source tensor - Gx component. Data type supported: S16/S32. - * @param[in] gy Source tensor - Gy component. Data type supported: same as @p gx. - * @param[out] magnitude Destination tensor - Magnitude. Data type supported: U16 (if the data type of @p gx is S16) / U32 (if the data type of @p gx is S32). - * @param[out] phase Destination tensor - Quantized phase. Data type supported: U8. - * @param[in] norm_type Normalization type. If 1, L1-Norm otherwise L2-Norm - */ - virtual void configure(const ITensor *gx, const ITensor *gy, ITensor *magnitude, ITensor *phase, int32_t norm_type); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -protected: - /** Common signature for all the specialised gradient functions - * - * @param[in] gx_ptr Pointer to the first input tensor. - * @param[in] gy_ptr Pointer to the second input tensor. - * @param[out] magnitude_ptr Pointer to the first output tensor - * @param[out] phase_ptr Pointer to the second output tensor - */ - using GradientFunction = void(const void *__restrict gx_ptr, const void *__restrict gy_ptr, void *__restrict magnitude_ptr, void *__restrict phase_ptr); - - GradientFunction *_func; /**< Gradient function to use for the particular tensor types passed to configure() */ - const ITensor *_gx; /**< Source tensor - Gx component */ - const ITensor *_gy; /**< Source tensor - Gy component */ - ITensor *_magnitude; /**< Destination tensor - Magnitude */ - ITensor *_phase; /**< Destination tensor - Quantized phase */ -}; - -/** NEON kernel to perform Non-Maxima suppression for Canny Edge. - * - * @note This kernel is meant to be used alongside CannyEdge and performs a non-maxima suppression using magnitude and phase of input - * to characterize points as possible edges. Thus, at the end, each point will be set to EDGE, NO_EDGE or MAYBE. 
- * - * @note Hysteresis is computed in @ref NEEdgeTraceKernel - */ -class NEEdgeNonMaxSuppressionKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEEdgeNonMaxSuppressionKernel"; - } - /** Default constructor */ - NEEdgeNonMaxSuppressionKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEEdgeNonMaxSuppressionKernel(const NEEdgeNonMaxSuppressionKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEEdgeNonMaxSuppressionKernel &operator=(const NEEdgeNonMaxSuppressionKernel &) = delete; - /** Allow instances of this class to be moved */ - NEEdgeNonMaxSuppressionKernel(NEEdgeNonMaxSuppressionKernel &&) = default; - /** Allow instances of this class to be moved */ - NEEdgeNonMaxSuppressionKernel &operator=(NEEdgeNonMaxSuppressionKernel &&) = default; - /** Default destructor */ - ~NEEdgeNonMaxSuppressionKernel() = default; - - /** Initialise the kernel's sources, destination and border mode. - * - * @param[in] magnitude Source tensor - Magnitude. Data type supported: U16/U32. - * @param[in] phase Source tensor - Quantized phase. Data type supported: U8. - * @param[out] output Output tensor. Data type supported: U8. It will be filled with 0 for "no edge", 127 for "maybe", 255 for "edge" - * @param[in] upper_thr Upper threshold used for the hysteresis - * @param[in] lower_thr Lower threshold used for the hysteresis - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ITensor *magnitude, const ITensor *phase, ITensor *output, int32_t upper_thr, int32_t lower_thr, bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; - -private: - /** Common signature for all the specialised non-maxima suppression functions - * - * @param[in] magnitude_ptr Pointer to the first input tensor. - * @param[in] phase_ptr Pointer to the second input tensor. 
- * @param[out] output_ptr Pointer to the output tensor - * @param[in] stride_mag Stride of the magnitude tensor - * @param[in] upper_thr Upper threshold used for the hysteresis - * @param[in] lower_thr Lower threshold used for the hysteresis - */ - using EdgeNonMaxSupprFunction = void(const void *__restrict magnitude_ptr, const void *__restrict phase_ptr, void *__restrict output_ptr, const uint32_t stride_mag, const int32_t upper_thr, - const int32_t lower_thr); - - EdgeNonMaxSupprFunction *_func; /**< Non-Maxima suppression function to use for the particular tensor types passed to configure() */ - const ITensor *_magnitude; /**< Source tensor - Magnitude */ - const ITensor *_phase; /**< Source tensor - Quantized phase */ - ITensor *_output; /**< Destination tensor */ - int32_t _lower_thr; /**< Lower threshold used for the hysteresis */ - int32_t _upper_thr; /**< Upper threshold used for the hysteresis */ -}; - -/** NEON kernel to perform Edge tracing */ -class NEEdgeTraceKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEEdgeTraceKernel"; - } - /** Default constructor */ - NEEdgeTraceKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEEdgeTraceKernel(const NEEdgeTraceKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEEdgeTraceKernel &operator=(const NEEdgeTraceKernel &) = delete; - /** Allow instances of this class to be moved */ - NEEdgeTraceKernel(NEEdgeTraceKernel &&) = default; - /** Allow instances of this class to be moved */ - NEEdgeTraceKernel &operator=(NEEdgeTraceKernel &&) = default; - /** Default constructor */ - ~NEEdgeTraceKernel() = default; - - /** Initialise the kernel's source, destination and border mode. - * - * @param[in,out] input Source tensor. Data type supported: U8. Must contain 0 for "no edge", 127 for "maybe", 255 for "edge" - * @param[in,out] output Destination tensor. Data type supported: U8. Must be initialized to 0 (No edge). - */ - void configure(ITensor *input, ITensor *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; - bool is_parallelisable() const override; - -private: - ITensor *_input; /**< Source tensor */ - ITensor *_output; /**< Destination tensor */ -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NECANNYEDGEKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEChannelCombineKernel.h b/arm_compute/core/NEON/kernels/NEChannelCombineKernel.h deleted file mode 100644 index 5d32aed573..0000000000 --- a/arm_compute/core/NEON/kernels/NEChannelCombineKernel.h +++ /dev/null @@ -1,129 +0,0 @@ -/* - * Copyright (c) 2016-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
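
The three Canny kernels above (gradient, non-maxima suppression, edge tracing) are normally driven together by the runtime function NECannyEdge; a hedged sketch follows, with the image size, thresholds, 3x3 Sobel gradient and L2 norm chosen purely for illustration:

    #include "arm_compute/runtime/NEON/functions/NECannyEdge.h"
    #include "arm_compute/runtime/Tensor.h"

    void canny_sketch()
    {
        using namespace arm_compute;
        Tensor src{}, edges{};
        src.allocator()->init(TensorInfo(TensorShape(640U, 480U), 1, DataType::U8));
        edges.allocator()->init(TensorInfo(TensorShape(640U, 480U), 1, DataType::U8));

        NECannyEdge canny{};
        // upper/lower hysteresis thresholds, 3x3 Sobel gradient, L2 norm (2), replicated border
        canny.configure(&src, &edges, 100, 50, 3, 2, BorderMode::REPLICATE);

        src.allocator()->allocate();
        edges.allocator()->allocate();
        canny.run();
    }
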
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NECHANNELCOMBINEKERNEL_H -#define ARM_COMPUTE_NECHANNELCOMBINEKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -#include -#include - -namespace arm_compute -{ -class IMultiImage; -class ITensor; -using IImage = ITensor; - -/** Interface for the channel combine kernel */ -class NEChannelCombineKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEChannelCombineKernel"; - } - /** Default constructor */ - NEChannelCombineKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEChannelCombineKernel(const NEChannelCombineKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEChannelCombineKernel &operator=(const NEChannelCombineKernel &) = delete; - /** Allow instances of this class to be moved */ - NEChannelCombineKernel(NEChannelCombineKernel &&) = default; - /** Allow instances of this class to be moved */ - NEChannelCombineKernel &operator=(NEChannelCombineKernel &&) = default; - /** Default destructor */ - ~NEChannelCombineKernel() = default; - - /** Configure function's inputs and outputs. - * - * @param[in] plane0 The 2D plane that forms channel 0. Data type supported: U8 - * @param[in] plane1 The 2D plane that forms channel 1. Data type supported: U8 - * @param[in] plane2 The 2D plane that forms channel 2. Data type supported: U8 - * @param[in] plane3 The 2D plane that forms channel 3. Data type supported: U8 - * @param[out] output The single planar output tensor. Formats supported: RGB888/RGBA8888/UYVY422/YUYV422 - */ - void configure(const ITensor *plane0, const ITensor *plane1, const ITensor *plane2, const ITensor *plane3, ITensor *output); - /** Configure function's inputs and outputs. - * - * @param[in] plane0 The 2D plane that forms channel 0. Data type supported: U8 - * @param[in] plane1 The 2D plane that forms channel 1. Data type supported: U8 - * @param[in] plane2 The 2D plane that forms channel 2. Data type supported: U8 - * @param[out] output The multi planar output tensor. Formats supported: NV12/NV21/IYUV/YUV444 - */ - void configure(const IImage *plane0, const IImage *plane1, const IImage *plane2, IMultiImage *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - bool is_parallelisable() const override; - -private: - /** Combine 3 planes to form a three channel single plane tensor. - * - * @param[in] win Region on which to execute the kernel. - */ - void combine_3C(const Window &win); - /** Combine 4 planes to form a four channel single plane tensor. - * - * @param[in] win Region on which to execute the kernel. - */ - void combine_4C(const Window &win); - /** Combine 3 planes to form a single plane YUV tensor. - * - * @param[in] win Region on which to execute the kernel. - */ - template - void combine_YUV_1p(const Window &win); - /** Combine 3 planes to form a two plane YUV tensor. - * - * @param[in] win Region on which to execute the kernel. 
- */ - void combine_YUV_2p(const Window &win); - /** Combine 3 planes to form a three plane YUV tensor. - * - * @param[in] win Region on which to execute the kernel. - */ - void combine_YUV_3p(const Window &win); - /** Copies a full plane to the output tensor. - * - * @param[in] win Region on which to execute the kernel. - */ - void copy_plane(const Window &win, uint32_t plane_id); - /** Common signature for all the specialised ChannelCombine functions - * - * @param[in] window Region on which to execute the kernel. - */ - using ChannelCombineFunction = void (NEChannelCombineKernel::*)(const Window &window); - /** ChannelCombine function to use for the particular tensor types passed to configure() */ - ChannelCombineFunction _func; - std::array _planes; - ITensor *_output; - IMultiImage *_output_multi; - std::array _x_subsampling; - std::array _y_subsampling; - unsigned int _num_elems_processed_per_iteration; - bool _is_parallelizable; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NECHANNELCOMBINEKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEChannelExtractKernel.h b/arm_compute/core/NEON/kernels/NEChannelExtractKernel.h deleted file mode 100644 index debae2488f..0000000000 --- a/arm_compute/core/NEON/kernels/NEChannelExtractKernel.h +++ /dev/null @@ -1,111 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
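A minimal sketch of what the combine_3C path of NEChannelCombineKernel above amounts to, using raw pointers instead of ITensor/Window; the packed R,G,B ordering is assumed from the RGB888 format name rather than taken from the deleted source:

// Illustrative sketch only: interleave three U8 planes into one packed RGB888 plane.
#include <cstddef>
#include <cstdint>

void combine_3c(const uint8_t *r, const uint8_t *g, const uint8_t *b,
                uint8_t *rgb, size_t num_pixels)
{
    for(size_t i = 0; i < num_pixels; ++i)
    {
        rgb[3 * i + 0] = r[i]; // channel 0
        rgb[3 * i + 1] = g[i]; // channel 1
        rgb[3 * i + 2] = b[i]; // channel 2
    }
}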
- */ -#ifndef ARM_COMPUTE_NECHANNELEXTRACTKERNEL_H -#define ARM_COMPUTE_NECHANNELEXTRACTKERNEL_H - -#include "arm_compute/core/NEON/INESimpleKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class IMultiImage; -class ITensor; -using IImage = ITensor; - -/** Interface for the channel extract kernel */ -class NEChannelExtractKernel : public INESimpleKernel -{ -public: - const char *name() const override - { - return "NEChannelExtractKernel"; - } - /** Default constructor */ - NEChannelExtractKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEChannelExtractKernel(const NEChannelExtractKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEChannelExtractKernel &operator=(const NEChannelExtractKernel &) = delete; - /** Allow instances of this class to be moved */ - NEChannelExtractKernel(NEChannelExtractKernel &&) = default; - /** Allow instances of this class to be moved */ - NEChannelExtractKernel &operator=(NEChannelExtractKernel &&) = default; - /** Default destructor */ - ~NEChannelExtractKernel() = default; - - /** Set the input and output of the kernel - * - * @param[in] input Source tensor. Formats supported: RGB888/RGBA8888/YUYV422/UYVY422 - * @param[in] channel Channel to extract. - * @param[out] output Destination tensor. Format supported: U8 - */ - void configure(const ITensor *input, Channel channel, ITensor *output); - /** Set the input and output of the kernel - * - * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV/YUV444 - * @param[in] channel Channel to extract. - * @param[out] output Single-planar destination image. Format supported: U8 - */ - void configure(const IMultiImage *input, Channel channel, IImage *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Extract one channel from a two channel planar tensor. - * - * @param[in] win Region on which to execute the kernel. - */ - void extract_1C_from_2C_img(const Window &win); - /** Extract one channel from a three channel planar tensor. - * - * @param[in] win Region on which to execute the kernel. - */ - void extract_1C_from_3C_img(const Window &win); - /** Extract one channel from a four channel planar tensor. - * - * @param[in] win Region on which to execute the kernel. - */ - void extract_1C_from_4C_img(const Window &win); - /** Extract U/V channel from a single planar YUVY/UYVY tensor. - * - * @param[in] win Region on which to execute the kernel. - */ - void extract_YUYV_uv(const Window &win); - /** Copies a full plane to the output tensor. - * - * @param[in] win Region on which to execute the kernel. - */ - void copy_plane(const Window &win); - /** Common signature for all the specialised ChannelExtract functions - * - * @param[in] window Region on which to execute the kernel. 
- */ - using ChannelExtractFunction = void (NEChannelExtractKernel::*)(const Window &window); - /** ChannelExtract function to use for the particular tensor types passed to configure() */ - ChannelExtractFunction _func; - unsigned int _lut_index; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NECHANNELEXTRACTKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEChannelShuffleLayerKernel.h b/arm_compute/core/NEON/kernels/NEChannelShuffleLayerKernel.h deleted file mode 100644 index e5bce7e273..0000000000 --- a/arm_compute/core/NEON/kernels/NEChannelShuffleLayerKernel.h +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Copyright (c) 2018-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NECHANNELSHUFFLELAYERKERNEL_H -#define ARM_COMPUTE_NECHANNELSHUFFLELAYERKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -// Forward declarations -class ITensor; - -/** Interface for the channel shuffle kernel */ -class NEChannelShuffleLayerKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEChannelShuffleLayerKernel"; - } - /** Default constructor */ - NEChannelShuffleLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEChannelShuffleLayerKernel(const NEChannelShuffleLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEChannelShuffleLayerKernel &operator=(const NEChannelShuffleLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - NEChannelShuffleLayerKernel(NEChannelShuffleLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - NEChannelShuffleLayerKernel &operator=(NEChannelShuffleLayerKernel &&) = default; - /** Default destructor */ - ~NEChannelShuffleLayerKernel() = default; - /** Configure function's inputs and outputs. - * - * @param[in] input Input tensor. Data types supported: All - * @param[out] output Output tensor. Data type supported: Same as @p input - * @param[in] num_groups Number of groups. Must be greater than 1 and the number of channels of the tensors must be a multiple of the number of groups. - */ - void configure(const ITensor *input, ITensor *output, unsigned int num_groups); - /** Static function to check if given info will lead to a valid configuration of @ref NEChannelShuffleLayerKernel - * - * @param[in] input Input tensor. 
Data types supported: All - * @param[out] output Output tensor. Data type supported: Same as @p input - * @param[in] num_groups Number of groups. Must be greater than 1 and the number of channels of the tensors must be a multiple of the number of groups. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int num_groups); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - const ITensor *_input; - ITensor *_output; - unsigned int _num_groups; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NECHANNELSHUFFLELAYERKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NECol2ImKernel.h b/arm_compute/core/NEON/kernels/NECol2ImKernel.h deleted file mode 100644 index e988771599..0000000000 --- a/arm_compute/core/NEON/kernels/NECol2ImKernel.h +++ /dev/null @@ -1,115 +0,0 @@ -/* - * Copyright (c) 2017-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NECOL2IMKERNEL_H -#define ARM_COMPUTE_NECOL2IMKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -#include "arm_compute/core/Size2D.h" - -namespace arm_compute -{ -class ITensor; - -/** NEON kernel to perform col2im reshaping. - * - * Rearranges each matrix column into image blocks. It's the inverse operation of @ref NEIm2ColKernel. 
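The permutation behind NEChannelShuffleLayerKernel above can be written as a plain index mapping. This sketch assumes only what the header states (num_groups greater than 1 and channels divisible by num_groups); it is not the kernel's code:

// Illustrative sketch only: destination channel for source channel c when the channel
// dimension is viewed as (groups, channels_per_group), transposed and flattened.
#include <cstddef>

inline size_t shuffled_channel(size_t c, size_t channels, size_t num_groups)
{
    const size_t channels_per_group = channels / num_groups; // channels % num_groups == 0 assumed
    const size_t group  = c / channels_per_group;
    const size_t within = c % channels_per_group;
    return within * num_groups + group;
}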
- * - * For example, a vector of 9 elements can be reshaped to a block(image) of 3x3: - * - * @f[ - * \left( \begin{array}{ccccccccc} - * a0 & a1 & a2 & a3 & a4 & a5 & a6 & a7 & a8 \\ - * \end{array} \right) - * \rightarrow - * \left( \begin{array}{ccc} - * a0 & a1 & a2 \\ - * a3 & a4 & a5 \\ - * a6 & a7 & a8 \\ - * \end{array} \right) - * @f] - */ -class NECol2ImKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NECol2ImKernel"; - } - /** Default constructor */ - NECol2ImKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NECol2ImKernel(const NECol2ImKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NECol2ImKernel &operator=(const NECol2ImKernel &) = delete; - /** Allow instances of this class to be moved */ - NECol2ImKernel(NECol2ImKernel &&) = default; - /** Allow instances of this class to be moved */ - NECol2ImKernel &operator=(NECol2ImKernel &&) = default; - /** Default destructor */ - ~NECol2ImKernel() = default; - - /** Set the input and output of the kernel. - * - * @param[in] input The input tensor to convert. Data types supported: All - * @param[out] output The output tensor. 3 lower dimensions represent a single output [width, height, OFM], - * while the rest represent batch of outputs. Data types supported: Same as @p input - * @param[in] convolved_dims Output convolved dimensions. - */ - void configure(const ITensor *input, ITensor *output, const Size2D &convolved_dims); - /** Static function to check if given info will lead to a valid configuration of @ref NECol2ImKernel - * - * @param[in] input The input tensor to convert. Data types supported: All - * @param[in] output The output tensor. 3 lower dimensions represent a single output [width, height, OFM], - * while the rest represent batch of outputs. Data types supported: Same as @p input - * @param[in] convolved_dims Output convolved dimensions. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &convolved_dims); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Template function to run the col2im - * - * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). - */ - template - void run_col2im(const Window &window); - - /** Common signature for all the specialised col2im functions - * - * @param[in] window Region on which to execute the kernel. - */ - using Col2ImFunctionPtr = void (NECol2ImKernel::*)(const Window &window); - - Col2ImFunctionPtr _func; - const ITensor *_input; - ITensor *_output; - Size2D _convolved_dims; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NECOL2IMKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEColorConvertKernel.h b/arm_compute/core/NEON/kernels/NEColorConvertKernel.h deleted file mode 100644 index 88c03b7607..0000000000 --- a/arm_compute/core/NEON/kernels/NEColorConvertKernel.h +++ /dev/null @@ -1,93 +0,0 @@ -/* - * Copyright (c) 2016-2019 Arm Limited. 
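The col2im reshaping shown in the formula above, expressed as a plain index mapping; the per-OFM and per-batch iteration of the real kernel is omitted and plain pointers stand in for ITensor accesses:

// Illustrative sketch only: element i of a column of convolved_w * convolved_h values
// lands at coordinates (x, y) of the output block (a0..a8 -> 3x3 in the example above).
#include <cstddef>

void col2im_plane(const float *column, float *plane, size_t convolved_w, size_t convolved_h)
{
    for(size_t i = 0; i < convolved_w * convolved_h; ++i)
    {
        const size_t x = i % convolved_w; // column inside the block
        const size_t y = i / convolved_w; // row inside the block
        plane[y * convolved_w + x] = column[i];
    }
}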
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_COLORCONVERTKERNEL_H -#define ARM_COMPUTE_COLORCONVERTKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class IMultiImage; -class ITensor; -using IImage = ITensor; - -/** Interface for the color convert kernel */ -class NEColorConvertKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEColorConvertKernel"; - } - /** Default constructor */ - NEColorConvertKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEColorConvertKernel(const NEColorConvertKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEColorConvertKernel &operator=(const NEColorConvertKernel &) = delete; - /** Allow instances of this class to be moved */ - NEColorConvertKernel(NEColorConvertKernel &&) = default; - /** Allow instances of this class to be moved */ - NEColorConvertKernel &operator=(NEColorConvertKernel &&) = default; - /** Default destructor */ - ~NEColorConvertKernel() = default; - - /** Set the input and output of the kernel - * - * @param[in] input Source tensor. Formats supported: RGBA8888/UYVY422/YUYV422/RGB888 - * @param[out] output Destination tensor. Formats supported: RGB888 (if the formats of @p input are RGBA8888/UYVY422/YUYV422), - * RGBA8888 (if the formats of @p input are UYVY422/YUYV422/RGB888/), - * U8 (if the formats of @p input is RGB888) - */ - void configure(const ITensor *input, ITensor *output); - /** Set the input and output of the kernel - * - * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV - * @param[out] output Single-planar destination image. Formats supported: RGB888/RGBA8888 - */ - void configure(const IMultiImage *input, IImage *output); - /** Set the input and output of the kernel - * - * @param[in] input Single-planar source image. Formats supported: RGB888/RGBA8888/UYVY422/YUYV422 - * @param[out] output Multi-planar destination image. Formats supported: NV12/IYUV/YUV444 (if the formats of @p input are RGB888/RGB8888) - */ - void configure(const IImage *input, IMultiImage *output); - /** Set the input and output of the kernel - * - * @param[in] input Multi-planar source image. Formats supported: NV12/NV21/IYUV - * @param[out] output Multi-planar destination image. 
Formats supported: YUV444/IYUV (if the formats of @p input are NV12/NV21)/NV12 (if the format of @p input is IYUV) - */ - void configure(const IMultiImage *input, IMultiImage *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - using ColorConvertFunction = void(const void *__restrict input_ptr, void *__restrict output_ptr, const Window &win); - const void *_input; - void *_output; - ColorConvertFunction *_func; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NECOLORCONVERTKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.h b/arm_compute/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.h deleted file mode 100644 index dadf9e9b94..0000000000 --- a/arm_compute/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.h +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Copyright (c) 2018-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NECONVERTFULLYCONNECTEDWEIGHTSKERNEL_H -#define ARM_COMPUTE_NECONVERTFULLYCONNECTEDWEIGHTSKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -// Forward declarations -class ITensor; - -/** Interface to convert the 2D Fully Connected weights from NCHW to NHWC or vice versa. - * - * @note This function can be applied to the 2D weights used by a Fully Connected layer if: - * - It follows a Convolution layer - * - The data layout used by the network does not match the one the model has been trained in. 
- * - * @note This function assumes the weights are already reshaped (transposed) - */ -class NEConvertFullyConnectedWeightsKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEConvertFullyConnectedWeightsKernel"; - } - /** Default constructor */ - NEConvertFullyConnectedWeightsKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEConvertFullyConnectedWeightsKernel(const NEConvertFullyConnectedWeightsKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEConvertFullyConnectedWeightsKernel &operator=(const NEConvertFullyConnectedWeightsKernel &) = delete; - /** Allow instances of this class to be moved */ - NEConvertFullyConnectedWeightsKernel(NEConvertFullyConnectedWeightsKernel &&) = default; - /** Allow instances of this class to be moved */ - NEConvertFullyConnectedWeightsKernel &operator=(NEConvertFullyConnectedWeightsKernel &&) = default; - /** Default destructor */ - ~NEConvertFullyConnectedWeightsKernel() = default; - /** Set the input and output tensor. - * - * @param[in] input Source weights tensor to convert. Must be 2 dimensional. Data types supported: All. - * @param[out] output The converted weights tensor. Shape and Data Type: Same as @p input. - * @param[in] original_input_shape Shape of the original input tensor (the one entering fully connected layer). - * @param[in] data_layout The data layout the weights have been trained in. - */ - void configure(const ITensor *input, ITensor *output, const TensorShape &original_input_shape, DataLayout data_layout); - /** Static function to check if given info will lead to a valid configuration of @ref NEConvertFullyConnectedWeightsKernel - * - * @param[in] input Source weights tensor info to convert. Must be 2 dimensional. Data types supported: All. - * @param[in] output The converted weights tensor info. Shape and Data Type: Same as @p input. - * @param[in] original_input_shape Shape of the original input tensor (the one entering fully connected layer). - * @param[in] data_layout The data layout the weights have been trained in. - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const TensorShape &original_input_shape, DataLayout data_layout); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Template function to run the permute - * - * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). - */ - template - void run_convert_fc_weights(const Window &window); - - const ITensor *_input; - ITensor *_output; - unsigned int _factor1; /* equals to the number of elements per original input plane if @p data_layout == NCHW; its number of channels otherwise */ - unsigned int _factor2; /* equals to the number of elements per original input plane if @p data_layout == NHWC; its number of channels otherwise */ -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NECONVERTFULLYCONNECTEDWEIGHTSKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h b/arm_compute/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h deleted file mode 100644 index 6c74a1216c..0000000000 --- a/arm_compute/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Copyright (c) 2019 Arm Limited. 
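A sketch of the row permutation behind the _factor1/_factor2 members of NEConvertFullyConnectedWeightsKernel above, for the NCHW-to-NHWC direction only. It assumes each weights row corresponds to one element of the flattened original input, and uses plain indices instead of ITensor accesses:

// Illustrative sketch only: in NCHW order, row i = c * (H*W) + s (plane-major); the
// same logical element in NHWC order is row s * C + c.
#include <cstddef>

inline size_t nchw_row_to_nhwc_row(size_t i, size_t elems_per_plane /* H*W */, size_t channels /* C */)
{
    const size_t c = i / elems_per_plane; // original channel
    const size_t s = i % elems_per_plane; // original spatial position
    return s * channels + c;
}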
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NECONVERTQUANTIZEDSIGNEDNESSKERNEL_H -#define ARM_COMPUTE_NECONVERTQUANTIZEDSIGNEDNESSKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -// Forward declarations -class ITensor; - -/** NEON kernel to convert asymmetric signed to asymmetric signed and vice-versa */ -class NEConvertQuantizedSignednessKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEConvertQuantizedSignednessKernel"; - } - /** Default constructor */ - NEConvertQuantizedSignednessKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers). */ - NEConvertQuantizedSignednessKernel(const NEConvertQuantizedSignednessKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers). */ - NEConvertQuantizedSignednessKernel &operator=(const NEConvertQuantizedSignednessKernel &) = delete; - /** Allow instances of this class to be moved */ - NEConvertQuantizedSignednessKernel(NEConvertQuantizedSignednessKernel &&) = default; - /** Allow instances of this class to be moved */ - NEConvertQuantizedSignednessKernel &operator=(NEConvertQuantizedSignednessKernel &&) = default; - /** Initialize the kernel's input, output. - * - * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED. - * @param[out] output Destination tensor. Data types supported: opposite of @p input. - */ - void configure(const ITensor *input, ITensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref NECopyKernel - * - * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED. - * @param[in] output Destination tensor. Data types supported: opposite of @p input. 
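The conversion NEConvertQuantizedSignednessKernel describes above is, in scalar form, the usual offset-by-128 mapping; the quantization zero point has to be shifted by 128 in the same direction so the represented real values stay identical. This sketch states that as an assumption about the arithmetic, not as the kernel's code:

// Illustrative sketch only: lossless QASYMM8 <-> QASYMM8_SIGNED value remapping.
#include <cstdint>

inline int8_t qasymm8_to_signed(uint8_t v)
{
    return static_cast<int8_t>(static_cast<int>(v) - 128); // same bit pattern as v ^ 0x80
}

inline uint8_t qasymm8_signed_to_unsigned(int8_t v)
{
    return static_cast<uint8_t>(static_cast<int>(v) + 128);
}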
- * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - const ITensor *_input; - ITensor *_output; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NECONVERTQUANTIZEDSIGNEDNESSKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEConvolutionKernel.h b/arm_compute/core/NEON/kernels/NEConvolutionKernel.h deleted file mode 100644 index 51a63335ff..0000000000 --- a/arm_compute/core/NEON/kernels/NEConvolutionKernel.h +++ /dev/null @@ -1,267 +0,0 @@ -/* - * Copyright (c) 2016-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NECONVOLUTIONKERNEL_H -#define ARM_COMPUTE_NECONVOLUTIONKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/NEON/INESimpleKernel.h" - -#include -#include -#include - -namespace arm_compute -{ -class ITensor; - -/****************************************************************************************\ - * Square Convolution * -\****************************************************************************************/ - -/** Interface for the kernel to run an arbitrary size convolution on a tensor. (Currently supports 3x3, 5x5, 7x7 and 9x9). - * The client can supply a convolution matrix \f$ C_{m,n} \f$. - * @f{eqnarray}{ - * k_0 &=& \frac{m}{2} \\ - * l_0 &=& \frac{n}{2} \\ - * sum &=& \sum_{k=0,l=0}^{k=m-1,l=n-1} input(x+k-k_0, y+l-l_0) C_{k,l} - * @f} - * - * @note The above equation for this function is similar to the default OpenCV Filter2D function, - * which actually computes a correlation and not a convolution. - * In case of a real convolution the convolution matrix should be flipped both horizontally and vertically. - */ -template -class NEConvolutionKernel : public INESimpleKernel -{ -public: - const char *name() const override - { - return "NEConvolutionKernel"; - } - /** Default constructor */ - NEConvolutionKernel(); - /** Initialise the kernel's input, output and border mode. - * - * @param[in] input Source tensor. Data type supported: U8. - * @param[out] output Destination tensor. Data types supported: U8, S16. - * @param[in] conv Convolution matrix to apply to the input tensor. - * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0. 
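The equation above for a single output pixel, together with the documented default for scale == 0, in scalar form; border handling and the U8/S16 output saturation of the real kernels are omitted, and the row-major storage of the coefficient matrix is an assumption:

// Illustrative sketch only: one output sample of the (correlation-style) convolution.
#include <cstddef>
#include <cstdint>

int32_t convolve_at(const uint8_t *src, size_t src_stride, int x, int y,
                    const int16_t *conv, size_t m, size_t n, uint32_t scale)
{
    // Documented default: scale 0 means "sum of the coefficients, or 1 if they add up to 0".
    int32_t divisor = static_cast<int32_t>(scale);
    if(divisor == 0)
    {
        for(size_t i = 0; i < m * n; ++i)
            divisor += conv[i];
        if(divisor == 0)
            divisor = 1;
    }
    const int k0 = static_cast<int>(m) / 2;
    const int l0 = static_cast<int>(n) / 2;
    int32_t sum  = 0;
    for(int k = 0; k < static_cast<int>(m); ++k)
        for(int l = 0; l < static_cast<int>(n); ++l)
            sum += static_cast<int32_t>(src[(y + l - l0) * static_cast<int>(src_stride) + (x + k - k0)]) * conv[k * static_cast<int>(n) + l];
    return sum / divisor;
}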
- * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ITensor *input, ITensor *output, const int16_t *conv, uint32_t scale, bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; - -private: - template - void convolution(const Window &win); - -protected: - uint32_t _scale; /**< scale of the convolution */ - std::array _convolution; /**< convolution matrix */ -}; - -/** Interface for the kernel which applied a 3x3 convolution to a tensor.*/ -using NEConvolution3x3Kernel = NEConvolutionKernel<3>; -/** Interface for the kernel which applied a 5x5 convolution to a tensor.*/ -using NEConvolution5x5Kernel = NEConvolutionKernel<5>; -/** Interface for the kernel which applied a 7x7 convolution to a tensor.*/ -using NEConvolution7x7Kernel = NEConvolutionKernel<7>; -///** Interface for the kernel which applied a 9x9 convolution to a tensor.*/ -using NEConvolution9x9Kernel = NEConvolutionKernel<9>; - -/****************************************************************************************\ - * Separable Square Convolution * -\****************************************************************************************/ - -/** Kernel for the Horizontal pass of a Separable Convolution */ -template -class NESeparableConvolutionHorKernel : public INESimpleKernel -{ -public: - const char *name() const override - { - return "NESeparableConvolutionHorKernel"; - } - /** Default constructor */ - NESeparableConvolutionHorKernel(); - - /** Initialise the kernel's input, output and border mode. - * - * @param[in] input Source tensor. Data type supported: U8. - * @param[out] output Destination tensor. Data types supported: U16, S16, S32. - * @param[in] conv_row Convolution matrix to apply to the input tensor. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ITensor *input, ITensor *output, const int16_t *conv_row, bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; - -private: - /** Apply the object's convolution to the given window of the input tensor.. - * - * @param[in] window Window to apply the convolution on. - */ - template - void convolve(const Window &window); - - std::array _conv_row; /**< Convolution coefficients */ - BorderSize _border_size; /**< Border size */ -}; - -/** Interface for the kernel which applied a 5x1 horizontal convolution to a tensor.*/ -using NESeparableConvolution5x5HorKernel = NESeparableConvolutionHorKernel<5>; -/** Interface for the kernel which applied a 7x1 horizontal convolution to a tensor.*/ -using NESeparableConvolution7x7HorKernel = NESeparableConvolutionHorKernel<7>; -/** Interface for the kernel which applied a 9x1 horizontal convolution to a tensor.*/ -using NESeparableConvolution9x9HorKernel = NESeparableConvolutionHorKernel<9>; - -/** Kernel for the Vertical pass of a Separable Convolution */ -template -class NESeparableConvolutionVertKernel : public INESimpleKernel -{ -public: - const char *name() const override - { - return "NESeparableConvolutionVertKernel"; - } - /** Default constructor */ - NESeparableConvolutionVertKernel(); - - /** Initialise the kernel's input, output and border mode. - * - * @param[in] input Source tensor. Data type supported: U16, S16, S32. 
- * @param[out] output Destination tensor, Data types supported: U8, S16. - * @param[in] conv_col Convolution matrix to apply to the input tensor. - * @param[in] scale Scale of the convolution matrix - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ITensor *input, ITensor *output, const int16_t *conv_col, uint32_t scale, bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; - -private: - /** Apply the object's convolution to the given window of the input tensor. - * This function is used if the intermediate values have been stored as U16. - * - * @param[in] win Window to apply the convolution on. - */ - template - void convolution_u16(const Window &win); - /** Apply the object's convolution to the given window of the input tensor. - * This function is used if the intermediate values have been stored as S16. - * - * @param[in] win Window to apply the convolution on. - */ - template - void convolution_s16(const Window &win); - /** Apply the object's convolution to the given window of the input tensor. - * This function is used if the intermediate values have been stored as S32. - * - * @param[in] win Window to apply the convolution on. - */ - template - void convolution_s32(const Window &win); - - std::array _conv_col; /**< Convolution coefficients */ - uint32_t _scale; /**< Convolution's scale */ -}; - -/** Interface for the kernel which applied a 1x5 vertical convolution to a tensor.*/ -using NESeparableConvolution5x5VertKernel = NESeparableConvolutionVertKernel<5>; -/** Interface for the kernel which applied a 1x7 vertical convolution to a tensor.*/ -using NESeparableConvolution7x7VertKernel = NESeparableConvolutionVertKernel<7>; -/** Interface for the kernel which applied a 1x9 vertical convolution to a tensor.*/ -using NESeparableConvolution9x9VertKernel = NESeparableConvolutionVertKernel<9>; - -/****************************************************************************************\ - * Rectangle Convolution * -\****************************************************************************************/ - -/** Kernel for the running convolution on a rectangle matrix. - * - * @note Supports combinations of 3,5,7 and 9. - */ -class NEConvolutionRectangleKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEConvolutionRectangleKernel"; - } - /** Default constructor */ - NEConvolutionRectangleKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEConvolutionRectangleKernel(NEConvolutionRectangleKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEConvolutionRectangleKernel &operator=(NEConvolutionRectangleKernel &) = delete; - /** Allow instances of this class to be moved */ - NEConvolutionRectangleKernel(NEConvolutionRectangleKernel &&) = default; - /** Allow instances of this class to be moved */ - NEConvolutionRectangleKernel &operator=(NEConvolutionRectangleKernel &&) = default; - /** Initialise the kernel's input, output and border mode. - * - * @param[in] input Source tensor. Data type supported: U8. - * @param[out] output Destination tensor, Data types supported: U8, S16. - * @param[in] conv Convolution matrix to apply to the input tensor. 
- * @param[in] width Width of convolution matrix (Number of columns) - * @param[in] height Height of convolution matrix (Number of rows) - * @param[in] scale Scale of the convolution matrix. If 0 is passed, it will be set to the sum of the coefficients of the convolution or 1 if they add up to 0. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ITensor *input, ITensor *output, const int16_t *conv, uint32_t width, uint32_t height, uint32_t scale, bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; - -private: - unsigned int get_index(uint32_t val); - /** Apply the object's convolution to the given window of the input tensor. - * - * @param[in] win Window to apply the convolution on. - */ - template - void convolution(const Window &win); - -protected: - const ITensor *_input; /**< Input tensor */ - ITensor *_output; /**< Output tensor */ - uint32_t _scale; /**< Scale of the convolution */ - std::vector _convolution; /**< Convolution matrix */ - BorderSize _border_size; /**< Calculated border width */ - uint32_t _func_idx; /**< Index used to specify convolution function to be used */ - const static unsigned int _nr_supported_sizes - { - 4 - }; /**< Number of supported permutations */ -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NECONVOLUTIONKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NECopyKernel.h b/arm_compute/core/NEON/kernels/NECopyKernel.h deleted file mode 100644 index ddd14c18b8..0000000000 --- a/arm_compute/core/NEON/kernels/NECopyKernel.h +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Copyright (c) 2018-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NECOPYKERNEL_H -#define ARM_COMPUTE_NECOPYKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class ITensor; - -/** NEON kernel to perform a copy between two tensors */ -class NECopyKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NECopyKernel"; - } - /** Default constructor */ - NECopyKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers). 
*/ - NECopyKernel(const NECopyKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers). */ - NECopyKernel &operator=(const NECopyKernel &) = delete; - /** Allow instances of this class to be moved */ - NECopyKernel(NECopyKernel &&) = default; - /** Allow instances of this class to be moved */ - NECopyKernel &operator=(NECopyKernel &&) = default; - /** Initialize the kernel's input, output. - * - * @param[in] input Source tensor. Data types supported: All - * @param[out] output Destination tensor. Data types supported: same as @p input. - * @param[in] padding (Optional) Padding to be applied to the input tensor - */ - void configure(const ITensor *input, ITensor *output, const PaddingList &padding = PaddingList()); - /** Static function to check if given info will lead to a valid configuration of @ref NECopyKernel - * - * @param[in] input Source tensor. Data types supported: All - * @param[in] output Destination tensor. Data types supported: same as @p input. - * @param[in] padding (Optional) Padding to be applied to the input tensor - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PaddingList &padding = PaddingList()); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - const ITensor *_input; - ITensor *_output; - PaddingList _padding; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NECOPYKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NECropKernel.h b/arm_compute/core/NEON/kernels/NECropKernel.h deleted file mode 100644 index b7e185f550..0000000000 --- a/arm_compute/core/NEON/kernels/NECropKernel.h +++ /dev/null @@ -1,114 +0,0 @@ -/* - * Copyright (c) 2019-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEON_CROP_KERNEL_H -#define ARM_COMPUTE_NEON_CROP_KERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -// Forward declarations -class ITensor; - -/** Interface for the kernel to perform tensor cropping */ -class NECropKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NECropKernel"; - } - /** Default constructor */ - NECropKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NECropKernel(const NECropKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NECropKernel &operator=(const NECropKernel &) = delete; - /** Allow instances of this class to be moved */ - NECropKernel(NECropKernel &&) = default; - /** Allow instances of this class to be moved */ - NECropKernel &operator=(NECropKernel &&) = default; - /** Default destructor */ - ~NECropKernel() = default; - /** Configure kernel - * - * @note Supported tensor rank: up to 4 - * @note Padding not supported. - * - * @param[in] input Source tensor. Data type supported: U8/U16/S16/U32/S32/F16/F32. Data layouts supported: NHWC. - * @param[in] crop_boxes Tensor containing all possible boxes used to crop the image, each represented by 4 normalized values. - * Data type supported: F32 - * @param[in] box_ind One dimensional tensor mapping the @p crop_box_ind to the index of the 3D image in @p input. - * Data type supported: F32 - * @param[out] output Destination tensor. Data type supported: F32 - * @param[in] crop_box_ind Index of the crop box to be used from @p crop_boxes. Default is 0. - * @param[in] extrapolation_value Value to be used for values outside of the image. Default is 0. - */ - void configure(const ITensor *input, const ITensor *crop_boxes, const ITensor *box_ind, ITensor *output, uint32_t crop_box_ind = 0, float extrapolation_value = 0); - - /** Static function to check if given info will lead to a valid configuration of @ref CLStridedSliceKernel - * - * @note Supported tensor rank: up to 4 - * @note Padding not supported. - * - * @param[in] input Source tensor info. Data type supported: U8/U16/S16/U32/S32/F16/F32. Data layouts supported: NHWC. - * @param[in] crop_boxes Tensor info for tensor containing all possible boxes used to crop the image. Data type supported: F32 - * @param[in] box_ind Tensor info for the one dimensional tensor mapping the @p crop_box_ind to the index of the 3D image - * in @p input. Data type supported: F32 - * @param[in] output Destination tensor. Data type supported: F32 - * @param[in] crop_box_ind Index of the crop box to be used from @p crop_boxes. Default is 0. - * @param[in] extrapolation_value Value to be used for values outside of the image. Default is 0. - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *crop_boxes, const ITensorInfo *box_ind, const ITensorInfo *output, uint32_t crop_box_ind = 0, float extrapolation_value = 0); - - /** Configure output tensor's shape as this can only be determined at runtime. 
*/ - void configure_output_shape(); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - - /** Function to use for in bounds crop for the particular tensor types passed to configure() */ - using InBoundsCropFunction = void(const ITensor *, const ITensor *, float *, Coordinates, int32_t, int32_t, int32_t, bool, bool); - -private: - const ITensor *_input; - const ITensor *_crop_boxes; - const ITensor *_box_ind; - ITensor *_output; - - Coordinates _start; - Coordinates _end; - uint32_t _crop_box_ind; - float _extrapolation_value; - /** The number of rows out of bounds at the start and end of output. */ - std::array _rows_out_of_bounds; - /** The number of columns out of bounds at the start and end of output. */ - std::array _cols_out_of_bounds; - - NECropKernel::InBoundsCropFunction *_in_bounds_crop_function; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEON_CROP_KERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NECumulativeDistributionKernel.h b/arm_compute/core/NEON/kernels/NECumulativeDistributionKernel.h deleted file mode 100644 index e4fe81a5d5..0000000000 --- a/arm_compute/core/NEON/kernels/NECumulativeDistributionKernel.h +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Copyright (c) 2016-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NECUMULATIVEDISTRIBUTIONKERNEL_H -#define ARM_COMPUTE_NECUMULATIVEDISTRIBUTIONKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -#include - -namespace arm_compute -{ -class IDistribution1D; -class ILut; -class ITensor; -using IImage = ITensor; - -/** Interface for the cumulative distribution (cummulative summmation) calculation kernel. - * - * This kernel calculates the cumulative sum of a given distribution (meaning that each output element - * is the sum of all its previous elements including itself) and creates a lookup table with the normalized - * pixel intensities which is used for improve the constrast of the image. 
- */ -class NECumulativeDistributionKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NECumulativeDistributionKernel"; - } - /** Default constructor */ - NECumulativeDistributionKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NECumulativeDistributionKernel(const NECumulativeDistributionKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NECumulativeDistributionKernel &operator=(const NECumulativeDistributionKernel &) = delete; - /** Allow instances of this class to be moved */ - NECumulativeDistributionKernel(NECumulativeDistributionKernel &&) = default; - /** Allow instances of this class to be moved */ - NECumulativeDistributionKernel &operator=(NECumulativeDistributionKernel &&) = default; - /** Set the input and output distribution. - * - * @param[in] input Input image. Data type supported: U8 - * @param[in] distribution Unnormalized 256-bin distribution of the input image. - * @param[out] cumulative_sum Cummulative distribution (Summed histogram). Should be same size as @p distribution. - * @param[out] output Equalization lookup table. Should consist of 256 entries of U8 elements. - */ - void configure(const IImage *input, const IDistribution1D *distribution, IDistribution1D *cumulative_sum, ILut *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - bool is_parallelisable() const override; - -private: - const IImage *_input; /**< Input image. */ - const IDistribution1D *_distribution; /**< Input histogram of the input image. */ - IDistribution1D *_cumulative_sum; /**< The cummulative distribution. */ - ILut *_output; /**< Output with the equalization lookup table. */ -private: - static const uint32_t _histogram_size = 256; /**< Default histogram size of 256. */ -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NECUMULATIVEDISTRIBUTIONKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEDepthConcatenateLayerKernel.h b/arm_compute/core/NEON/kernels/NEDepthConcatenateLayerKernel.h deleted file mode 100644 index 3b2b9a1b79..0000000000 --- a/arm_compute/core/NEON/kernels/NEDepthConcatenateLayerKernel.h +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
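A sketch of the cumulative sum plus the textbook histogram-equalisation LUT that the class comment of NECumulativeDistributionKernel describes; plain arrays stand in for IDistribution1D/ILut, and the (cdf - cdf_min) / (total - cdf_min) normalisation is an assumption about the mapping, not taken from the deleted source:

// Illustrative sketch only: build the cumulative distribution and a 256-entry U8 LUT.
#include <array>
#include <cstddef>
#include <cstdint>

void equalisation_lut(const std::array<uint32_t, 256> &hist, std::array<uint8_t, 256> &lut)
{
    // Cumulative distribution: each entry is the sum of all previous bins plus itself.
    std::array<uint32_t, 256> cdf{};
    uint32_t running = 0;
    for(size_t i = 0; i < 256; ++i)
    {
        running += hist[i];
        cdf[i] = running;
    }
    const uint32_t total = running;
    // First non-empty bin of the cumulative distribution.
    uint32_t cdf_min = 0;
    for(uint32_t v : cdf)
        if(v != 0) { cdf_min = v; break; }
    for(size_t i = 0; i < 256; ++i)
    {
        if(total <= cdf_min)
        {
            lut[i] = static_cast<uint8_t>(i); // degenerate (flat) image: identity mapping
            continue;
        }
        const float num = (cdf[i] > cdf_min) ? static_cast<float>(cdf[i] - cdf_min) : 0.0f;
        lut[i] = static_cast<uint8_t>(num * 255.0f / static_cast<float>(total - cdf_min) + 0.5f);
    }
}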
- */ - -#ifndef ARM_COMPUTE_NEDEPTHCONCATENATEKERNEL_H -#define ARM_COMPUTE_NEDEPTHCONCATENATEKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -// Forward declarations -class ITensor; - -/** Interface for the depth concatenate kernel. - * The input tensor will be concatenated into the output tensor. - */ -class NEDepthConcatenateLayerKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEDepthConcatenateLayerKernel"; - } - /** Default constructor */ - NEDepthConcatenateLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEDepthConcatenateLayerKernel(const NEDepthConcatenateLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEDepthConcatenateLayerKernel &operator=(const NEDepthConcatenateLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - NEDepthConcatenateLayerKernel(NEDepthConcatenateLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - NEDepthConcatenateLayerKernel &operator=(NEDepthConcatenateLayerKernel &&) = default; - /** Default destructor */ - ~NEDepthConcatenateLayerKernel() = default; - /** Initialise the kernel's inputs and output - * - * @param[in] input Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] depth_offset The offset on the Z axis. - * @param[in,out] output Output tensor info. Data types supported: Same as @p input. - * - * @note: The output tensor's low two dimensions can't be smaller than the input one's. - * @note: The gaps between the two lowest dimensions of input and output need to be divisible by 2. - * - */ - void configure(const ITensorInfo *input, unsigned int depth_offset, ITensorInfo *output); - /** Static function to check if given info will lead to a valid configuration of @ref NEDepthConcatenateLayerKernel - * - * @param[in] input Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] depth_offset The offset on the Z axis. - * @param[in] output Output tensor info. Data types supported: Same as @p input. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, unsigned int depth_offset, const ITensorInfo *output); - - // Inherited methods overridden: - void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override; - -private: - using DepthConcatFunction = void(const ITensor *in, ITensor *out, unsigned int depth_offset, const Window &window); - -private: - DepthConcatFunction *_func; - unsigned int _depth_offset; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEDEPTHCONCATENATEKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEDepthConvertLayerKernel.h b/arm_compute/core/NEON/kernels/NEDepthConvertLayerKernel.h deleted file mode 100644 index e297fd7d1b..0000000000 --- a/arm_compute/core/NEON/kernels/NEDepthConvertLayerKernel.h +++ /dev/null @@ -1,94 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_DEPTHCONVERTKERNEL_H -#define ARM_COMPUTE_DEPTHCONVERTKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Depth conversion kernel - * This function ignores the scale and zeroPoint of quanized tensors, i.e. QASYMM8 input is treated as uint8 values. - */ -class NEDepthConvertLayerKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEDepthConvertLayerKernel"; - } - /** Default constructor*/ - NEDepthConvertLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEDepthConvertLayerKernel(const NEDepthConvertLayerKernel &) = delete; - /** Default move constructor */ - NEDepthConvertLayerKernel(NEDepthConvertLayerKernel &&) = default; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEDepthConvertLayerKernel &operator=(const NEDepthConvertLayerKernel &) = delete; - /** Default move assignment operator */ - NEDepthConvertLayerKernel &operator=(NEDepthConvertLayerKernel &&) = default; - /** Set the input and output of the kernel - * - * Valid conversions Input -> Output : - * - * - QASYMM8_SIGNED -> S16, S32, F32, F16 - * - QASYMM8 -> U16, S16, S32, F32, F16 - * - U8 -> U16, S16, S32, F32, F16 - * - U16 -> U8, U32 - * - S16 -> QASYMM8_SIGNED, U8, S32 - * - BFLOAT16 -> F32 - * - F16 -> QASYMM8_SIGNED, QASYMM8, F32, S32, U8 - * - S32 -> QASYMM8_SIGNED, QASYMM8, F16, F32, U8 - * - F32 -> QASYMM8_SIGNED, QASYMM8, BFLOAT16, F16, S32, U8 - * - * @param[in] input The input tensor to convert. Data types supported: QASYMM8_SIGNED/QASYMM8/U8/U16/S16/BFLOAT16/F16/F32. - * @param[out] output The output tensor. Data types supported: QASYMM8_SIGNED/QASYMM8/U8/U16/S16/U32/S32/BFLOAT16/F16/F32. - * @param[in] policy Conversion policy. - * @param[in] shift (Optional) Value for down/up conversions. Must be 0 <= shift < 8. - */ - void configure(const ITensor *input, ITensor *output, ConvertPolicy policy, uint32_t shift = 0); - /** Static function to check if given info will lead to a valid configuration of @ref NEDepthConvertLayerKernel - * - * @param[in] input Source tensor info. Data types supported: QASYMM8_SIGNED/QASYMM8/U8/U16/S16/BFLOAT16/F16/F32. - * @param[in] output Destination tensor info. Data type supported: QASYMM8_SIGNED/QASYMM8/U8/U16/S16/U32/S32/BFLOAT16/F16/F32. 
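Two entries from the conversion table of NEDepthConvertLayerKernel above, in scalar form. Reading shift as a left shift when widening and ConvertPolicy::SATURATE as a clamp when narrowing is an assumption about the documented parameters, not taken from the deleted source:

// Illustrative sketch only: U8 -> S16 up-conversion with shift, S16 -> U8 with saturation.
#include <algorithm>
#include <cstdint>

inline int16_t convert_u8_to_s16(uint8_t v, uint32_t shift /* 0 <= shift < 8 */)
{
    return static_cast<int16_t>(static_cast<int32_t>(v) << shift); // 255 << 7 still fits in S16
}

inline uint8_t convert_s16_to_u8_saturate(int16_t v)
{
    return static_cast<uint8_t>(std::clamp<int32_t>(v, 0, 255));
}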
- * @param[in] policy Conversion policy - * @param[in] shift (Optional) Value for down/up conversions. Must be 0 <= shift < 8. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, ConvertPolicy policy, uint32_t shift = 0); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - const ITensor *_input; - ITensor *_output; - ConvertPolicy _policy; - uint32_t _shift; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEDEPTHCONVERTKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEDepthToSpaceLayerKernel.h b/arm_compute/core/NEON/kernels/NEDepthToSpaceLayerKernel.h deleted file mode 100644 index c497b2c858..0000000000 --- a/arm_compute/core/NEON/kernels/NEDepthToSpaceLayerKernel.h +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Copyright (c) 2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEDEPTHTOSPACELAYERKERNEL_H -#define ARM_COMPUTE_NEDEPTHTOSPACELAYERKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -// Forward declarations -class ITensor; - -/** Interface for the depth to space kernel */ -class NEDepthToSpaceLayerKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEDepthToSpaceLayerKernel"; - } - /** Default constructor */ - NEDepthToSpaceLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEDepthToSpaceLayerKernel(const NEDepthToSpaceLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEDepthToSpaceLayerKernel &operator=(const NEDepthToSpaceLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - NEDepthToSpaceLayerKernel(NEDepthToSpaceLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - NEDepthToSpaceLayerKernel &operator=(NEDepthToSpaceLayerKernel &&) = default; - /** Default destructor */ - ~NEDepthToSpaceLayerKernel() = default; - /** Initialise the kernel's inputs and output. - * - * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All - * @param[out] output Tensor output. Data types supported: same as @p input - * @param[in] block_shape Block shape x value. 
- */
- void configure(const ITensor *input, ITensor *output, int32_t block_shape);
- /** Static function to check if given info will lead to a valid configuration of @ref NEDepthToSpaceLayerKernel.
- *
- * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: All
- * @param[in] output Tensor output info. Data types supported: same as @p input
- * @param[in] block_shape Block shape value.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, int32_t block_shape);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- const ITensor *_input; /**< Source tensor */
- ITensor *_output; /**< Destination tensor */
- int32_t _block_shape; /**< Block shape */
- DataLayout _data_layout; /**< Data layout of the operation */
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEDEPTHTOSPACELAYERKERNEL_H */
diff --git a/arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h b/arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h
deleted file mode 100644
index eba1737a03..0000000000
--- a/arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h
+++ /dev/null
@@ -1,129 +0,0 @@
-/*
- * Copyright (c) 2019-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEDEPTHWISECONVOLUTIONLAYERNATIVEKERNEL_H
-#define ARM_COMPUTE_NEDEPTHWISECONVOLUTIONLAYERNATIVEKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "arm_compute/core/utils/misc/Traits.h"
-#include "support/Requires.h"
-
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-#include <arm_neon.h>
-#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-
-namespace arm_compute
-{
-// Forward declarations
-class ITensor;
-
-/** Interface for the kernel to run a depthwise convolution native on a tensor.
*/ -class NEDepthwiseConvolutionLayerNativeKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEDepthwiseConvolutionLayerNativeKernel"; - } - /** Default constructor */ - NEDepthwiseConvolutionLayerNativeKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEDepthwiseConvolutionLayerNativeKernel(const NEDepthwiseConvolutionLayerNativeKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEDepthwiseConvolutionLayerNativeKernel &operator=(const NEDepthwiseConvolutionLayerNativeKernel &) = delete; - /** Default Move Constructor. */ - NEDepthwiseConvolutionLayerNativeKernel(NEDepthwiseConvolutionLayerNativeKernel &&) = default; - /** Default move assignment operator */ - NEDepthwiseConvolutionLayerNativeKernel &operator=(NEDepthwiseConvolutionLayerNativeKernel &&) = default; - /** Initialize the function's source, destination and parameters. - * - * @note Supported data layouts: NHWC - * - * @param[in] input Source tensor. DataType supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] weights Weights tensor. This is a 3D tensor with dimensions [IFM, W, H]. - * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED. - * @param[in] biases Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed. - * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED. - * @param[out] output Destination tensor. Data type supported: Same as @p input. - * @param[in] conv_info Padding and stride information to use for the convolution. - * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. - * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). - * - */ - void configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1, - const Size2D &dilation = Size2D(1U, 1U)); - /** Static function to check if given info will lead to a valid configuration of @ref NEDepthwiseConvolutionLayerNativeKernel - * - * @note Supported data layouts: NHWC - * - * @param[in] input Source tensor info. DataType supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] weights Weights tensor info. This is a 3D tensor with dimensions [IFM, W, H]. - * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED. - * @param[in] biases Biases tensor info. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed. - * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED. - * @param[in] output Destination tensor info. Data type supported: Same as @p input. - * @param[in] conv_info Padding and stride information to use for the convolution. - * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. - * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). 
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1,
- const Size2D &dilation = Size2D(1U, 1U));
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- template <typename T>
- using FloatEnalber = typename std::enable_if<std::is_same<T, float>::value, int>::type;
-
- template <typename T, typename TW, FloatEnalber<T> = 0>
- void run_depthwise(const Window &window, bool has_biases);
-
- template <typename T>
- using Quantized8bitEnalber = typename std::enable_if < std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value, int >::type;
-
- template <typename T, typename TW, Quantized8bitEnalber<T> = 0>
- void run_depthwise(const Window &window, bool has_biases);
-
- /** Common signature for all the specialised depthwise convolution native functions
- *
- * @param[in] window Region on which to execute the kernel.
- */
- using DepthwiseFunctionPtr = void (NEDepthwiseConvolutionLayerNativeKernel::*)(const Window &window, bool has_biases);
-
- DepthwiseFunctionPtr _func;
- const ITensor *_input;
- const ITensor *_weights;
- const ITensor *_biases;
- ITensor *_output;
- PadStrideInfo _conv_info;
- unsigned int _depth_multiplier;
- Size2D _dilation;
- std::vector<int32_t> _output_multiplier;
- std::vector<int32_t> _output_shift;
- bool _has_biases;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEDEPTHWISECONVOLUTIONLAYERNATIVEKERNEL_H */
diff --git a/arm_compute/core/NEON/kernels/NEDequantizationLayerKernel.h b/arm_compute/core/NEON/kernels/NEDequantizationLayerKernel.h
deleted file mode 100644
index 7b97d06e43..0000000000
--- a/arm_compute/core/NEON/kernels/NEDequantizationLayerKernel.h
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEDEQUANTIZATIONLAYERKERNEL_H
-#define ARM_COMPUTE_NEDEQUANTIZATIONLAYERKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for the dequantization layer kernel.
*/ -class NEDequantizationLayerKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEDequantizationLayerKernel"; - } - /** Default constructor */ - NEDequantizationLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEDequantizationLayerKernel(const NEDequantizationLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEDequantizationLayerKernel &operator=(const NEDequantizationLayerKernel &) = delete; - /** Default Move Constructor. */ - NEDequantizationLayerKernel(NEDequantizationLayerKernel &&) = default; - /** Default move assignment operator */ - NEDequantizationLayerKernel &operator=(NEDequantizationLayerKernel &&) = default; - /** Default destructor */ - ~NEDequantizationLayerKernel() = default; - /** Set input, output tensors. - * - * @param[in] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16. - * @param[out] output Destination tensor with the same dimensions of input. Data type supported: F16/F32. - */ - void configure(const ITensor *input, ITensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref NEDequantizationLayerKernel - * - * @param[in] input Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL/QSYMM8/QSYMM16. - * @param[in] output Output tensor info. Data types supported: F16/F32. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - const ITensor *_input; - ITensor *_output; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEDEQUANTIZATIONLAYERKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEDerivativeKernel.h b/arm_compute/core/NEON/kernels/NEDerivativeKernel.h deleted file mode 100644 index 7a46a4194e..0000000000 --- a/arm_compute/core/NEON/kernels/NEDerivativeKernel.h +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Copyright (c) 2016-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEDERIVATIVEKERNEL_H -#define ARM_COMPUTE_NEDERIVATIVEKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the kernel to run the derivative along the X/Y directions on a tensor. - * - */ -class NEDerivativeKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEDerivativeKernel"; - } - /** Default constructor */ - NEDerivativeKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEDerivativeKernel(const NEDerivativeKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEDerivativeKernel &operator=(const NEDerivativeKernel &) = delete; - /** Allow instances of this class to be moved */ - NEDerivativeKernel(NEDerivativeKernel &&) = default; - /** Allow instances of this class to be moved */ - NEDerivativeKernel &operator=(NEDerivativeKernel &&) = default; - /** Initialise the kernel's sources, destination and border - * - * @note At least one of output_x or output_y must be set - * - * @param[in] input Source tensor. Data type supported: U8. - * @param[out] output_x (Optional) Destination tensor for the X gradient. Data type supported: S16. - * @param[out] output_y (Optional) Destination tensor for the Y gradient. Data type supported: S16. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ITensor *input, ITensor *output_x, ITensor *output_y, bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; - -private: - /** Function to perform derivative along the X direction on the given window - * - * @param[in] window Region on which to execute the kernel - */ - void derivative_x(const Window &window); - /** Function to perform derivative along the Y direction on the given window - * - * @param[in] window Region on which to execute the kernel - */ - void derivative_y(const Window &window); - /** Function to perform derivative along the X and Y direction on the given window - * - * @param[in] window Region on which to execute the kernel - */ - void derivative_xy(const Window &window); - /** Common signature for all the specialised derivative functions - * - * @param[in] window Region on which to execute the kernel. - */ - using DerivativeFunction = void (NEDerivativeKernel::*)(const Window &window); - /** Derivative function to use for the particular tensor types passed to configure() */ - DerivativeFunction _func; - -private: - const ITensor *_input; /**< Input tensor */ - ITensor *_output_x; /**< Output tensor - Derivate along the X direction */ - ITensor *_output_y; /**< Output tensor - Derivate along the Y direction */ -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEDERIVATIVEKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEDilateKernel.h b/arm_compute/core/NEON/kernels/NEDilateKernel.h deleted file mode 100644 index 424cf549a1..0000000000 --- a/arm_compute/core/NEON/kernels/NEDilateKernel.h +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (c) 2016-2019 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEDILATEKERNEL_H -#define ARM_COMPUTE_NEDILATEKERNEL_H - -#include "arm_compute/core/NEON/INESimpleKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the kernel to perform boolean image dilatation */ -class NEDilateKernel : public INESimpleKernel -{ -public: - const char *name() const override - { - return "NEDilateKernel"; - } - /** Set the source, destination and border mode of the kernel - * - * @param[in] input Source tensor. Data type supported: U8 - * @param[out] output Destination tensor. Data type supported: U8 - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ITensor *input, ITensor *output, bool border_undefined); - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEDILATEKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h b/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h deleted file mode 100644 index c927aff1eb..0000000000 --- a/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYERKERNEL_H -#define ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYERKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** NEON interface for Direct Convolution Layer kernel */ -class NEDirectConvolutionLayerKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEDirectConvolutionLayerKernel"; - } - /** Default constructor */ - NEDirectConvolutionLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEDirectConvolutionLayerKernel(const NEDirectConvolutionLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEDirectConvolutionLayerKernel &operator=(const NEDirectConvolutionLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - NEDirectConvolutionLayerKernel(NEDirectConvolutionLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - NEDirectConvolutionLayerKernel &operator=(NEDirectConvolutionLayerKernel &&) = default; - /** Default destructor */ - ~NEDirectConvolutionLayerKernel() = default; - /** Set the input, weights, and output tensors. - * - * @note: DirectConvolution only works in the following configurations: - * 1x1 convolution with stride_x = 1/2/3, stride_y = 1/2/3 - * 3x3 convolution with stride_x = 1/2/3, stride_y = 1/2/3 - * - * @param[in] input The input tensor to convolve. 3 lower dimensions represent a single input [width, height, IFM], - * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: F16/F32. - * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. - * The 3rd dimension must be the same as the input's volume 3rd dimension. - * Data type supported:Same as @p input. - * @param[out] output Output tensor. - * The 3rd dimensions must be equal to the 4th dimension of the @p kernels tensor. Data types supported: F16/F32 - * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. - */ - void configure(const ITensor *input, const ITensor *weights, ITensor *output, const PadStrideInfo &conv_info); - /** Static function to check if given info will lead to a valid configuration of @ref NEDirectConvolutionLayerKernel - * - * @param[in] input The input tensor to convolve. 3 lower dimensions represent a single input [width, height, IFM], - * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: F16/F32. - * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. - * The 3rd dimension must be the same as the input's volume 3rd dimension. - * Data type supported:Same as @p input. - * @param[in] output Output tensor. - * The 3rd dimensions must be equal to the 4th dimension of the @p kernels tensor. Data types supported: F16/F32 - * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. 
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *output, const PadStrideInfo &conv_info);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
- BorderSize border_size() const override;
-
-private:
- /* Template function for optimized convolution NHWC */
- template <typename T>
- void convolve_nhwc_optimized(const Window &window);
-
- /* Template function for convolution NHWC */
- template <typename T>
- void convolve_nhwc(const Window &window);
-
- const ITensor *_input;
- const ITensor *_weights;
- ITensor *_output;
- PadStrideInfo _conv_info;
- BorderSize _border_size;
- unsigned int _kernel_size;
- unsigned int _num_weight_elems_read_per_row;
- unsigned int _num_elems_read_per_iteration;
- unsigned int _num_elems_written_per_iteration;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYERKERNEL_H */
diff --git a/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h b/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h
deleted file mode 100644
index 552a88ce42..0000000000
--- a/arm_compute/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h
+++ /dev/null
@@ -1,102 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYEROUTPUTSTAGEKERNEL_H
-#define ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYEROUTPUTSTAGEKERNEL_H
-
-#include "arm_compute/core/KernelDescriptors.h"
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-/** NEON kernel to accumulate the biases, if provided, or downscale in case of quantized input.
- *
- * @note We assume bias to be shared
- * @note For quantized computations (i.e. @p input of S32 type) the output data type for auto-initialization must be passed as part
- * of the @ref DirectConvolutionLayerOutputStageKernelInfo.
- */ -class NEDirectConvolutionLayerOutputStageKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEDirectConvolutionLayerOutputStageKernel"; - } - /** Default constructor */ - NEDirectConvolutionLayerOutputStageKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEDirectConvolutionLayerOutputStageKernel(const NEDirectConvolutionLayerOutputStageKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEDirectConvolutionLayerOutputStageKernel &operator=(const NEDirectConvolutionLayerOutputStageKernel &) = delete; - /** Allow instances of this class to be moved */ - NEDirectConvolutionLayerOutputStageKernel(NEDirectConvolutionLayerOutputStageKernel &&) = default; - /** Allow instances of this class to be moved */ - NEDirectConvolutionLayerOutputStageKernel &operator=(NEDirectConvolutionLayerOutputStageKernel &&) = default; - /** Default destructor */ - ~NEDirectConvolutionLayerOutputStageKernel() = default; - /** Set the accumulate buffer and the biases of the kernel. - * - * @param[in, out] input Input to add the bias to. If @p output is not specified then accumulation is done in-place. - * Data type supported: F16/F32/S32 - * @param[in] bias (Optional) The shared bias tensor to add. It must be 1D Tensor. Data type supported: Same as @p input - * @param[out] output (Optional) If the output tensor is specified the accumulation is done out-of-place. (Defaults to nullptr) - * Note that in-place computation is only supported for F16/F32. For S32 this must not be nullptr. - * Data type supported: F16/F32 or QASYMM8/QASYMM8_SIGNED if @p input is S32 - * @param[in] info (Optional) DirectConvolutionLayerOutputStageKernel descriptor metadata - */ - void configure(ITensor *input, const ITensor *bias = nullptr, ITensor *output = nullptr, - const DirectConvolutionLayerOutputStageKernelInfo &info = DirectConvolutionLayerOutputStageKernelInfo()); - /** Static function to check if given info will lead to a valid configuration of @ref NEDirectConvolutionLayerOutputStageKernel - * - * @param[in] input Input to add the bias to. If @p output is not specified then accumulation is done in-place. - * Data type supported: F16/F32/S32 - * @param[in] bias (Optional) The shared bias tensor to add. It must be 1D Tensor. Data type supported: Same as @p input - * @param[in] output (Optional) If the output tensor is specified the accumulation is done out-of-place. (Defaults to nullptr) - * Note that in-place computation is only supported for F16/F32. For S32 this must not be nullptr. 
- * Data type supported: F16/F32 or QASYMM8/QASYMM8_SIGNED if @p input is S32 - * @param[in] info (Optional) DirectConvolutionLayerOutputStageKernel descriptor metadata - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *bias = nullptr, const ITensorInfo *output = nullptr, - const DirectConvolutionLayerOutputStageKernelInfo &info = DirectConvolutionLayerOutputStageKernelInfo()); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - using OutputStageKernel = void(ITensor *input, const ITensor *bias, const Window &window, ITensor *output, - int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift, bool has_bias); - -private: - OutputStageKernel *_func; - ITensor *_input; - const ITensor *_bias; - ITensor *_output; - int _result_fixedpoint_multiplier; - int _result_shift; - int _result_offset_after_shift; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYEROUTPUTSTAGEKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEElementwiseOperationKernel.h b/arm_compute/core/NEON/kernels/NEElementwiseOperationKernel.h deleted file mode 100644 index 7dae25c22c..0000000000 --- a/arm_compute/core/NEON/kernels/NEElementwiseOperationKernel.h +++ /dev/null @@ -1,214 +0,0 @@ -/* - * Copyright (c) 2018-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */
-#ifndef ARM_COMPUTE_NEELEMENTWISEOPERATIONKERNEL_H
-#define ARM_COMPUTE_NEELEMENTWISEOPERATIONKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for an element-wise operation kernel
- *
- * Element-wise operation is computed by:
- * @f[ output(x,y) = OP(input1(x,y), input2(x,y))@f]
- *
- */
-class NEElementwiseOperationKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEElementwiseOperationKernel";
- }
- /** Default constructor */
- NEElementwiseOperationKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEElementwiseOperationKernel(const NEElementwiseOperationKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEElementwiseOperationKernel &operator=(const NEElementwiseOperationKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEElementwiseOperationKernel(NEElementwiseOperationKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEElementwiseOperationKernel &operator=(NEElementwiseOperationKernel &&) = default;
- /** Default destructor */
- ~NEElementwiseOperationKernel() = default;
-
- /** Common signature for all the specialised arithmetic functions
- *
- * @param[in] input1 First tensor input info. Data types supported: QASYMM8/S16/F16/S32/F32.
- * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
- * @param[out] output Output tensor info. Data types supported: Dependent on subclass.
- * @param[in] window Region on which to execute the kernel.
- */
- using ElementwiseFunction = void(const ITensor *input1, const ITensor *input2, ITensor *output, const Window &window);
-
- // Inherited methods overridden:
- void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override;
-
-protected:
- /** Validate the argument passed to the kernel
- *
- * @param[in] input1 First tensor input. Data types supported: QASYMM8/S16/F16/S32/F32.
- * @param[in] input2 Second tensor input. Data types supported: Same as @p input1.
- * @param[in] output Output tensor. Data types supported: Dependent on subclass.
- */
- static Status validate_arguments_common(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output);
-
- /** Commmon configure function for element-wise operators with no additional options (e.g. Min, Max, SquaredDiff)
- *
- */
- void configure_common(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output);
-
- /** Function to use for the particular tensor types passed to configure() */
- std::function<ElementwiseFunction> _function;
-
- const ITensor *_input1;
- const ITensor *_input2;
- ITensor *_output;
-};
-
-class NEArithmeticOperationKernel : public NEElementwiseOperationKernel
-{
-public:
- /** Default constructor */
- NEArithmeticOperationKernel() = default;
-
- /** Configure kernel
- *
- * @param[in] op Arithmetic operation to be executed.
- * @param[in] input1 First tensor input info. Data types supported: QASYMM8/S16/F16/S32/F32.
- * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
- * @param[out] output Output tensor info. Data types supported: Same as @p input1.
- */ - void configure(ArithmeticOperation op, const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output); - - /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticOperationKernel - * - * @param[in] op Arithmetic operation to be executed. - * @param[in] input1 First tensor input info. Data types supported: QASYMM8/S16/F16/S32/F32. - * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. - * @param[in] output Output tensor info. Data types supported: Same as @p input1. - * - * @return a Status - */ - static Status validate(ArithmeticOperation op, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output); - -protected: - // Inherited methods overridden: - static Status validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output); -}; - -class NEDivisionOperationKernel : public NEArithmeticOperationKernel -{ -public: - /** Default constructor */ - NEDivisionOperationKernel() = default; - - /** Configure kernel - * - * @param[in] input1 First tensor input info. Data types supported: S32/F16/F32. - * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. - * @param[out] output Output tensor info. Data types supported: Same as @p input1. - */ - void configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output); - - /** Static function to check if given info will lead to a valid configuration of @ref NEDivisionOperationKernel - * - * @param[in] input1 First tensor input info. Data types supported: S32/F16/F32. - * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. - * @param[in] output Output tensor info. Data types supported: Same as @p input1. - * - * @return a Status - */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output); - -protected: - // Inherited methods overridden: - static Status validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output); -}; - -class NEPowerOperationKernel : public NEArithmeticOperationKernel -{ -public: - /** Default constructor */ - NEPowerOperationKernel() = default; - - /** Configure kernel - * - * @param[in] input1 First tensor input info. Data types supported: F16/F32. - * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. - * @param[out] output Output tensor info. Data types supported: Same as @p input1. - */ - void configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output); - - /** Static function to check if given info will lead to a valid configuration of @ref NEPowerOperationKernel - * - * @param[in] input1 First tensor input info. Data types supported: F16/F32. - * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. - * @param[in] output Output tensor info. Data types supported: Same as @p input1. 
- * - * @return a Status - */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output); - -protected: - // Inherited methods overridden: - static Status validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output); -}; - -class NEComparisonOperationKernel : public NEElementwiseOperationKernel -{ -public: - /** Default constructor */ - NEComparisonOperationKernel() = default; - - /** Configure kernel - * - * @param[in] op Comparison operation to be executed. - * @param[in] input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. - * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. - * @param[out] output Output tensor info. Data types supported: U8. - */ - void configure(ComparisonOperation op, const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output); - - /** Static function to check if given info will lead to a valid configuration of @ref NEComparisonOperationKernel - * - * @param[in] op Comparison operation to be executed. - * @param[in] input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32. - * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1. - * @param[in] output Output tensor info. Data types supported: U8. - * - * @return a Status - */ - static Status validate(ComparisonOperation op, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output); - -protected: - // Inherited methods overridden: - static Status validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output); -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEELEMENTWISEOPERATIONKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEElementwiseUnaryKernel.h b/arm_compute/core/NEON/kernels/NEElementwiseUnaryKernel.h deleted file mode 100644 index 7f9d7ad114..0000000000 --- a/arm_compute/core/NEON/kernels/NEElementwiseUnaryKernel.h +++ /dev/null @@ -1,103 +0,0 @@ -/* - * Copyright (c) 2018-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */
-#ifndef ARM_COMPUTE_NEELEMENTWISEUNARYKERNEL_H
-#define ARM_COMPUTE_NEELEMENTWISEUNARYKERNEL_H
-
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for an element-wise unary operation kernel
- *
- * Element-wise operation is computed by:
- * @f[ output(x) = OP(input(x))@f]
- *
- */
-class NEElementwiseUnaryKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEElementwiseUnaryKernel";
- }
- /** Default constructor */
- NEElementwiseUnaryKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEElementwiseUnaryKernel(const NEElementwiseUnaryKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEElementwiseUnaryKernel &operator=(const NEElementwiseUnaryKernel &) = delete;
- /** Allow instances of this class to be moved */
- NEElementwiseUnaryKernel(NEElementwiseUnaryKernel &&) = default;
- /** Allow instances of this class to be moved */
- NEElementwiseUnaryKernel &operator=(NEElementwiseUnaryKernel &&) = default;
- /** Default destructor */
- ~NEElementwiseUnaryKernel() = default;
-
- /** Function to configure the @ref NEElementwiseUnaryKernel
- *
- * @param[in] op Arithmetic operation to be executed.
- * @param[in] input First tensor input. Data types supported: F16/F32, F16/F32/S32 for NEG/ABS operations.
- * @param[out] output Output tensor. Data types supported: Same as @p input.
- */
- void configure(ElementWiseUnary op, const ITensor *input, ITensor *output);
-
- /** Static function to check if given info will lead to a valid configuration of @ref NEElementwiseUnaryKernel
- *
- * @param[in] op Arithmetic operation to be executed.
- * @param[in] input First tensor input info. Data types supported: F16/F32, F16/F32/S32 for NEG/ABS operations.
- * @param[in] output Output tensor info. Data types supported: Same as @p input.
- *
- * @return a Status
- */
- static Status validate(ElementWiseUnary op, const ITensorInfo *input, const ITensorInfo *output);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- /** Common signature for all the specialised arithmetic functions
- *
- * @param[in] window Region on which to execute the kernel.
- */
- using ElementwiseUnaryPtr = void (NEElementwiseUnaryKernel::*)(const Window &window);
-
- /** Template function to run elementwise unary operation
- *
- * @tparam ScalarType Scalar datatype
- *
- * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()).
- */
- template <typename ScalarType>
- void elementwise_op(const Window &window);
-
- ElementwiseUnaryPtr _func;
- const ITensor *_input;
- ITensor *_output;
- ElementWiseUnary _op;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEELEMENTWISEUNARYKERNEL_H */
diff --git a/arm_compute/core/NEON/kernels/NEErodeKernel.h b/arm_compute/core/NEON/kernels/NEErodeKernel.h
deleted file mode 100644
index 140481df17..0000000000
--- a/arm_compute/core/NEON/kernels/NEErodeKernel.h
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Copyright (c) 2016-2019 Arm Limited.
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEERODEKERNEL_H -#define ARM_COMPUTE_NEERODEKERNEL_H - -#include "arm_compute/core/NEON/INESimpleKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the kernel to perform boolean image erosion */ -class NEErodeKernel : public INESimpleKernel -{ -public: - const char *name() const override - { - return "NEErodeKernel"; - } - /** Set the source, destination and border mode of the kernel - * - * @param[in] input Source tensor. Data type supported: U8 - * @param[out] output Destination tensor. Data type supported: U8 - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ITensor *input, ITensor *output, bool border_undefined); - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEERODEKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEFFTDigitReverseKernel.h b/arm_compute/core/NEON/kernels/NEFFTDigitReverseKernel.h deleted file mode 100644 index f7dc0b1d16..0000000000 --- a/arm_compute/core/NEON/kernels/NEFFTDigitReverseKernel.h +++ /dev/null @@ -1,93 +0,0 @@ -/* - * Copyright (c) 2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */
-#ifndef ARM_COMPUTE_NEFFTDIGITREVERSEKERNEL_H
-#define ARM_COMPUTE_NEFFTDIGITREVERSEKERNEL_H
-
-#include "arm_compute/core/KernelDescriptors.h"
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-// Forward declarations
-class ITensor;
-
-/** Interface for the digit reverse operation kernel. */
-class NEFFTDigitReverseKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEFFTDigitReverseKernel";
- }
- /** Constructor */
- NEFFTDigitReverseKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEFFTDigitReverseKernel(const NEFFTDigitReverseKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEFFTDigitReverseKernel &operator=(const NEFFTDigitReverseKernel &) = delete;
- /** Default Move Constructor. */
- NEFFTDigitReverseKernel(NEFFTDigitReverseKernel &&) = default;
- /** Default move assignment operator */
- NEFFTDigitReverseKernel &operator=(NEFFTDigitReverseKernel &&) = default;
- /** Default destructor */
- ~NEFFTDigitReverseKernel() = default;
- /** Set the input and output tensors.
- *
- * @param[in] input Source tensor. Data types supported: F32. Number of channels supported: 1 (real tensor) or 2 (complex tensor).
- * @param[out] output Destination tensor. Data type supported: same as @p input. Number of channels supported: 2 (complex tensor).
- * @param[in] idx Digit reverse index tensor. Data type supported: U32
- * @param[in] config Kernel configuration.
- */
- void configure(const ITensor *input, ITensor *output, const ITensor *idx, const FFTDigitReverseKernelInfo &config);
-
- /** Static function to check if given info will lead to a valid configuration of @ref NEFFTDigitReverseKernel
- *
- * @param[in] input Source tensor info. Data types supported: F32. Number of channels supported: 1 (real tensor) or 2 (complex tensor).
- * @param[in] output Destination tensor info. Data type supported: same as @p input. Number of channels supported: 2 (complex tensor).
- * @param[in] idx Digit reverse index tensor info. Data type supported: U32
- * @param[in] config Kernel configuration
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *idx, const FFTDigitReverseKernelInfo &config);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- using NEFFTDigitReverseKernelFunctionPtr = void (NEFFTDigitReverseKernel::*)(const Window &window);
-
- template <bool is_input_complex, bool is_output_complex>
- void digit_reverse_kernel_axis_0(const Window &window);
-
- template <bool is_input_complex, bool is_output_complex>
- void digit_reverse_kernel_axis_1(const Window &window);
-
- NEFFTDigitReverseKernelFunctionPtr _func;
- const ITensor *_input;
- ITensor *_output;
- const ITensor *_idx;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEFFTDIGITREVERSEKERNEL_H */
diff --git a/arm_compute/core/NEON/kernels/NEFFTRadixStageKernel.h b/arm_compute/core/NEON/kernels/NEFFTRadixStageKernel.h
deleted file mode 100644
index 15663e7490..0000000000
--- a/arm_compute/core/NEON/kernels/NEFFTRadixStageKernel.h
+++ /dev/null
@@ -1,103 +0,0 @@
-/*
- * Copyright (c) 2019 Arm Limited.
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEFFTRADIXSTAGEKERNEL_H -#define ARM_COMPUTE_NEFFTRADIXSTAGEKERNEL_H - -#include "arm_compute/core/KernelDescriptors.h" -#include "arm_compute/core/NEON/INEKernel.h" - -#include -#include - -namespace arm_compute -{ -// Forward declarations -class ITensor; - -/** Interface for the FFT kernel. */ -class NEFFTRadixStageKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEFFTRadixStageKernel"; - } - /** Constructor */ - NEFFTRadixStageKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEFFTRadixStageKernel(const NEFFTRadixStageKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEFFTRadixStageKernel &operator=(const NEFFTRadixStageKernel &) = delete; - /** Default Move Constructor. */ - NEFFTRadixStageKernel(NEFFTRadixStageKernel &&) = default; - /** Default move assignment operator */ - NEFFTRadixStageKernel &operator=(NEFFTRadixStageKernel &&) = default; - /** Default destructor */ - ~NEFFTRadixStageKernel() = default; - /** Set the input and output tensors. - * - * @note If the output tensor is nullptr, the FFT will be performed in-place - * - * @param[in,out] input Source tensor. Data types supported: F32. Number of channels supported: 2 (complex tensor). - * @param[out] output Destination tensor. Data type supported: same as @p input. Number of channels supported: same as @p input. - * @param[in] config FFT descriptor metadata. - */ - void configure(ITensor *input, ITensor *output, const FFTRadixStageKernelInfo &config); - /** Static function to check if given info will lead to a valid configuration of @ref NEFFTRadixStageKernel - * - * @param[in] input Source tensor info. Data types supported: F32. Number of channels supported: 2 (complex tensor). - * @param[in] output Destination tensor info. Data type supported: same as @p input. Number of channels supported: same as @p input. - * @param[in] config FFT descriptor metadata. 
- * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const FFTRadixStageKernelInfo &config); - /** Returns the radix that are support by the FFT kernel - * - * @return A set of supported radix - */ - static std::set supported_radix(); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - ITensor *_input; - ITensor *_output; - bool _run_in_place; - unsigned int _Nx; - unsigned int _axis; - unsigned int _radix; - - void set_radix_stage_axis0(const FFTRadixStageKernelInfo &config); - void set_radix_stage_axis1(const FFTRadixStageKernelInfo &config); - - using FFTFunctionPointerAxis0 = std::function; - using FFTFunctionPointerAxis1 = std::function; - - FFTFunctionPointerAxis0 _func_0; - FFTFunctionPointerAxis1 _func_1; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEFFTRADIXSTAGEKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEFFTScaleKernel.h b/arm_compute/core/NEON/kernels/NEFFTScaleKernel.h deleted file mode 100644 index c25ba323ab..0000000000 --- a/arm_compute/core/NEON/kernels/NEFFTScaleKernel.h +++ /dev/null @@ -1,84 +0,0 @@ -/* - * Copyright (c) 2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEFFTSCALEKERNEL_H -#define ARM_COMPUTE_NEFFTSCALEKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -#include "arm_compute/core/KernelDescriptors.h" - -namespace arm_compute -{ -// Forward declarations -class ITensor; - -/** Interface for the inverse fft scale kernel. */ -class NEFFTScaleKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEFFTScaleKernel"; - } - /** Constructor */ - NEFFTScaleKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEFFTScaleKernel(const NEFFTScaleKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEFFTScaleKernel &operator=(const NEFFTScaleKernel &) = delete; - /** Default Move Constructor. */ - NEFFTScaleKernel(NEFFTScaleKernel &&) = default; - /** Default move assignment operator */ - NEFFTScaleKernel &operator=(NEFFTScaleKernel &&) = default; - /** Default destructor */ - ~NEFFTScaleKernel() = default; - /** Set the input and output tensors. - * - * @param[in,out] input Source tensor. Data types supported: F32. Number of channels supported: 2 (complex tensor). 
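Similarly, a small sketch of driving the radix-stage kernel above. The element type of the set returned by supported_radix() is written out as unsigned int because the template argument was lost in this copy of the patch; the in-place call follows the @note on configure().

    #include "arm_compute/core/KernelDescriptors.h"
    #include "arm_compute/core/NEON/kernels/NEFFTRadixStageKernel.h" // header location prior to this patch
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    // Query which radices this build supports before planning the FFT decomposition.
    const std::set<unsigned int> radices = NEFFTRadixStageKernel::supported_radix();

    Tensor buf;                      // complex (2-channel) F32 tensor, assumed initialised
    FFTRadixStageKernelInfo cfg{};   // radix, axis, Nx, ... as defined in KernelDescriptors.h
    NEFFTRadixStageKernel stage;
    stage.configure(&buf, nullptr, cfg); // nullptr output -> in-place, per the @note above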
- * @param[out] output Destination tensor. Data type supported: same as @p input. Number of channels supported: 1 (real tensor) or 2 (complex tensor). - * @param[in] config Kernel configuration - */ - void configure(ITensor *input, ITensor *output, const FFTScaleKernelInfo &config); - /** Static function to check if given info will lead to a valid configuration of @ref NEFFTScaleKernel - * - * @param[in] input Source tensor info. Data types supported: F32. Number of channels supported: 2 (complex tensor). - * @param[in] output Destination tensor info. Data type supported: same as @p input. Number of channels supported: 1 (real tensor) or 2 (complex tensor). - * @param[in] config Kernel configuration - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const FFTScaleKernelInfo &config); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - ITensor *_input; - ITensor *_output; - float _scale; - bool _run_in_place; - bool _is_conj; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEFFTSCALEKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEFastCornersKernel.h b/arm_compute/core/NEON/kernels/NEFastCornersKernel.h deleted file mode 100644 index e4e87c032f..0000000000 --- a/arm_compute/core/NEON/kernels/NEFastCornersKernel.h +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Copyright (c) 2016-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEFASTCORNERSKERNEL_H -#define ARM_COMPUTE_NEFASTCORNERSKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/Types.h" - -#include <cstdint> - -namespace arm_compute -{ -class ITensor; -using IImage = ITensor; - -/** NEON kernel to perform fast corners */ -class NEFastCornersKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEFastCornersKernel"; - } - /** Constructor */ - NEFastCornersKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEFastCornersKernel(const NEFastCornersKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEFastCornersKernel &operator=(const NEFastCornersKernel &) = delete; - /** Allow instances of this class to be moved */ - NEFastCornersKernel(NEFastCornersKernel &&) = default; - /** Allow instances of this class to be moved */ - NEFastCornersKernel &operator=(NEFastCornersKernel &&) = default; - /** Initialise the kernel. - * - * @param[in] input Source image. Data type supported: U8. - * @param[out] output Output image. Data type supported: U8. - * @param[in] threshold Threshold on difference between intensity of the central pixel and pixels on Bresenham's circle of radius 3. - * @param[in] non_max_suppression True if non-maxima suppression is applied, false otherwise. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const IImage *input, IImage *output, uint8_t threshold, bool non_max_suppression, bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; - -private: - const IImage *_input; /**< source image */ - IImage *_output; /**< intermediate results */ - uint8_t _threshold; /**< threshold on difference between intensity */ - bool _non_max_suppression; /** true if non-maxima suppression is applied in the next stage */ -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEFASTCORNERSKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEFillArrayKernel.h b/arm_compute/core/NEON/kernels/NEFillArrayKernel.h deleted file mode 100644 index 99df8795ae..0000000000 --- a/arm_compute/core/NEON/kernels/NEFillArrayKernel.h +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Copyright (c) 2016-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
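A usage sketch for the FAST corners kernel above, assuming the runtime Tensor and NEScheduler facilities from the same library version; the threshold of 20 is an arbitrary illustrative value.

    #include "arm_compute/core/NEON/kernels/NEFastCornersKernel.h" // header location prior to this patch
    #include "arm_compute/runtime/NEON/NEScheduler.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    Tensor src, corners;             // U8 images of the same shape, assumed initialised
    NEFastCornersKernel fast;
    fast.configure(&src, &corners, /*threshold=*/20, /*non_max_suppression=*/true, /*border_undefined=*/true);

    // Kernels are normally dispatched through the scheduler rather than run directly.
    NEScheduler::get().schedule(&fast, Window::DimY);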
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEFILLARRAYKERNEL_H -#define ARM_COMPUTE_NEFILLARRAYKERNEL_H - -#include "arm_compute/core/IArray.h" -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/Types.h" - -#include - -namespace arm_compute -{ -class ITensor; -using IImage = ITensor; - -/** This kernel adds all texels greater than or equal to the threshold value to the keypoint array. */ -class NEFillArrayKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEFillArrayKernel"; - } - /** Default contructor */ - NEFillArrayKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEFillArrayKernel(const NEFillArrayKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEFillArrayKernel &operator=(const NEFillArrayKernel &) = delete; - /** Allow instances of this class to be moved */ - NEFillArrayKernel(NEFillArrayKernel &&) = default; - /** Allow instances of this class to be moved */ - NEFillArrayKernel &operator=(NEFillArrayKernel &&) = default; - /** Default detructor */ - ~NEFillArrayKernel() = default; - - /** Initialise the kernel. - * - * @param[in] input Source image. Data type supported: U8. - * @param[in] threshold Texels greater than the threshold will be added to the array. - * @param[out] output Arrays of keypoints to store the results. - */ - void configure(const IImage *input, uint8_t threshold, IKeyPointArray *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - bool is_parallelisable() const override; - -private: - const IImage *_input; - IKeyPointArray *_output; - uint8_t _threshold; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEFILLARRAYKERNEL_H*/ diff --git a/arm_compute/core/NEON/kernels/NEFillBorderKernel.h b/arm_compute/core/NEON/kernels/NEFillBorderKernel.h deleted file mode 100644 index 071843d114..0000000000 --- a/arm_compute/core/NEON/kernels/NEFillBorderKernel.h +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Copyright (c) 2016-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
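The keypoint-array kernel above is the natural consumer of the FAST corners score image; a sketch, assuming the KeyPointArray alias from the library's runtime Array utilities and an arbitrary capacity.

    #include "arm_compute/core/NEON/kernels/NEFillArrayKernel.h" // header location prior to this patch
    #include "arm_compute/runtime/Array.h"

    using namespace arm_compute;

    KeyPointArray keypoints(10000);  // capacity is an arbitrary illustrative choice
    NEFillArrayKernel fill_array;
    fill_array.configure(&corners, /*threshold=*/1, &keypoints); // 'corners' as in the FAST corners sketch earlier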
- */ -#ifndef ARM_COMPUTE_NEFILLBORDERKERNEL_H -#define ARM_COMPUTE_NEFILLBORDERKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/PixelValue.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -// Forward declarations -class ITensor; - -/** Interface for the kernel to fill borders */ -class NEFillBorderKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEFillBorderKernel"; - } - /** Default Constructor */ - NEFillBorderKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEFillBorderKernel(const NEFillBorderKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEFillBorderKernel &operator=(const NEFillBorderKernel &) = delete; - /** Allow instances of this class to be moved */ - NEFillBorderKernel(NEFillBorderKernel &&) = default; - /** Allow instances of this class to be moved */ - NEFillBorderKernel &operator=(NEFillBorderKernel &&) = default; - /** Default destructor */ - ~NEFillBorderKernel() = default; - - /** Initialise the function. - * - * @note This kernel fills the borders within the XY-planes. - * - * @param[in,out] tensor Tensor to process. Data types supported: All. - * @param[in] border_size Size of the border to fill in elements. - * @param[in] border_mode Border mode to use for the convolution. - * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. - * - */ - void configure(ITensor *tensor, BorderSize border_size, BorderMode border_mode, const PixelValue &constant_border_value = PixelValue()); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - void fill_replicate_single_channel(const Window &window); - void fill_constant_value_single_channel(const Window &window); - - ITensor *_tensor; - BorderSize _border_size; - BorderMode _mode; - PixelValue _constant_border_value; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEFILLBORDERKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEFlattenLayerKernel.h b/arm_compute/core/NEON/kernels/NEFlattenLayerKernel.h deleted file mode 100644 index dbd24129f1..0000000000 --- a/arm_compute/core/NEON/kernels/NEFlattenLayerKernel.h +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Copyright (c) 2018-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
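A minimal sketch of configuring the border-fill kernel above; the one-element constant border is an arbitrary choice and the constant value is left at its PixelValue() default.

    #include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" // header location prior to this patch
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    Tensor t;                        // any data type, assumed initialised
    NEFillBorderKernel fill_border;
    fill_border.configure(&t, BorderSize(1), BorderMode::CONSTANT); // constant value defaults to PixelValue()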
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEFLATTENLAYERKERNEL_H -#define ARM_COMPUTE_NEFLATTENLAYERKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the flatten layer kernel. */ -class NEFlattenLayerKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEFlattenLayerKernel"; - } - /** Default constructor */ - NEFlattenLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEFlattenLayerKernel(const NEFlattenLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEFlattenLayerKernel &operator=(const NEFlattenLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - NEFlattenLayerKernel(NEFlattenLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - NEFlattenLayerKernel &operator=(NEFlattenLayerKernel &&) = default; - /** Default destructor */ - ~NEFlattenLayerKernel() = default; - - /** Set the input and output of the kernel. - * - * @param[in] input First input tensor to flatten with at least 3 dimensions. - * The dimensions above the third will be interpreted as batches. Data types supported: All - * @param[out] output Output tensor with shape [w*h*d, input_batches] where: - * w = width input tensor, h = height input tensor and d = depth input tensor. Data type supported: same as @p input - */ - void configure(const ITensor *input, ITensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref NEFlattenLayerKernel - * - * @param[in] input First input tensor to flatten with at least 3 dimensions. - * The dimensions above the third will be interpreted as batches. Data types supported: All - * @param[out] output Output tensor with shape [w*h*d, input_batches] where: - * w = width input tensor, h = height input tensor and d = depth input tensor. Data type supported: same as @p input - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - const ITensor *_input; - ITensor *_output; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEFLATTENLAYERKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEFloorKernel.h b/arm_compute/core/NEON/kernels/NEFloorKernel.h deleted file mode 100644 index 255b0d4fb9..0000000000 --- a/arm_compute/core/NEON/kernels/NEFloorKernel.h +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Copyright (c) 2017-2019 Arm Limited. 
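A concrete reading of the flatten shape rule above: an input of shape [W=7, H=5, D=3, N=2] flattens to [7*5*3, 2] = [105, 2]. A validate-only sketch with these hypothetical shapes:

    #include "arm_compute/core/NEON/kernels/NEFlattenLayerKernel.h" // header location prior to this patch
    #include "arm_compute/core/TensorInfo.h"

    using namespace arm_compute;

    const TensorInfo src(TensorShape(7U, 5U, 3U, 2U), 1, DataType::F32); // [W, H, D, N]
    const TensorInfo dst(TensorShape(105U, 2U), 1, DataType::F32);       // [W*H*D, N]
    const Status     s = NEFlattenLayerKernel::validate(&src, &dst);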
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEFLOORKERNEL_H -#define ARM_COMPUTE_NEFLOORKERNEL_H - -#include "arm_compute/core/NEON/INESimpleKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** NEON kernel to perform a floor operation */ -class NEFloorKernel : public INESimpleKernel -{ -public: - const char *name() const override - { - return "NEFloorKernel"; - } - /** Set the source, destination of the kernel - * - * @param[in] input Source tensor. Data type supported: F16/F32. - * @param[out] output Destination tensor. Same as @p input - */ - void configure(const ITensor *input, ITensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref NEFloorKernel - * - * @param[in] input Source tensor info. Data type supported: F16/F32. - * @param[in] output Destination tensor info. Same as @p input - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEFLOORKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEFuseBatchNormalizationKernel.h b/arm_compute/core/NEON/kernels/NEFuseBatchNormalizationKernel.h deleted file mode 100644 index ecb17f87a2..0000000000 --- a/arm_compute/core/NEON/kernels/NEFuseBatchNormalizationKernel.h +++ /dev/null @@ -1,116 +0,0 @@ -/* - * Copyright (c) 2018-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEFUSEBATCHNORMALIZATIONKERNEL_H -#define ARM_COMPUTE_NEFUSEBATCHNORMALIZATIONKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -// Forward declarations -class ITensor; - -/** OpenNE kernel to fuse the batch normalization node to a preceding convolution node */ -class NEFuseBatchNormalizationKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEFuseBatchNormalizationKernel"; - } - /** Default constructor */ - NEFuseBatchNormalizationKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEFuseBatchNormalizationKernel(const NEFuseBatchNormalizationKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEFuseBatchNormalizationKernel &operator=(const NEFuseBatchNormalizationKernel &) = delete; - /** Allow instances of this class to be moved */ - NEFuseBatchNormalizationKernel(NEFuseBatchNormalizationKernel &&) = default; - /** Allow instances of this class to be moved */ - NEFuseBatchNormalizationKernel &operator=(NEFuseBatchNormalizationKernel &&) = default; - /** Default destructor */ - ~NEFuseBatchNormalizationKernel() = default; - /** Set the source, destination of the kernel - * - * @param[in] input_weights Input weights tensor for convolution or depthwise convolution layer. Data type supported: F16/F32. Data layout supported: NCHW, NHWC - * @param[in] bn_mean Batch normalization layer mean tensor. Same as @p input_weights - * @param[in] bn_var Batch normalization layer variance tensor. Same as @p input_weights - * @param[out] fused_weights (Optional) Output fused weights tensor. It can be a nullptr in case of in-place computation. Same as @p input_weights - * @param[out] fused_bias (Optional) Output fused bias tensor. It can be a nullptr in case of in-place computation and input_bias != nullptr. Same as @p input_weights - * @param[in] input_bias (Optional) Input bias tensor for convolution or depthwise convolution layer. It can be a nullptr in case the bias tensor is not required. Same as @p input_weights - * @param[in] bn_beta (Optional) Batch normalization layer beta tensor. It can be a nullptr in case the beta tensor is not required. Same as @p input_weights - * @note if nullptr, bn_beta is set to 0.0 - * @param[in] bn_gamma (Optional) Batch normalization layer gamma tensor. It can be a nullptr in case the gamma tensor is not required. Same as @p input_weights - * @note if nullptr, bn_gamma is set to 1.0 - * @param[in] epsilon (Optional) Batch normalization layer epsilon parameter. Defaults to 0.001f. - * @param[in] fbn_type (Optional) Fused batch normalization type. Defaults to CONVOLUTION. 
- */ - void configure(const ITensor *input_weights, const ITensor *bn_mean, const ITensor *bn_var, ITensor *fused_weights, ITensor *fused_bias, - const ITensor *input_bias = nullptr, const ITensor *bn_beta = nullptr, const ITensor *bn_gamma = nullptr, - float epsilon = 0.001f, FuseBatchNormalizationType fbn_type = FuseBatchNormalizationType::CONVOLUTION); - /** Static function to check if given info will lead to a valid configuration of @ref NEFuseBatchNormalizationKernel - * - * @param[in] input_weights Input weights tensor info for convolution or depthwise convolution layer. Data type supported: F16/F32. Data layout supported: NCHW, NHWC - * @param[in] bn_mean Batch normalization layer mean tensor info. Same as @p input_weights - * @param[in] bn_var Batch normalization layer variance tensor info. Same as @p input_weights - * @param[in] fused_weights (Optional) Output fused weights tensor info. It can be a nullptr in case of in-place computation. Same as @p input_weights - * @param[in] fused_bias (Optional) Output fused bias tensor info. It can be a nullptr in case of in-place computation and input_bias != nullptr. Same as @p input_weights - * @param[in] input_bias (Optional) Input bias tensor info for convolution or depthwise convolution layer. It can be a nullptr in case the bias tensor is not required. Same as @p input_weights - * @param[in] bn_beta (Optional) Batch normalization layer beta tensor info. It can be a nullptr in case the beta tensor is not required. Same as @p input_weights - * @note if nullptr, bn_beta is set to 0.0 - * @param[in] bn_gamma (Optional) Batch normalization layer gamma tensor info. It can be a nullptr in case the gamma tensor is not required. Same as @p input_weights - * @note if nullptr, bn_gamma is set to 1.0 - * @param[in] epsilon (Optional) Batch normalization layer epsilon parameter. Defaults to 0.001f. - * @param[in] fbn_type (Optional) Fused batch normalization type. Defaults to CONVOLUTION. - * - * @return a status - */ - static Status validate(const ITensorInfo *input_weights, const ITensorInfo *bn_mean, const ITensorInfo *bn_var, - const ITensorInfo *fused_weights, const ITensorInfo *fused_bias, - const ITensorInfo *input_bias = nullptr, const ITensorInfo *bn_beta = nullptr, const ITensorInfo *bn_gamma = nullptr, - float epsilon = 0.001f, FuseBatchNormalizationType fbn_type = FuseBatchNormalizationType::CONVOLUTION); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - const ITensor *_input_weights; - const ITensor *_input_bias; - const ITensor *_bn_mean; - const ITensor *_bn_var; - const ITensor *_bn_gamma; - const ITensor *_bn_beta; - ITensor *_fused_weights; - ITensor *_fused_bias; - float _epsilon; - bool _run_in_place_weights; - bool _run_in_place_bias; - - using FuseBatchNormFunction = void(const ITensor *input_weights, const ITensor *input_bias, ITensor *fused_weights, ITensor *fused_bias, - const ITensor *bn_mean, const ITensor *bn_var, const ITensor *bn_beta, const ITensor *bn_gamma, float epsilon, const Window &window); - - FuseBatchNormFunction *_func; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEFUSEBATCHNORMALIZATIONKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEGEMMAssemblyBaseKernel.h b/arm_compute/core/NEON/kernels/NEGEMMAssemblyBaseKernel.h deleted file mode 100644 index a2f0e8c5a8..0000000000 --- a/arm_compute/core/NEON/kernels/NEGEMMAssemblyBaseKernel.h +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Copyright (c) 2017-2019 Arm Limited. 
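The fuse-batch-normalization header above does not spell out the arithmetic, so as a reminder this is the standard per-channel folding identity such a kernel computes (with bn_beta treated as 0 and bn_gamma as 1 when the corresponding tensor is a nullptr, matching the notes above). A scalar sketch of the identity, not a quote of the kernel's implementation:

    #include <cmath>

    // scale_c      = gamma_c / sqrt(var_c + epsilon)
    // fused_weight = weight * scale_c
    // fused_bias_c = (bias_c - mean_c) * scale_c + beta_c
    inline float fold_weight(float w, float gamma, float var, float eps)
    {
        return w * gamma / std::sqrt(var + eps);
    }

    inline float fold_bias(float b, float mean, float var, float gamma, float beta, float eps)
    {
        return (b - mean) * gamma / std::sqrt(var + eps) + beta;
    }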
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEGEMMASSEMBLYBASE_H -#define ARM_COMPUTE_NEGEMMASSEMBLYBASE_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Base class for GEMM NEON kernels implemented in Assembly. */ -class NEGEMMAssemblyBaseKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEGEMMAssemblyBaseKernel"; - } - /** Constructor */ - NEGEMMAssemblyBaseKernel() - : _input0(nullptr), _input1(nullptr), _output(nullptr), _workspace(nullptr), _alpha(1.f), _beta(0.f), _is_transposed_0(false), _is_transposed_1(false) - { - } - - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEGEMMAssemblyBaseKernel(const NEGEMMAssemblyBaseKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEGEMMAssemblyBaseKernel &operator=(const NEGEMMAssemblyBaseKernel &) = delete; - /** Allow instances of this class to be moved */ - NEGEMMAssemblyBaseKernel(NEGEMMAssemblyBaseKernel &&) = default; - /** Allow instances of this class to be moved */ - NEGEMMAssemblyBaseKernel &operator=(NEGEMMAssemblyBaseKernel &&) = default; - - virtual ~NEGEMMAssemblyBaseKernel() = default; - - /** Initialise the kernel's input and output. - * - * The computed function is C = a * AxB + b * C. - * - * @param[in] input0 Input tensor containing the Matrix A. Data types supported: F32 - * @param[in] input1 Input tensor containing the Matrix B. Data types supported: same as @p input0 - * @param[in,out] output Output tensor to store the result of matrix multiplication. If @p beta is not zero the values are multiplied by @p beta before the result is accumulated. Otherwise the values are overwritten by the result. Data types supported: same as @p input0. - * @param[out] workspace Space for intermediate results. - * @param[in] alpha Weight of the matrix product - * @param[in] beta Weight of the accumulation. - * @param[in] is_transposed_0 (Optional)True if @p input0 is transposed else false. (Defaults to false) - * @param[in] is_transposed_1 (Optional)True if @p input1 is transposed else false. 
(Defaults to false) - */ - void configure(const ITensor *input0, const ITensor *input1, ITensor *output, ITensor *workspace, float alpha = 1.f, float beta = 0.f, bool is_transposed_0 = false, bool is_transposed_1 = false) - { - internal_configure(input0, input1, output, workspace, alpha, beta, is_transposed_0, is_transposed_1); - } - -protected: - virtual void internal_configure(const ITensor *input0, const ITensor *input1, ITensor *output, ITensor *workspace, float alpha, float beta, bool _is_transposed_0, bool _is_transposed_1) = 0; - - const ITensor *_input0; - const ITensor *_input1; - ITensor *_output; - ITensor *_workspace; - float _alpha; - float _beta; - bool _is_transposed_0; - bool _is_transposed_1; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEGEMMASSEMBLYBASE_H*/ diff --git a/arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h b/arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h deleted file mode 100644 index 322932bab2..0000000000 --- a/arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h +++ /dev/null @@ -1,102 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEGEMMINTERLEAVE4x4KERNEL_H -#define ARM_COMPUTE_NEGEMMINTERLEAVE4x4KERNEL_H - -#include "arm_compute/core/NEON/INESimpleKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** NEON kernel to interleave the elements of a matrix - * - * This function puts the values in a 4x4 block of Matrix A on the same row (Interleaved values) - * - * @f[ - * \left( \begin{array}{cccc} - * a00 & a01 & a02 & a03 \\ - * a10 & a11 & a12 & a13 \\ - * a20 & a21 & a22 & a23 \\ - * a30 & a31 & a32 & a33 \\ - * \end{array} \right) - * \rightarrow - * \left( \begin{array}{ccccccccccccccccc} - * a00 & a10 & a20 & a30 & a01 & a11 & a21 & a31 & a02 & a12 & a22 & a32 & a03 & a13 & a23 & a33 \\ - * \end{array} \right) - * @f] - * - * After this operation, the output matrix will have the following shape: [ height * 4, ceil(width / 4.0f) ] - */ -class NEGEMMInterleave4x4Kernel : public INESimpleKernel -{ -public: - const char *name() const override - { - return "NEGEMMInterleave4x4Kernel"; - } - /* Constructor */ - NEGEMMInterleave4x4Kernel(); - /** Initialise the kernel's input and output. - * - * @param[in] input Input tensor. Data types supported: All - * @param[out] output Output tensor which stores the interleaved matrix. Data type supported: same as @p input. 
- */ - void configure(const ITensor *input, ITensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMInterleave4x4Kernel - * - * @param[in] input Input tensor info. Data types supported: All - * @param[in] output Output tensor info which stores the interleaved matrix. Data type supported: same as @p input. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Template function to run gemm interleave 4x4 - * - * @tparam ScalarType Scalar datatype - * - * @param[in] input Input tensor. Data types supported: uint32_t, uint16_t and uint8_t - * @param[out] output Output tensor. Data types supported: uint32_t, uint16_t and uint8_t - * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). - */ - template - void gemm_interleave4x4(const ITensor *input, ITensor *output, const Window &window); - - /** Common signature for all the specialised gemm interleave 4x4 functions - * - * @param[in] input Input tensor. Data types supported: uint32_t, uint16_t and uint8_t - * @param[out] output Output tensor. Data types supported: uint32_t, uint16_t and uint8_t - * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). - */ - using GEMMInterleaveFunctionFuncPtr = void (NEGEMMInterleave4x4Kernel::*)(const ITensor *input, ITensor *output, const Window &window); - - GEMMInterleaveFunctionFuncPtr _func; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEGEMMINTERLEAVE4x4KERNEL_H*/ diff --git a/arm_compute/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h b/arm_compute/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h deleted file mode 100644 index 856cdf42e7..0000000000 --- a/arm_compute/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
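A scalar restatement of the interleave pattern documented above: element (r, c) of a 4x4 source block lands at position c*4 + r of the single interleaved output row.

    #include <cstddef>

    // Reference only; the kernel operates on whole tensors with NEON vector loads.
    template <typename T>
    void interleave4x4_block(const T *src, std::size_t src_stride, T *dst_row)
    {
        for(std::size_t r = 0; r < 4; ++r)
        {
            for(std::size_t c = 0; c < 4; ++c)
            {
                dst_row[c * 4 + r] = src[r * src_stride + c]; // a00 a10 a20 a30 a01 a11 ...
            }
        }
    }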
- */ -#ifndef ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYKERNEL_H -#define ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** NEON kernel to multiply matrices - * - * @note @ref NEGEMMLowpMatrixMultiplyKernel low precision matrix product kernel - * This kernel performs the following computation: - * - * -# Convert a values from int8 to int32 - * -# Convert b values from int8 to int32 - * -# Compute the int32 matrix product of the resulting a * b and store the result as int32 - * - */ -class NEGEMMLowpMatrixMultiplyKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEGEMMLowpMatrixMultiplyKernel"; - } - /** Constructor */ - NEGEMMLowpMatrixMultiplyKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers)*/ - NEGEMMLowpMatrixMultiplyKernel(const NEGEMMLowpMatrixMultiplyKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers)*/ - NEGEMMLowpMatrixMultiplyKernel &operator=(const NEGEMMLowpMatrixMultiplyKernel &) = delete; - /** Allow instances of this class to be moved */ - NEGEMMLowpMatrixMultiplyKernel(NEGEMMLowpMatrixMultiplyKernel &&) = default; - /** Allow instances of this class to be moved */ - NEGEMMLowpMatrixMultiplyKernel &operator=(NEGEMMLowpMatrixMultiplyKernel &&) = default; - /** Initialise the kernel's input and output. - * - * The input matrices @p input0 and @p input1 must be the output of the kernels: @ref NEGEMMInterleave4x4Kernel and @ref NEGEMMTranspose1xWKernel. These two - * kernels change the layout of the original matrices to be more cache-friendly. - * - * @param[in] input0 Input tensor containing the interleaved Matrix A. Data type supported: U8/QASYMM8/S8/QASYMM8_SIGNED - * @param[in] input1 Input tensor containing the transposed1xW Matrix B. Data type supported: U8/QASYMM8/S8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL - * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: S32 - */ - void configure(const ITensor *input0, const ITensor *input1, ITensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpMatrixMultiplyKernel - * - * @param[in] input0 Input tensor info containing the interleaved Matrix A. Data type supported: U8/QASYMM8/S8/QASYMM8_SIGNED - * @param[in] input1 Input tensor info containing the transposed Matrix B. Data type supported: U8/QASYMM8/S8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL - * @param[in] output Output tensor info to store the result of matrix multiplication. Data type supported: S32 - * - * @return a status - */ - static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - const ITensor *_input0; - const ITensor *_input1; - ITensor *_output; - bool _slide_matrix_b; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYKERNEL_H*/ diff --git a/arm_compute/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h b/arm_compute/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h deleted file mode 100644 index 5ce8403d3b..0000000000 --- a/arm_compute/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h +++ /dev/null @@ -1,103 +0,0 @@ -/* - * Copyright (c) 2017-2019 Arm Limited. 
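A scalar model of the accumulation the low-precision matrix-multiply kernel above describes: both operands are widened to int32 before multiplying and the products are accumulated in int32. The real kernel consumes the interleaved and transposed1xW blocks produced by the companion kernels, which is omitted here; the signed case is shown, the unsigned one is analogous.

    #include <cstdint>

    int32_t lowp_dot(const int8_t *a_row, const int8_t *b_col, int k)
    {
        int32_t acc = 0;
        for(int i = 0; i < k; ++i)
        {
            acc += static_cast<int32_t>(a_row[i]) * static_cast<int32_t>(b_col[i]);
        }
        return acc;
    }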
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEGEMMLOWPOFFSETCONTRIBUTIONKERNEL_H -#define ARM_COMPUTE_NEGEMMLOWPOFFSETCONTRIBUTIONKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** NEON kernel used to add the offset contribution after @ref NEGEMMLowpMatrixMultiplyKernel. The computation is performed in-place - * - * This kernel takes a final int32 accumulator value (the output of @ref NEGEMMLowpMatrixMultiplyKernel), - * and adds to it the offset contribution of matrix A and matrix B in-place. - * - * The final result is: - * - * mm_result[i][k] = mm_result[i][k] + - * (vector_sum_col[k] * a_offset) + - * (vector_sum_row[i] * b_offset) + - * (a_offset * b_offset * k) - * - */ -class NEGEMMLowpOffsetContributionKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEGEMMLowpOffsetContributionKernel"; - } - /** Constructor */ - NEGEMMLowpOffsetContributionKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers)*/ - NEGEMMLowpOffsetContributionKernel(const NEGEMMLowpOffsetContributionKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers)*/ - NEGEMMLowpOffsetContributionKernel &operator=(const NEGEMMLowpOffsetContributionKernel &) = delete; - /** Allow instances of this class to be moved */ - NEGEMMLowpOffsetContributionKernel(NEGEMMLowpOffsetContributionKernel &&) = default; - /** Allow instances of this class to be moved */ - NEGEMMLowpOffsetContributionKernel &operator=(NEGEMMLowpOffsetContributionKernel &&) = default; - /** Initialise the kernel's input and output. - * - * @param[in, out] mm_result Input tensor containing the result of @ref NEGEMMLowpMatrixMultiplyKernel. Data type supported: S32 - * @param[in] vector_sum_col Input row-vector of sums of all the entries in each column of matrix B. - * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: same as @p mm_result - * @param[in] vector_sum_row Input row-vector of sums of all the entries in each row of matrix A. - * Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: same as @p mm_result - * @param[in] k Number of matrix A columns or Matrix B rows - * @param[in] a_offset Offset to be added to each element of the matrix A. - * @param[in] b_offset Offset to be added to each element of the matrix B. 
- */ - void configure(ITensor *mm_result, const ITensor *vector_sum_col, const ITensor *vector_sum_row, int32_t k, int32_t a_offset, int32_t b_offset); - /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpOffsetContributionKernel - * - * @param[in] mm_result Input tensor containing the result of @ref NEGEMMLowpMatrixMultiplyKernel. Data type supported: S32 - * @param[in] vector_sum_col Input row-vector of sums of all the entries in each column of matrix B. - * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: same as @p mm_result - * @param[in] vector_sum_row Input row-vector of sums of all the entries in each row of matrix A. - * Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: same as @p mm_result - * @param[in] a_offset Offset to be added to each element of the matrix A. - * @param[in] b_offset Offset to be added to each element of the matrix B. - * - * @return a status - */ - static Status validate(const ITensorInfo *mm_result, const ITensorInfo *vector_sum_col, const ITensorInfo *vector_sum_row, int32_t a_offset, int32_t b_offset); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - const ITensor *_vector_sum_col; - const ITensor *_vector_sum_row; - ITensor *_mm_result; - int32_t _a_offset; - int32_t _b_offset; - int32_t _k_offset; - bool _slide_vector_sum_col; -}; -} // namespace arm_compute - -#endif /* ARM_COMPUTE_NEGEMMLOWPOFFSETCONTRIBUTIONKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h b/arm_compute/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h deleted file mode 100644 index 4db0872166..0000000000 --- a/arm_compute/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h +++ /dev/null @@ -1,133 +0,0 @@ -/* - * Copyright (c) 2019-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEGEMMLOWPOFFSETCONTRIBUTIONOUTPUTSTAGEKERNEL_H -#define ARM_COMPUTE_NEGEMMLOWPOFFSETCONTRIBUTIONOUTPUTSTAGEKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** NEON kernel used to add the offset contribution and perform the output stage after @ref NEGEMMLowpMatrixMultiplyKernel. 
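The in-place update documented for NEGEMMLowpOffsetContributionKernel above, written out as a scalar helper for one element mm_result[i][k]:

    #include <cstdint>

    inline int32_t offset_contribution(int32_t mm_result, int32_t vector_sum_col_k, int32_t vector_sum_row_i,
                                       int32_t a_offset, int32_t b_offset, int32_t k)
    {
        return mm_result + vector_sum_col_k * a_offset + vector_sum_row_i * b_offset + a_offset * b_offset * k;
    }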
- * - * The computation is performed in-place - * - * This kernel takes a final int32 accumulator value (the output of @ref NEGEMMLowpMatrixMultiplyKernel), - * and adds to it the offset contribution of matrix A and matrix B in-place. - * - * The output stage can perform either QuantizeDownInt32ToUint8Scale or QuantizeDownInt32ToUint8ScaleByFixedPoint for Uint8. - * The output stage can perform either QuantizeDownInt32ToInt8Scale or QuantizeDownInt32ToInt8ScaleByFixedPoint for Int8. - * - * For QuantizeDownInt32ToUint8Scale/QuantizeDownInt32ToInt8Scale the final result is: - * - * ((mm_result'[i][k] + result_offset) * result_mult_int) >> result_shift - * - * For QuantizeDownInt32ToUint8ScaleByFixedPoint/QuantizeDownInt32ToInt8ScaleByFixedPoint the final result is: - * - * (FixedPointMul(mm_result'[i][k], result_fixedpoint_multiplier) >> result_shift) + result_offset_after_shift - * - * where FixedPointMul(x, y) is the nearest integer to the following - * mathematical expression, evaluated without overflow or intermediate rounding: - * - * (x * y) / 2^31 - * - * and mm_result'[i][k] = mm_result[i][k] + - * (vector_sum_col[k] * a_offset) + - * (vector_sum_row[i] * b_offset) + - * (a_offset * b_offset * k) - */ - -class NEGEMMLowpOffsetContributionOutputStageKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEGEMMLowpOffsetContributionOutputStageKernel"; - } - /** Constructor */ - NEGEMMLowpOffsetContributionOutputStageKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers)*/ - NEGEMMLowpOffsetContributionOutputStageKernel(const NEGEMMLowpOffsetContributionOutputStageKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers)*/ - NEGEMMLowpOffsetContributionOutputStageKernel &operator=(const NEGEMMLowpOffsetContributionOutputStageKernel &) = delete; - /** Allow instances of this class to be moved */ - NEGEMMLowpOffsetContributionOutputStageKernel(NEGEMMLowpOffsetContributionOutputStageKernel &&) = default; - /** Allow instances of this class to be moved */ - NEGEMMLowpOffsetContributionOutputStageKernel &operator=(NEGEMMLowpOffsetContributionOutputStageKernel &&) = default; - /** Initialise the kernel's input and output. - * - * @param[in] mm_result Input tensor containing the result of @ref NEGEMMLowpMatrixMultiplyKernel. Data type supported: S32 - * @param[in] vector_sum_col Input row-vector of sums of all the entries in each column of matrix B. - * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: same as @p mm_result - * @param[in] vector_sum_row Input row-vector of sums of all the entries in each row of matrix A. - * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the addition of biases is not required. - * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p mm_result. - * @param[out] output Output tensor containing the final quantized result. Data type supported: QASYMM8/QASYMM8_SIGNED - * @param[in] k Number of matrix A columns or Matrix B rows - * @param[in] a_offset Offset to be added to each element of the matrix A. - * @param[in] b_offset Offset to be added to each element of the matrix B. - * @param[in] output_stage GEMMLowp output stage info, providing the type of quantization and the necessary parameters. 
- */ - void configure(const ITensor *mm_result, const ITensor *vector_sum_col, const ITensor *vector_sum_row, const ITensor *bias, ITensor *output, int32_t k, int32_t a_offset, int32_t b_offset, - GEMMLowpOutputStageInfo output_stage); - /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpOffsetContributionOutputStageKernel - * - * @param[in] mm_result Input tensor info containing the result of @ref NEGEMMLowpMatrixMultiplyKernel. Data type supported: S32 - * @param[in] vector_sum_col Tensor info for the input row-vector of sums of all the entries in each column of matrix B. - * Note: vector_sum_col can be a nullptr in case a_offset = 0. Data type supported: same as @p mm_result - * @param[in] vector_sum_row Tensor info for the input row-vector of sums of all the entries in each row of matrix A. - * Note: vector_sum_row can be a nullptr in case b_offset = 0. Data type supported: same as @p mm_result - * @param[in] bias Biases tensor info. Only shared biases supported and it can be a nullptr if the addition of biases is not required. - * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p mm_result. - * @param[in] output Output tensor info containing the final quantized result. Data type supported: QASYMM8/QASYMM8_SIGNED - * @param[in] a_offset Offset to be added to each element of the matrix A. - * @param[in] b_offset Offset to be added to each element of the matrix B. - * @param[in] output_stage GEMMLowp output stage info, providing the type of quantization and the necessary parameters. - * - * @return a status - */ - static Status validate(const ITensorInfo *mm_result, const ITensorInfo *vector_sum_col, const ITensorInfo *vector_sum_row, const ITensorInfo *bias, const ITensorInfo *output, int32_t a_offset, - int32_t b_offset, - GEMMLowpOutputStageInfo output_stage); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Function to use for the particular tensors passed to configure() */ - const ITensor *_vector_sum_col; - const ITensor *_vector_sum_row; - const ITensor *_bias; - const ITensor *_mm_result; - ITensor *_output; - int32_t _a_offset; - int32_t _b_offset; - int32_t _k_offset; - bool _slide_vector_sum_col; - GEMMLowpOutputStageInfo _output_stage; -}; -} // namespace arm_compute - -#endif /* ARM_COMPUTE_NEGEMMLOWPOFFSETCONTRIBUTIONOUTPUTSTAGEKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ScaleKernel.h b/arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ScaleKernel.h deleted file mode 100644 index 4e0c8f8fb8..0000000000 --- a/arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ScaleKernel.h +++ /dev/null @@ -1,112 +0,0 @@ -/* - * Copyright (c) 2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
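A scalar model of the ScaleByFixedPoint expression documented for the offset-contribution output stage above. The rounding in fixed_point_mul() approximates "nearest integer to (x*y)/2^31"; the library's NEON path uses saturating, rounded shifts, so this is illustrative rather than bit-exact.

    #include <cstdint>

    inline int32_t fixed_point_mul(int32_t x, int32_t y)
    {
        // Round-to-nearest approximation of (x * y) / 2^31, computed in 64-bit to avoid overflow.
        return static_cast<int32_t>((static_cast<int64_t>(x) * y + (static_cast<int64_t>(1) << 30)) >> 31);
    }

    inline int32_t output_stage_fixed_point(int32_t mm_prime, int32_t result_fixedpoint_multiplier,
                                            int32_t result_shift, int32_t result_offset_after_shift)
    {
        return (fixed_point_mul(mm_prime, result_fixedpoint_multiplier) >> result_shift) + result_offset_after_shift;
    }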
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEGEMMLOWPQUANTIZEDOWNINT32SCALEKERNEL_H -#define ARM_COMPUTE_NEGEMMLOWPQUANTIZEDOWNINT32SCALEKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** NEON kernel used to quantize down the int32 accumulator values of GEMMLowp to QASYMM8/QASYMM8_SIGNED - * - * This kernel takes a final int32 accumulator value (the output of @ref NEGEMMLowpMatrixMultiplyKernel), and processes it to obtain the final QASYMM8/QASYMM8_SIGNED value. - * The following computations will be performed by the kernel: - * - * -# Add offset terms to final result - * -# Multiply each entry of result by result_mult_int - * -# Add bias to final result if bias tensor is not a nullptr - * -# Shift the int32 accumulator by result_shift - * -# Clamp the value between the specified min and max bounds - * -# Clamp the resulting int32 values: - * -# -to the [0..255] range and cast to QASYMM8. - * -# -to the [-128..127] range and cast to QASYMM8_SIGNED. - * - */ -class NEGEMMLowpQuantizeDownInt32ScaleKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEGEMMLowpQuantizeDownInt32ScaleKernel"; - } - /** Constructor */ - NEGEMMLowpQuantizeDownInt32ScaleKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers)*/ - NEGEMMLowpQuantizeDownInt32ScaleKernel(const NEGEMMLowpQuantizeDownInt32ScaleKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers)*/ - NEGEMMLowpQuantizeDownInt32ScaleKernel &operator=(const NEGEMMLowpQuantizeDownInt32ScaleKernel &) = delete; - /** Allow instances of this class to be moved */ - NEGEMMLowpQuantizeDownInt32ScaleKernel(NEGEMMLowpQuantizeDownInt32ScaleKernel &&) = default; - /** Allow instances of this class to be moved */ - NEGEMMLowpQuantizeDownInt32ScaleKernel &operator=(NEGEMMLowpQuantizeDownInt32ScaleKernel &&) = default; - /** Initialise the kernel's input and output. - * - * @param[in] input Input tensor. Data type supported: S32 - * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required. - * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. - * @param[out] output Output tensor. Data type supported: Data type supported: QASYMM8/QASYMM8_SIGNED - * @param[out] output_stage GEMMLowp output stage metadata. - */ - void configure(const ITensor *input, const ITensor *bias, ITensor *output, const GEMMLowpOutputStageInfo *output_stage); - /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpQuantizeDownInt32ScaleKernel - * - * @param[in] input Input tensor. Data type supported: S32 - * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required. - * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. - * @param[in] output Output tensor. 
Data type supported: Data type supported: QASYMM8/QASYMM8_SIGNED - * @param[out] output_stage GEMMLowp output stage metadata. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, const GEMMLowpOutputStageInfo *output_stage); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Template function to run the NEGEMMLowpQuantizeDownInt32ScaleKernel - * - * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). - */ - template - void run(const Window &window); - - /** Common signature for all the specialised NEGEMMLowpQuantizeDownInt32ScaleKernel functions - * - * @param[in] window Region on which to execute the kernel. - */ - using QuantizeDownFunctionPtr = void (NEGEMMLowpQuantizeDownInt32ScaleKernel::*)(const Window &window); - - QuantizeDownFunctionPtr _func; - const ITensor *_input; - const ITensor *_bias; - ITensor *_output; - const GEMMLowpOutputStageInfo *_output_stage; - bool _is_bounded_relu; -}; -} // namespace arm_compute - -#endif /* ARM_COMPUTE_NEGEMMLOWPQUANTIZEDOWNINT32SCALEKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h b/arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h deleted file mode 100644 index d26c778e74..0000000000 --- a/arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.h +++ /dev/null @@ -1,116 +0,0 @@ -/* - * Copyright (c) 2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEGEMMLOWPQUANTIZEDOWNINT32TOINT16SCALEBYFIXEDPOINTKERNEL_H -#define ARM_COMPUTE_NEGEMMLOWPQUANTIZEDOWNINT32TOINT16SCALEBYFIXEDPOINTKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** NEON kernel used to quantize down the int32 accumulator values of GEMMLowp to QSYMM16 - * - * This kernel takes a final int32 accumulator value (the output of @ref NEGEMMLowpMatrixMultiplyKernel), and processes it to obtain the final QSYMM16 value. 
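The scale-based quantize-down steps listed for NEGEMMLowpQuantizeDownInt32ScaleKernel earlier in this patch reduce to a few integer operations per accumulator. The following scalar sketch follows the documented step order; it is an illustration only, and the function name and parameter names (result_offset, result_mult_int, result_shift, bounds) are taken from the documentation rather than from the kernel's internals.

#include <algorithm>
#include <cstdint>

// Scalar illustration of the documented quantize-down-by-scale steps for a
// single S32 accumulator. The real kernel vectorises this and reads the
// parameters from the GEMMLowp output stage metadata.
uint8_t quantize_down_scale(int32_t acc, int32_t bias, int32_t result_offset,
                            int32_t result_mult_int, int32_t result_shift,
                            int32_t bound_min, int32_t bound_max)
{
    int32_t res = (acc + result_offset) * result_mult_int;   // add offset, multiply by result_mult_int
    res += bias;                                             // bias contribution (0 when no bias tensor)
    res >>= result_shift;                                    // shift the accumulator
    res = std::max(bound_min, std::min(bound_max, res));     // clamp to the requested min/max bounds
    // Final saturation shown for QASYMM8; the QASYMM8_SIGNED path clamps to [-128..127] instead.
    return static_cast<uint8_t>(std::max(0, std::min(255, res)));
}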
- * The following computations will be performed by the kernel: - * - * -# Compute fixed point multiplication between each entry of input by result_fixedpoint_multiplier - * -# Add bias to final result if bias tensor is not a nullptr - * -# Round to nearest division by a power-of-two using result_shift - * -# Clamp the value between the specified min and max bounds - * -# Clamp the resulting int32 values to the [-32768, 32767] range and cast to QSYMM16. - * - */ -class NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel"; - } - /** Constructor */ - NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers)*/ - NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel(const NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers)*/ - NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel &operator=(const NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel &) = delete; - /** Allow instances of this class to be moved */ - NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel(NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel &&) = default; - /** Allow instances of this class to be moved */ - NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel &operator=(NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel &&) = default; - /** Initialise the kernel's input and output. - * - * @param[in] input Input tensor. Data type supported: S32 - * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required. - * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. - * @param[out] output Output tensor. Data type supported: Data type supported: QSYMM16 - * @param[in] result_fixedpoint_multiplier Fixed point value to be multiplied to each element of the input matrix when once the result_offset has been add - * @param[in] result_shift Integer value used to round to nearest division by a power-of-two the result after the fixed point multiplication - * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QSYMM16. Defaults to 0. - * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QSYMM16. - * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to 0. - */ - void configure(const ITensor *input, const ITensor *bias, ITensor *output, int result_fixedpoint_multiplier, int result_shift, int min = 0, int max = 0); - /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel - * - * @param[in] input Input tensor info. Data type supported: S32 - * @param[in] bias Biases tensor info. Only shared biases supported and it can be a nullptr if the biases addition is not required. - * Biases are 1D tensor info with dimensions [OFM]. Data type supported: Same as @p input. - * @param[in] output Output tensor info. Data type supported: Data type supported: QSYMM16 - * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QSYMM16. Defaults to 0. 
- * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QSYMM16, - * Along with @p min, this value can be used to implement "rectified linear unit" activation functions. Defaults to 0. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = 0, int max = 0); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Template function to run the NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel - * - * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). - */ - template - void run(const Window &window); - - /** Common signature for all the specialised NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel functions - * - * @param[in] window Region on which to execute the kernel. - */ - using QuantizeDownFunctionPtr = void (NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel::*)(const Window &window); - - QuantizeDownFunctionPtr _func; - const ITensor *_input; - const ITensor *_bias; - ITensor *_output; - int _result_fixedpoint_multiplier; - int _result_shift; - int _min; - int _max; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEGEMMLOWPQUANTIZEDOWNINT32TOINT16SCALEBYFIXEDPOINTKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h b/arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h deleted file mode 100644 index f1661680d0..0000000000 --- a/arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.h +++ /dev/null @@ -1,119 +0,0 @@ -/* - * Copyright (c) 2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEGEMMLOWPQUANTIZEDOWNINT32TOINT8SCALEBYFIXEDPOINTKERNEL_H -#define ARM_COMPUTE_NEGEMMLOWPQUANTIZEDOWNINT32TOINT8SCALEBYFIXEDPOINTKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** NEON kernel used to quantize down the int32 accumulator values of GEMMLowp to QASYMM8_SIGNED - * - * This kernel takes a final int32 accumulator value (the output of @ref NEGEMMLowpMatrixMultiplyKernel), and processes it to obtain the final QASYMM8_SIGNED value. 
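The Int16, Int8 and Uint8 ScaleByFixedPoint kernels in this patch share the same fixed point requantization steps. Below is a simplified scalar model of those steps; the NEON kernels use saturating doubling-high-multiply instructions, so this int64 arithmetic only approximates the common case, ignores the INT32_MIN edge case and rounds negative values slightly differently.

#include <algorithm>
#include <cstdint>

// Simplified scalar model of the fixed point requantization: multiply by a
// Q0.31 multiplier, round to nearest on the power-of-two shift, add the
// output offset, then clamp before the final cast to the quantized type.
int32_t requantize_fixed_point(int32_t acc, int32_t multiplier, int32_t shift,
                               int32_t offset_after_shift, int32_t clamp_min, int32_t clamp_max)
{
    int64_t prod    = static_cast<int64_t>(acc) * multiplier;                   // Q0.31 fixed point multiply
    int32_t high    = static_cast<int32_t>((prod + (1LL << 30)) >> 31);         // rounding high half (doubled)
    int32_t rounded = shift > 0 ? (high + (1 << (shift - 1))) >> shift : high;  // round-to-nearest shift
    int32_t res     = rounded + offset_after_shift;                             // offset (0 for the QSYMM16 variant)
    return std::max(clamp_min, std::min(clamp_max, res));                       // clamp before the final cast
}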
- * The following computations will be performed by the kernel: - * - * -# Compute fixed point multiplication between each entry of input by result_fixedpoint_multiplier - * -# Add bias to final result if bias tensor is not a nullptr - * -# Round to nearest division by a power-of-two using result_shift - * -# Add offset to each result - * -# Clamp the value between the specified min and max bounds - * -# Clamp the resulting int32 values to the [-128..127] range and cast to QASYMM8_SIGNED. - * - */ -class NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel"; - } - /** Constructor */ - NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers)*/ - NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel(const NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers)*/ - NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel &operator=(const NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel &) = delete; - /** Allow instances of this class to be moved */ - NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel(NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel &&) = default; - /** Allow instances of this class to be moved */ - NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel &operator=(NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel &&) = default; - /** Initialise the kernel's input and output. - * - * @param[in] input Input tensor. Data type supported: S32 - * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required. - * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. - * @param[out] output Output tensor. Data type supported: QASYMM8_SIGNED - * @param[in] result_fixedpoint_multiplier Fixed point value to be multiplied with each element of the input matrix once the result_offset has been added - * @param[in] result_shift Integer value used to round to nearest division by a power-of-two the result after the fixed point multiplication - * @param[in] result_offset_after_shift Offset to be applied to result before converting it back to QASYMM8_SIGNED - * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8_SIGNED - * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8_SIGNED. - * Along with @p min, this value can be used to implement "rectified linear unit" activation functions - */ - void configure(const ITensor *input, const ITensor *bias, ITensor *output, int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift, int min = 0, int max = 0); - /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel - * - * @param[in] input Input tensor. Data type supported: S32 - * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required. - * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. - * @param[in] output Output tensor.
Data type supported: Data type supported: QASYMM8_SIGNED - * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8_SIGNED - * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8_SIGNED, - * Along with @p min, this value can be used to implement "rectified linear unit" activation functions - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = 0, int max = 0); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Template function to run the NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel - * - * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). - */ - template - void run(const Window &window); - - /** Common signature for all the specialised NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel functions - * - * @param[in] window Region on which to execute the kernel. - */ - using QuantizeDownFunctionPtr = void (NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel::*)(const Window &window); - - QuantizeDownFunctionPtr _func; - const ITensor *_input; - const ITensor *_bias; - ITensor *_output; - int _result_fixedpoint_multiplier; - int _result_shift; - int _result_offset_after_shift; - int _min; - int _max; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEGEMMLOWPQUANTIZEDOWNINT32TOINT8SCALEBYFIXEDPOINTKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h b/arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h deleted file mode 100644 index 94ca617466..0000000000 --- a/arm_compute/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel.h +++ /dev/null @@ -1,119 +0,0 @@ -/* - * Copyright (c) 2017-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEGEMMLOWPQUANTIZEDOWNINT32TOUINT8SCALEBYFIXEDPOINTKERNEL_H -#define ARM_COMPUTE_NEGEMMLOWPQUANTIZEDOWNINT32TOUINT8SCALEBYFIXEDPOINTKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** NEON kernel used to quantize down the int32 accumulator values of GEMMLowp to QASYMM8 - * - * This kernel takes a final int32 accumulator value (the output of @ref NEGEMMLowpMatrixMultiplyKernel), and processes it to obtain the final QASYMM8 value. - * The following computations will be performed by the kernel: - * - * -# Compute fixed point multiplication between each entry of input by result_fixedpoint_multiplier - * -# Add bias to final result if bias tensor is not a nullptr - * -# Round to nearest division by a power-of-two using result_shift - * -# Add offset to each result - * -# Clamp the value between the specified min and max bounds - * -# Clamp the resulting int32 values to the [0..255] range and cast to QASYMM8. - * - */ -class NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel"; - } - /** Constructor */ - NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers)*/ - NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel(const NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers)*/ - NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel &operator=(const NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel &) = delete; - /** Allow instances of this class to be moved */ - NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel(NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel &&) = default; - /** Allow instances of this class to be moved */ - NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel &operator=(NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel &&) = default; - /** Initialise the kernel's input and output. - * - * @param[in] input Input tensor. Data type supported: S32 - * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required. - * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. - * @param[out] output Output tensor. 
Data type supported: Data type supported: QASYMM8 - * @param[in] result_fixedpoint_multiplier Fixed point value to be multiplied to each element of the input matrix when once the result_offset has been add - * @param[in] result_shift Integer value used to round to nearest division by a power-of-two the result after the fixed point multiplication - * @param[in] result_offset_after_shift Offset to be applied to result before converting it back to QASYMM8 - * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8 - * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8, - * Along with @p min, this value can be used to implement "rectified linear unit" activation functions - */ - void configure(const ITensor *input, const ITensor *bias, ITensor *output, int result_fixedpoint_multiplier, int result_shift, int result_offset_after_shift, int min = 0, int max = 0); - /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel - * - * @param[in] input Input tensor. Data type supported: S32 - * @param[in] bias Biases tensor. Only shared biases supported and it can be a nullptr if the biases addition is not required. - * Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. - * @param[in] output Output tensor. Data type supported: Data type supported: QASYMM8 - * @param[in] min (Optional) Min value used to saturate down the output result before converting back to QASYMM8 - * @param[in] max (Optional) Max value used to saturate up the output result before converting back to QASYMM8, - * Along with @p min, this value can be used to implement "rectified linear unit" activation functions - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *bias, const ITensorInfo *output, int min = 0, int max = 0); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Template function to run the NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel - * - * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). - */ - template - void run(const Window &window); - - /** Common signature for all the specialised NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel functions - * - * @param[in] window Region on which to execute the kernel. - */ - using QuantizeDownFunctionPtr = void (NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointKernel::*)(const Window &window); - - QuantizeDownFunctionPtr _func; - const ITensor *_input; - const ITensor *_bias; - ITensor *_output; - int _result_fixedpoint_multiplier; - int _result_shift; - int _result_offset_after_shift; - int _min; - int _max; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEGEMMLOWPQUANTIZEDOWNINT32TOUINT8SCALEBYFIXEDPOINTKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEGEMMLowpReductionKernel.h b/arm_compute/core/NEON/kernels/NEGEMMLowpReductionKernel.h deleted file mode 100644 index f41941f796..0000000000 --- a/arm_compute/core/NEON/kernels/NEGEMMLowpReductionKernel.h +++ /dev/null @@ -1,170 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEGEMMLOWREDUCTIONKERNEL_H -#define ARM_COMPUTE_NEGEMMLOWREDUCTIONKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -// Forward declarations -class ITensor; -struct GEMMLowpReductionKernelInfo; - -/** Common interface for all NEON reduction kernels */ -class INEGEMMLowpReductionKernel : public INEKernel -{ -public: - /** Constructor */ - INEGEMMLowpReductionKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers)*/ - INEGEMMLowpReductionKernel(const INEGEMMLowpReductionKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers)*/ - INEGEMMLowpReductionKernel &operator=(const INEGEMMLowpReductionKernel &) = delete; - /** Allow instances of this class to be moved */ - INEGEMMLowpReductionKernel(INEGEMMLowpReductionKernel &&) = default; - /** Allow instances of this class to be moved */ - INEGEMMLowpReductionKernel &operator=(INEGEMMLowpReductionKernel &&) = default; - - /** Initialise the kernel's input and output. - * - * @param[in] input Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL - * @param[out] output Output row-vector of sums of all the entries in each row/col of input tensor. Data type supported: S32 - * @param[in] info Kernel metadata: - * - k Number of matrix columns/rows depending on the type of reduction. - * - is_reshaped True if the matrix has been reshaped. - * - scalar Scalar value to multiply each reduced column/row by. - * - mul_byscalar True if each reduced column/row must be multiplied by a scalar value. - */ - virtual void configure(const ITensor *input, ITensor *output, const GEMMLowpReductionKernelInfo &info) = 0; - -protected: - const ITensor *_input; - ITensor *_output; - int32_t _k; - int32_t _scalar; - bool _mul_by_scalar; -}; - -/** NEON kernel used to compute the row-vectors of sums of all the entries in each row of Matrix A. - * - * @note This stage is needed to handle the offset of matrix product - * https://github.com/google/gemmlowp/blob/master/doc/low-precision.md - */ -class NEGEMMLowpMatrixAReductionKernel : public INEGEMMLowpReductionKernel -{ -public: - const char *name() const override - { - return "NEGEMMLowpMatrixAReductionKernel"; - } - /** Initialise the kernel's input and output. - * - * @param[in] mtx_a Input tensor. 
Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL - * @param[out] vector_sum_row Output row-vector of sums of all the entries in each row of mtx_a. Data type supported: S32 - * @param[in] info Kernel metadata: - * - k (num_mtx_a_cols) Number of matrix A columns - * - is_reshaped (is_interleaved4x4) True if the matrix A has been interleaved4x4 - * - scalar Scalar value to multiply each reduced row by. - * - mul_byscalar True if each reduced column must be multiplied by a scalar value. - */ - void configure(const ITensor *mtx_a, ITensor *vector_sum_row, const GEMMLowpReductionKernelInfo &info) override; - /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpMatrixAReductionKernel - * - * @param[in] mtx_a Input tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL - * @param[in] vector_sum_row Output row-vector of sums of all the entries in each row of mtx_a. Data type supported: S32 - * @param[in] info Kernel metadata: - * - k (num_mtx_a_cols) Number of matrix A columns - * - is_reshaped (is_interleaved4x4) True if the matrix A has been interleaved4x4 - * - scalar Scalar value to multiply each reduced row by. - * - mul_byscalar True if each reduced column must be multiplied by a scalar value. - * - * @return a status - */ - static Status validate(const ITensorInfo *mtx_a, const ITensorInfo *vector_sum_row, const GEMMLowpReductionKernelInfo &info); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Execution of the reduction kernel specialized on the input type - * - * @param[in] window Execution window - */ - template - void run_internal(const Window &window); -}; - -/** NEON kernel used to compute the row-vectors of sums of all the entries in each column of Matrix B. - * - * @note This stage is needed to handle the offset of matrix product - * https://github.com/google/gemmlowp/blob/master/doc/low-precision.md - */ -class NEGEMMLowpMatrixBReductionKernel : public INEGEMMLowpReductionKernel -{ -public: - const char *name() const override - { - return "NEGEMMLowpMatrixBReductionKernel"; - } - /** Initialise the kernel's input and output. - * - * @param[in] mtx_b Input tensor. Data type supported: Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL - * @param[out] vector_sum_col Output row-vector of sums of all the entries in each column of mtx_b. Data type supported: S32 - * @param[in] info Kernel metadata: - * - k (num_mtx_b_rows) Number of matrix B rows. - * - is_reshaped (is_transposed1xW) True if the input tensor is transposed 1xW. - * - scalar Scalar value to multiply each reduced row by. - * - mul_byscalar True if each reduced row must be multiplied by a scalar value. - */ - void configure(const ITensor *mtx_b, ITensor *vector_sum_col, const GEMMLowpReductionKernelInfo &info) override; - /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMLowpMatrixBReductionKernel - * - * @param[in] mtx_b Input tensor. Data type supported: Data type supported: QASYMM8/QASYMM8_SIGNED/QSYMM8/QSYMM8_PER_CHANNEL - * @param[in] vector_sum_col Output row-vector of sums of all the entries in each column of mtx_b. Data type supported: S32 - * @param[in] info Kernel metadata: - * - k (num_mtx_b_rows) Number of matrix B rows. - * - is_reshaped (is_transposed1xW) True if the input tensor is transposed 1xW. - * - scalar Scalar value to multiply each reduced row by. 
- * - mul_byscalar True if each reduced row must be multiplied by a scalar value. - * - * @return a status - */ - static Status validate(const ITensorInfo *mtx_b, const ITensorInfo *vector_sum_col, const GEMMLowpReductionKernelInfo &info); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Execution of the reduction kernel specialized on the input type - * - * @param[in] window Execution window - * @param[in] info Thread-related information - */ - template - void run_internal(const Window &window, const ThreadInfo &info); -}; -} // namespace arm_compute - -#endif /* ARM_COMPUTE_NEGEMMLOWREDUCTIONKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h b/arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h deleted file mode 100644 index 79f62561da..0000000000 --- a/arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Copyright (c) 2016-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEGEMMMATRIXADDITIONKERNEL_H -#define ARM_COMPUTE_NEGEMMMATRIXADDITIONKERNEL_H - -#include "arm_compute/core/NEON/INESimpleKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** NEON kernel to perform the in-place matrix addition between 2 matrices taking into account that the second matrix might be weighted by a scalar value beta: - * - * @note [ MTX_OUT = MTX_0 + beta * MTX_1 ] with MTX_0 and MTX_1 of the same size - * - * @note This stage is used to finalize the GEMM result and it is computed if and only if beta != 0.0. 
In case this kernel is used for finalizing GEMM result, we have: - * - MTX_0 = A * B * alpha, where MTX_0 is the output of @ref NEGEMMMatrixMultiplyKernel - * - MTX_1 = C - */ -class NEGEMMMatrixAdditionKernel : public INESimpleKernel -{ -public: - const char *name() const override - { - return "NEGEMMMatrixAdditionKernel"; - } - /** Constructor */ - NEGEMMMatrixAdditionKernel(); - /** Prevent instances of this class from being copied */ - NEGEMMMatrixAdditionKernel(const NEGEMMMatrixAdditionKernel &) = delete; - /** Prevent instances of this class from being copied */ - NEGEMMMatrixAdditionKernel &operator=(const NEGEMMMatrixAdditionKernel &) = delete; - /** Allow instances of this class to be moved */ - NEGEMMMatrixAdditionKernel(NEGEMMMatrixAdditionKernel &&) = default; - /** Allow instances of this class to be moved */ - NEGEMMMatrixAdditionKernel &operator=(NEGEMMMatrixAdditionKernel &&) = default; - /** Initialise the kernel's input and output. - * - * @note The input and output tensor must have the same dimensions - * - * @param[in] input Input tensor (Matrix C). Data types supported: F16/F32 - * @param[in, out] output Output tensor. If this kernel is used to finalize the GEMM result, output contains the result obtained by the kernel @ref NEGEMMMatrixMultiplyKernel. Data type supported: the same as @p input. - * @param[in] beta Weight of matrix C - */ - void configure(const ITensor *input, ITensor *output, float beta); - /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMMatrixAdditionKernel. - * - * @note The input and output tensor must have the same dimensions - * - * @param[in] input Input tensor info (Matrix C). Data types supported: F16/F32 - * @param[in] output Output tensor info. If this kernel is used to finalize the GEMM result, output contains the result obtained by the kernel @ref NEGEMMMatrixMultiplyKernel. Data type supported: the same as @p input. - * @param[in] beta Weight of matrix C - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, float beta); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Common signature for all the matrix addition functions - * - * @param[in] input An input tensor. Data types supported: F16/F32 - * @param[out] output The output tensor. Data type supported: same as @p input - * @param[in] window Region on which to execute the kernel. - * @param[in] beta Weight of matrix C - */ - using MatrixAdditionFunction = void(const ITensor *input, ITensor *output, const Window &window, float beta); - /** Matrix addition function to use for the particular tensor types passed to configure() */ - MatrixAdditionFunction *_func; - float _beta; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEGEMMMATRIXADDITIONKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h b/arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h deleted file mode 100644 index f79e07ebb4..0000000000 --- a/arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h +++ /dev/null @@ -1,94 +0,0 @@ -/* - * Copyright (c) 2017-2019 Arm Limited. 
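The two GEMMLowp reduction kernels defined earlier in this patch only compute the row sums of matrix A and the column sums of matrix B that the offset-contribution step consumes. A plain, non-NEON sketch of that computation, with row-major storage and illustrative shapes assumed:

#include <cstdint>
#include <vector>

// One S32 sum per row of the quantized matrix A.
std::vector<int32_t> sum_rows_of_a(const std::vector<int8_t> &a, int rows, int cols)
{
    std::vector<int32_t> vector_sum_row(rows, 0);
    for(int r = 0; r < rows; ++r)
        for(int c = 0; c < cols; ++c)
            vector_sum_row[r] += a[r * cols + c];
    return vector_sum_row;
}

// One S32 sum per column of the quantized matrix B.
std::vector<int32_t> sum_cols_of_b(const std::vector<int8_t> &b, int rows, int cols)
{
    std::vector<int32_t> vector_sum_col(cols, 0);
    for(int r = 0; r < rows; ++r)
        for(int c = 0; c < cols; ++c)
            vector_sum_col[c] += b[r * cols + c];
    return vector_sum_col;
}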
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEGEMMMATRIXMULTIPLYKERNEL_H -#define ARM_COMPUTE_NEGEMMMATRIXMULTIPLYKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** NEON kernel to multiply two input matrices "A" and "B". All elements of the output matrix/vector will be multiplied by alpha after the matrix multiplication - * - * @note If the output tensor is a matrix, the implementation assumes that the input tensors @p input0 and @p input1 are both matrices and reshaped respectively with @ref NEGEMMInterleave4x4Kernel" and @ref NEGEMMTranspose1xWKernel - * @note If the output tensor is a vector and the data type is F32, the implementation assumes that the first input tensor @p input0 is a vector and the second input tensor @p input1 a matrix. The implementation also assumes that both tensors have not been reshaped - * - */ -class NEGEMMMatrixMultiplyKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEGEMMMatrixMultiplyKernel"; - } - /** Constructor */ - NEGEMMMatrixMultiplyKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEGEMMMatrixMultiplyKernel(const NEGEMMMatrixMultiplyKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEGEMMMatrixMultiplyKernel &operator=(const NEGEMMMatrixMultiplyKernel &) = delete; - /** Allow instances of this class to be moved */ - NEGEMMMatrixMultiplyKernel(NEGEMMMatrixMultiplyKernel &&) = default; - /** Allow instances of this class to be moved */ - NEGEMMMatrixMultiplyKernel &operator=(NEGEMMMatrixMultiplyKernel &&) = default; - /** Initialise the kernel's input and output. - * - * @note If the output tensor is a matrix, the input matrices @p input0 and @p input1 should be the output of the kernels: @ref NEGEMMInterleave4x4Kernel and @ref NEGEMMTranspose1xWKernel - * These two kernels change the layout of the original matrices to be more cache-friendly. - * - * @param[in] input0 Input tensor containing the interleaved Matrix A or the vector A. Data types supported: F16/F32 - * @param[in] input1 Input tensor containing the transposed Matrix B if the first input tensor A is not a vector. - * If the output tensor is a vector, input1 must contain the matrix B not reshaped. 
Data type supported: same as @p input0 - * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0. - * @param[in] alpha Weight of the matrix product - * @param[in] is_interleaved (Optional) True if input0 and input1 have been reshaped respectively using @ref NEGEMMInterleave4x4Kernel and @ref NEGEMMTranspose1xWKernel - * @param[in] reshape_info (Optional) GEMM reshape info. If is_interleaved_transposed = true, this object must contain the information to understand how the matrix A and matrix B have been reshaped - */ - void configure(const ITensor *input0, const ITensor *input1, ITensor *output, float alpha, bool is_interleaved, const GEMMReshapeInfo &reshape_info = GEMMReshapeInfo()); - /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMMatrixMultiplyKernel - * - * @param[in] input0 Input tensor containing the interleaved Matrix A or the vector A. Data types supported: F16/F32 - * @param[in] input1 Input tensor containing the transposed Matrix B if the first input tensor A is not a vector. - * If the output tensor is a vector, input1 must contain the matrix B not reshaped. Data type supported: same as @p input0 - * @param[in] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0. - * @param[in] alpha Weight of the matrix product - * @param[in] is_interleaved (Optional) True if input0 and input1 have been reshaped respectively using @ref NEGEMMInterleave4x4Kernel and @ref NEGEMMTranspose1xWKernel - * @param[in] reshape_info (Optional) GEMM reshape info. If is_interleaved_transposed = true, this object must contain the information to understand how the matrix A and matrix B have been reshaped - * - * @return a status - */ - static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output, float alpha, bool is_interleaved, const GEMMReshapeInfo &reshape_info); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - const ITensor *_input0; - const ITensor *_input1; - ITensor *_output; - float _alpha; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEGEMMMATRIXMULTIPLYKERNEL_H*/ diff --git a/arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h b/arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h deleted file mode 100644 index 756ac6a852..0000000000 --- a/arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Copyright (c) 2016-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEGEMMTRANSPOSE1xWKERNEL_H -#define ARM_COMPUTE_NEGEMMTRANSPOSE1xWKERNEL_H - -#include "arm_compute/core/NEON/INESimpleKernel.h" - -namespace arm_compute -{ -// Forward declarations -class ITensor; - -/** NEON kernel which transposes the elements of a matrix in chunks of 1xW, where W is equal to (16 / element size of the tensor) - * - * Following an example of how the transposition1xW works when the input data is F32 - * - * @f[ - * \left( \begin{array}{cccc} - * a00 & a01 & a02 & a03 \\ - * a10 & a11 & a12 & a13 \\ - * a20 & a21 & a22 & a23 \\ - * a30 & a31 & a32 & a33 \\ - * \end{array} \right) - * \rightarrow - * \left( \begin{array}{ccccccccccccccccc} - * a00 & a01 & a02 & a03 & a10 & a11 & a12 & a13 & a20 & a21 & a22 & a23 & a30 & a31 & a32 & a33 \\ - * \end{array} \right) - * @f] - * - * Following an example of how the transposition1xW works when the input data type is F16 - * - * @f[ - * \left( \begin{array}{cccccccc} - * a00 & a01 & a02 & a03 & a04 & a05 & a06 & a07 \\ - * a10 & a11 & a12 & a13 & a14 & a15 & a16 & a17 \\ - * a20 & a21 & a22 & a23 & a24 & a25 & a26 & a27 \\ - * a30 & a31 & a32 & a33 & a34 & a35 & a36 & a37 \\ - * \end{array} \right) - * \rightarrow - * \left( \begin{array}{cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc} - * a00 & a01 & a02 & a03 & a04 & a05 & a06 & a07 & a10 & a11 & a12 & a13 & a14 & a15 & a16 & a17 & a20 & a21 & a22 & a23 & a24 & a25 & a26 & a27 & a30 & a31 & a32 & a33 & a34 & a35 & a36 & a37\\ - * \end{array} \right) - * @f] - * - * @note The output matrix will have the following shape: [ height * W, ceil(width / W) ], where W = (16 / element size of the tensor) - * - */ -class NEGEMMTranspose1xWKernel : public INESimpleKernel -{ -public: - const char *name() const override - { - return "NEGEMMTranspose1xWKernel"; - } - /** Initialise the kernel's input and output. - * - * @param[in] input Input tensor. Data types supported: All - * @param[out] output Output tensor. Data type supported: same as @p input. - */ - void configure(const ITensor *input, ITensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMTranspose1xWKernel - * - * @param[in] input Input tensor info. Data types supported: All - * @param[in] output Output tensor info. Data type supported: same as @p input. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEGEMMTRANSPOSE1xWKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEGatherKernel.h b/arm_compute/core/NEON/kernels/NEGatherKernel.h deleted file mode 100644 index 31d4f19ed0..0000000000 --- a/arm_compute/core/NEON/kernels/NEGatherKernel.h +++ /dev/null @@ -1,113 +0,0 @@ -/* - * Copyright (c) 2019 Arm Limited. 
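The 1xW transposition documented for NEGEMMTranspose1xWKernel above can be summarised by its index mapping: element (x, y) of the input lands at row x / W, column y * W + x % W of the output, with W = 16 / element size. The sketch below illustrates that mapping for F32 data and row-major buffers; padding of a partial last chunk is ignored, and this is not the kernel's code.

#include <cstddef>

// Reference reshape for F32 (W = 4): input is [width x height] row-major,
// output is [ceil(width / W) x (height * W)] row-major.
void transpose_1xW(const float *in, float *out, std::size_t width, std::size_t height)
{
    const std::size_t W         = 16 / sizeof(float); // elements per chunk for F32
    const std::size_t out_width = height * W;         // output row length
    for(std::size_t y = 0; y < height; ++y)
    {
        for(std::size_t x = 0; x < width; ++x)
        {
            const std::size_t out_row = x / W;
            const std::size_t out_col = y * W + (x % W);
            out[out_row * out_width + out_col] = in[y * width + x];
        }
    }
}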
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef ARM_COMPUTE_NEGATHERKERNEL_H -#define ARM_COMPUTE_NEGATHERKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -// Forward declarations -class ITensor; - -/** Kernel to perform the gather operation on NEON */ -class NEGatherKernel : public INEKernel -{ -public: - /** Default constructor. */ - NEGatherKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers). */ - NEGatherKernel(const NEGatherKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers). */ - NEGatherKernel &operator=(const NEGatherKernel &) = delete; - /** Allow instances of this class to be moved. */ - NEGatherKernel(NEGatherKernel &&) = default; - /** Allow instances of this class to be moved. */ - NEGatherKernel &operator=(NEGatherKernel &&) = default; - /** Default destructor */ - ~NEGatherKernel() = default; - - /** Name of the kernel - * - * @return Kernel name - */ - const char *name() const override - { - return "NEGatherKernel"; - } - /** Initialise the kernel's inputs and outputs - * - * @param[in] input Source tensor. Supported tensor rank: up to 4. Data type supported: All - * @param[in] indices Indices tensor. Supported tensor rank: up to 1. Must be one of the following type: U32/S32. Each value must be in range [0, input.shape[@p axis]) - * @param[out] output Destination tensor. Data type supported: Same as @p input - * @param[in] axis (Optional) The axis in @p input to gather @p indices from. Negative values wrap around. Defaults to 0 - */ - void configure(const ITensor *input, const ITensor *indices, ITensor *output, int axis = 0); - /** Static function to check if given info will lead to a valid configuration of @ref NEGatherKernel - * - * @param[in] input Source tensor info. Supported tensor rank: up to 4. Data type supported: All - * @param[in] indices Indices tensor info. Supported tensor rank: up to 1. Must be one of the following type: U32/S32. Each value must be in range [0, input.shape[@p axis]) - * @param[in] output Destination tensor info. Data type supported: Same as @p input - * @param[in] axis (Optional) The axis in @p input to gather @p indices from. Negative values wrap around.
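Functionally, the gather described above selects whole slices of the input along one axis. A minimal scalar sketch for the axis = 0 case, with illustrative types and row-major storage assumed; this is not the kernel's implementation.

#include <cstddef>
#include <cstdint>
#include <vector>

// out receives one row of in for every entry of indices:
// out[i][c] = in[indices[i]][c]. Each index must lie in [0, rows) as the
// documentation above requires.
std::vector<float> gather_axis0(const std::vector<float> &in, std::size_t rows, std::size_t cols,
                                const std::vector<uint32_t> &indices)
{
    std::vector<float> out(indices.size() * cols);
    for(std::size_t i = 0; i < indices.size(); ++i)
    {
        for(std::size_t c = 0; c < cols; ++c)
        {
            out[i * cols + c] = in[indices[i] * cols + c]; // copy the selected row
        }
    }
    return out;
}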
Defaults to 0 - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *indices, const ITensorInfo *output, int axis); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Implementation of the gather operation for 0 axis. - * - * For gather on the 0 axis an element by element copy is performed. - * - * @param[in] window Region on which to execute the kernel. (Must be a region of the window returned by window()) - * @param[in] info Info about executing thread and CPU. - */ - template - void gather_0_axis(const Window &window, const ThreadInfo &info); - - /** Implementation of the gather operation. - * - * For 1<=axis a row-wise copy is taking place. - * - * @param[in] window Region on which to execute the kernel. (Must be a region of the window returned by window()) - * @param[in] info Info about executing thread and CPU. - */ - template - void gather_n_axis(const Window &window, const ThreadInfo &info); - - using kernel_ptr = void (NEGatherKernel::*)(const Window &window, const ThreadInfo &info); - - const ITensor *_input; - const ITensor *_indices; - int _axis; - ITensor *_output; - kernel_ptr _func; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEGATHERKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEGaussian3x3Kernel.h b/arm_compute/core/NEON/kernels/NEGaussian3x3Kernel.h deleted file mode 100644 index c8141817db..0000000000 --- a/arm_compute/core/NEON/kernels/NEGaussian3x3Kernel.h +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEGAUSSIAN3x3KERNEL_H -#define ARM_COMPUTE_NEGAUSSIAN3x3KERNEL_H - -#include "arm_compute/core/NEON/INESimpleKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** NEON kernel to perform a Gaussian 3x3 filter */ -class NEGaussian3x3Kernel : public INESimpleKernel -{ -public: - const char *name() const override - { - return "NEGaussian3x3Kernel"; - } - /** Set the source, destination and border mode of the kernel - * - * @param[in] input Source tensor. Data type supported: U8 - * @param[out] output Destination tensor. Data type supported: same as @p input - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 
- */ - void configure(const ITensor *input, ITensor *output, bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEGAUSSIAN3x3KERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEGaussian5x5Kernel.h b/arm_compute/core/NEON/kernels/NEGaussian5x5Kernel.h deleted file mode 100644 index b489f4b458..0000000000 --- a/arm_compute/core/NEON/kernels/NEGaussian5x5Kernel.h +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Copyright (c) 2016-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEGAUSSIAN5x5KERNEL_H -#define ARM_COMPUTE_NEGAUSSIAN5x5KERNEL_H - -#include "arm_compute/core/NEON/INESimpleKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** NEON kernel to perform a Gaussian 5x5 filter (horizontal pass) */ -class NEGaussian5x5HorKernel : public INESimpleKernel -{ -public: - const char *name() const override - { - return "NEGaussian5x5HorKernel"; - } - /** Default constructor */ - NEGaussian5x5HorKernel(); - - /** Initialise the kernel's source, destination and border mode. - * - * @param[in] input Source tensor. Data type supported: U8. - * @param[out] output Destination tensor. Data type supported: S16. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ITensor *input, ITensor *output, bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; - -private: - BorderSize _border_size; -}; - -/** NEON kernel to perform a Gaussian 5x5 filter (vertical pass) */ -class NEGaussian5x5VertKernel : public INESimpleKernel -{ -public: - const char *name() const override - { - return "NEGaussian5x5VertKernel"; - } - /** Initialise the kernel's source, destination and border mode. - * - * @param[in] input Source tensor. Data type supported: S16. - * @param[out] output Destination tensor, Data type supported: U8. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 
- */ - void configure(const ITensor *input, ITensor *output, bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEGAUSSIAN5x5KERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEGaussianPyramidKernel.h b/arm_compute/core/NEON/kernels/NEGaussianPyramidKernel.h deleted file mode 100644 index 33a4452382..0000000000 --- a/arm_compute/core/NEON/kernels/NEGaussianPyramidKernel.h +++ /dev/null @@ -1,105 +0,0 @@ -/* - * Copyright (c) 2016-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEGAUSSIANPYRAMIDKERNEL_H -#define ARM_COMPUTE_NEGAUSSIANPYRAMIDKERNEL_H - -#include "arm_compute/core/NEON/INESimpleKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** NEON kernel to perform a GaussianPyramid (horizontal pass) */ -class NEGaussianPyramidHorKernel : public INESimpleKernel -{ -public: - const char *name() const override - { - return "NEGaussianPyramidHorKernel"; - } - /** Default constructor */ - NEGaussianPyramidHorKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEGaussianPyramidHorKernel(NEGaussianPyramidHorKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEGaussianPyramidHorKernel &operator=(NEGaussianPyramidHorKernel &) = delete; - /** Allow instances of this class to be moved */ - NEGaussianPyramidHorKernel(NEGaussianPyramidHorKernel &&) = default; - /** Allow instances of this class to be moved */ - NEGaussianPyramidHorKernel &operator=(NEGaussianPyramidHorKernel &&) = default; - /** Default destructor */ - ~NEGaussianPyramidHorKernel() = default; - - /** Initialise the kernel's source, destination and border mode. - * - * @param[in] input Source tensor. Data type supported: U8. - * @param[out] output Destination tensor. Output should have half the input width. Data type supported: S16. 
- */ - void configure(const ITensor *input, ITensor *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; - -private: - int _l2_load_offset; -}; - -/** NEON kernel to perform a GaussianPyramid (vertical pass) */ -class NEGaussianPyramidVertKernel : public INESimpleKernel -{ -public: - const char *name() const override - { - return "NEGaussianPyramidVertKernel"; - } - /** Default constructor */ - NEGaussianPyramidVertKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEGaussianPyramidVertKernel(NEGaussianPyramidVertKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEGaussianPyramidVertKernel &operator=(NEGaussianPyramidVertKernel &) = delete; - /** Allow instances of this class to be moved */ - NEGaussianPyramidVertKernel(NEGaussianPyramidVertKernel &&) = default; - /** Allow instances of this class to be moved */ - NEGaussianPyramidVertKernel &operator=(NEGaussianPyramidVertKernel &&) = default; - /** Default destructor */ - ~NEGaussianPyramidVertKernel() = default; - - /** Initialise the kernel's source, destination and border mode. - * - * @param[in] input Source tensor. Data type supported: S16. - * @param[out] output Destination tensor. Output should have half the input height. Data type supported: U8. - */ - void configure(const ITensor *input, ITensor *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; - -private: - int _t2_load_offset; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEGAUSSIANPYRAMIDKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEGenerateProposalsLayerKernel.h b/arm_compute/core/NEON/kernels/NEGenerateProposalsLayerKernel.h deleted file mode 100644 index 7b82488c44..0000000000 --- a/arm_compute/core/NEON/kernels/NEGenerateProposalsLayerKernel.h +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Copyright (c) 2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEGENERATEPROPOSALSLAYERKERNEL_H -#define ARM_COMPUTE_NEGENERATEPROPOSALSLAYERKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -namespace arm_compute -{ -class ITensor; - -/** Interface for Compute All Anchors kernel */ -class NEComputeAllAnchorsKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEComputeAllAnchorsKernel"; - } - - /** Default constructor */ - NEComputeAllAnchorsKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEComputeAllAnchorsKernel(const NEComputeAllAnchorsKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEComputeAllAnchorsKernel &operator=(const NEComputeAllAnchorsKernel &) = delete; - /** Allow instances of this class to be moved */ - NEComputeAllAnchorsKernel(NEComputeAllAnchorsKernel &&) = default; - /** Allow instances of this class to be moved */ - NEComputeAllAnchorsKernel &operator=(NEComputeAllAnchorsKernel &&) = default; - /** Default destructor */ - ~NEComputeAllAnchorsKernel() = default; - - /** Set the input and output tensors. - * - * @param[in] anchors Source tensor. Original set of anchors of size (4, A), where A is the number of anchors. Data types supported: QSYMM16/F16/F32 - * @param[out] all_anchors Destination tensor. Destination anchors of size (4, H*W*A) where H and W are the height and width of the feature map and A is the number of anchors. Data types supported: Same as @p input - * @param[in] info Contains Compute Anchors operation information described in @ref ComputeAnchorsInfo - * - */ - void configure(const ITensor *anchors, ITensor *all_anchors, const ComputeAnchorsInfo &info); - - /** Static function to check if given info will lead to a valid configuration of @ref NEComputeAllAnchorsKernel - * - * @param[in] anchors Source tensor info. Original set of anchors of size (4, A), where A is the number of anchors. Data types supported: QSYMM16/F16/F32 - * @param[in] all_anchors Destination tensor info. Destination anchors of size (4, H*W*A) where H and W are the height and width of the feature map and A is the number of anchors. Data types supported: Same as @p input - * @param[in] info Contains Compute Anchors operation information described in @ref ComputeAnchorsInfo - * - * @return a Status - */ - static Status validate(const ITensorInfo *anchors, const ITensorInfo *all_anchors, const ComputeAnchorsInfo &info); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - template - void internal_run(const Window &window); - - const ITensor *_anchors; - ITensor *_all_anchors; - ComputeAnchorsInfo _anchors_info; -}; -} // arm_compute -#endif // ARM_COMPUTE_NEGENERATEPROPOSALSLAYERKERNEL_H diff --git a/arm_compute/core/NEON/kernels/NEHOGDescriptorKernel.h b/arm_compute/core/NEON/kernels/NEHOGDescriptorKernel.h deleted file mode 100644 index b0206ec091..0000000000 --- a/arm_compute/core/NEON/kernels/NEHOGDescriptorKernel.h +++ /dev/null @@ -1,149 +0,0 @@ -/* - * Copyright (c) 2016-2019 Arm Limited. 
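NEComputeAllAnchorsKernel, removed just above, expands a (4, A) set of base anchors into a (4, H*W*A) tensor by translating every base anchor to each cell of the feature map. A compact sketch of that expansion, assuming a single feature-map stride taken from ComputeAnchorsInfo; the ordering of the expanded anchors and the plain std::vector buffers are illustrative assumptions rather than the kernel's exact layout.

#include <cstddef>
#include <vector>

// anchors:     4 * num_anchors values, laid out as (x1, y1, x2, y2) per base anchor.
// all_anchors: filled with 4 * num_anchors * feat_w * feat_h values, same per-anchor layout.
void compute_all_anchors(const std::vector<float> &anchors, std::vector<float> &all_anchors,
                         size_t num_anchors, size_t feat_w, size_t feat_h, float stride)
{
    all_anchors.resize(4 * num_anchors * feat_w * feat_h);
    size_t idx = 0;
    for(size_t y = 0; y < feat_h; ++y)
    {
        for(size_t x = 0; x < feat_w; ++x)
        {
            const float shift_x = x * stride;
            const float shift_y = y * stride;
            for(size_t a = 0; a < num_anchors; ++a)
            {
                // Translate each base anchor to the current feature-map cell.
                all_anchors[idx++] = anchors[4 * a + 0] + shift_x;
                all_anchors[idx++] = anchors[4 * a + 1] + shift_y;
                all_anchors[idx++] = anchors[4 * a + 2] + shift_x;
                all_anchors[idx++] = anchors[4 * a + 3] + shift_y;
            }
        }
    }
}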
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEHOGDESCRIPTORKERNEL_H -#define ARM_COMPUTE_NEHOGDESCRIPTORKERNEL_H - -#include "arm_compute/core/IHOG.h" -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/Size2D.h" - -namespace arm_compute -{ -class ITensor; - -/** NEON kernel to perform HOG Orientation Binning */ -class NEHOGOrientationBinningKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEHOGOrientationBinningKernel"; - } - /** Default constructor */ - NEHOGOrientationBinningKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEHOGOrientationBinningKernel(const NEHOGOrientationBinningKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEHOGOrientationBinningKernel &operator=(const NEHOGOrientationBinningKernel &) = delete; - /** Allow instances of this class to be moved */ - NEHOGOrientationBinningKernel(NEHOGOrientationBinningKernel &&) = default; - /** Allow instances of this class to be moved */ - NEHOGOrientationBinningKernel &operator=(NEHOGOrientationBinningKernel &&) = default; - /** Default destructor */ - ~NEHOGOrientationBinningKernel() = default; - - /** Initialise the kernel's inputs, output and HOG's metadata - * - * @param[in] input_magnitude Input tensor which stores the magnitude of the gradient for each pixel. Data type supported: S16. - * @param[in] input_phase Input tensor which stores the phase of the gradient for each pixel. Data type supported: U8 - * @param[out] output Output tensor which stores the local HOG for each cell. Data type supported: F32. 
Number of channels supported: equal to the number of histogram bins per cell - * @param[in] hog_info HOG's metadata - */ - void configure(const ITensor *input_magnitude, const ITensor *input_phase, ITensor *output, const HOGInfo *hog_info); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Common signature for all the specialised block normalization functions - * - * @param[in] mag_row_ptr Pointer to the first row of the cell in the magnitude tensor - * @param[in] phase_row_ptr Pointer to the first row of the cell in the phase tensor - * @param[out] output_ptr Pointer to the output cell of hog space tensor - * @param[in] mag_stride Stride of the magnitude tensor - * @param[in] phase_stride Stride of the phase tensor - * @param[in] cell_width Width of the cell - * @param[in] cell_height Height of the cell - * @param[in] num_bins Number of bins for each cell - * @param[in] phase_scale Scale factor to apply to the phase in order to calculate the histogram index - */ - using OrientBinFunc = void(const int16_t *__restrict mag_row_ptr, const uint8_t *__restrict phase_row_ptr, float *__restrict output_ptr, size_t mag_stride, size_t phase_stride, size_t cell_width, - size_t cell_height, size_t num_bins, float phase_scale); - /** Orientation binning function to use for the particular cell width passed to configure() */ - OrientBinFunc *_func; - const ITensor *_input_magnitude; - const ITensor *_input_phase; - ITensor *_output; - size_t _cell_width; - size_t _cell_height; - size_t _num_bins; - float _phase_scale; -}; - -/** NEON kernel to perform HOG block normalization */ -class NEHOGBlockNormalizationKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEHOGBlockNormalizationKernel"; - } - /** Default constructor */ - NEHOGBlockNormalizationKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEHOGBlockNormalizationKernel(const NEHOGBlockNormalizationKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEHOGBlockNormalizationKernel &operator=(const NEHOGBlockNormalizationKernel &) = delete; - /** Allow instances of this class to be moved */ - NEHOGBlockNormalizationKernel(NEHOGBlockNormalizationKernel &&) = default; - /** Allow instances of this class to be moved */ - NEHOGBlockNormalizationKernel &operator=(NEHOGBlockNormalizationKernel &&) = default; - /** Default destructor */ - ~NEHOGBlockNormalizationKernel() = default; - - /** Initialise the kernel's input, output and HOG's metadata - * - * @param[in] input Input tensor which stores the local HOG for each cell. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per cell - * @param[out] output Output tensor which stores the normalised blocks. Data type supported: F32. 
Number of channels supported: equal to the number of histogram bins per block - * @param[in] hog_info HOG's metadata - */ - void configure(const ITensor *input, ITensor *output, const HOGInfo *hog_info); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Common signature for all the specialised block normalization functions - * - * @param[in] input_row_ptr Pointer to the first row of the block in the input hog space tensor - * @param[out] output_ptr Pointer to the output block of the hog normalized space - * @param[in] input_stride Stride of the input hog space tensor - * @param[in] num_cells_per_block_height Number of cells per block along the Y direction - * @param[in] num_bins_block_x Number of bins per block along the X direction - * @param[in] num_bins_block Number of total bins per block - * @param[in] l2_hyst_threshold Threshold to use for l2 hysteresis normalization - */ - using BlockNormFunc = void(const float *input_row_ptr, float *output_ptr, size_t input_stride, size_t num_cells_per_block_height, size_t num_bins_block_x, size_t num_bins_block, - float l2_hyst_threshold); - /** Block normalization function to use for the particular normalization type passed to configure() */ - BlockNormFunc *_func; - const ITensor *_input; - ITensor *_output; - Size2D _num_cells_per_block; - Size2D _num_cells_per_block_stride; - size_t _num_bins; - float _l2_hyst_threshold; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEHOGDESCRIPTORKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEHOGDetectorKernel.h b/arm_compute/core/NEON/kernels/NEHOGDetectorKernel.h deleted file mode 100644 index 2c23a2b11d..0000000000 --- a/arm_compute/core/NEON/kernels/NEHOGDetectorKernel.h +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
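The block-normalization kernel above gathers the cell histograms of a block into one descriptor vector and normalises it; the l2_hyst_threshold parameter corresponds to the standard L2-Hys scheme (L2-normalise, clip every component at the threshold, then L2-normalise again). A sketch of that scheme on a plain float vector; the epsilon guard is an assumption, not a documented constant of the kernel.

#include <algorithm>
#include <cmath>
#include <cstddef>

void l2_hys_normalize(float *block, size_t len, float l2_hyst_threshold)
{
    const float eps = 1e-6f; // assumed small guard against division by zero

    auto l2_normalize = [&](float *v)
    {
        float sum_sq = 0.f;
        for(size_t i = 0; i < len; ++i)
        {
            sum_sq += v[i] * v[i];
        }
        const float inv_norm = 1.f / std::sqrt(sum_sq + eps);
        for(size_t i = 0; i < len; ++i)
        {
            v[i] *= inv_norm;
        }
    };

    l2_normalize(block);                                   // first L2 pass
    for(size_t i = 0; i < len; ++i)
    {
        block[i] = std::min(block[i], l2_hyst_threshold);  // clip (hysteresis step)
    }
    l2_normalize(block);                                   // renormalise
}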
- */ -#ifndef ARM_COMPUTE_NEHOGDETECTORKERNEL_H -#define ARM_COMPUTE_NEHOGDETECTORKERNEL_H - -#include "arm_compute/core/IArray.h" -#include "arm_compute/core/IHOG.h" -#include "arm_compute/core/NEON/INEKernel.h" -#include "support/Mutex.h" - -namespace arm_compute -{ -class ITensor; - -/** NEON kernel to perform HOG detector kernel using linear SVM */ -class NEHOGDetectorKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEHOGDetectorKernel"; - } - /** Default constructor */ - NEHOGDetectorKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEHOGDetectorKernel(const NEHOGDetectorKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEHOGDetectorKernel &operator=(const NEHOGDetectorKernel &) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEHOGDetectorKernel(NEHOGDetectorKernel &&) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEHOGDetectorKernel &operator=(NEHOGDetectorKernel &&) = delete; - /** Default destructor */ - ~NEHOGDetectorKernel() = default; - - /** Initialise the kernel's input, HOG data-object, detection window, the stride of the detection window, the threshold and index of the object to detect - * - * @param[in] input Input tensor which stores the HOG descriptor obtained with @ref NEHOGOrientationBinningKernel. Data type supported: F32. Number of channels supported: equal to the number of histogram bins per block - * @param[in] hog HOG data object used by @ref NEHOGOrientationBinningKernel and @ref NEHOGBlockNormalizationKernel - * @param[out] detection_windows Array of @ref DetectionWindow. This array stores all the detected objects - * @param[in] detection_window_stride Distance in pixels between 2 consecutive detection windows in x and y directions. - * It must be multiple of the hog->info()->block_stride() - * @param[in] threshold (Optional) Threshold for the distance between features and SVM classifying plane - * @param[in] idx_class (Optional) Index of the class used for evaluating which class the detection window belongs to - */ - void configure(const ITensor *input, const IHOG *hog, IDetectionWindowArray *detection_windows, const Size2D &detection_window_stride, float threshold = 0.0f, uint16_t idx_class = 0); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - const ITensor *_input; - IDetectionWindowArray *_detection_windows; - const float *_hog_descriptor; - float _bias; - float _threshold; - uint16_t _idx_class; - size_t _num_bins_per_descriptor_x; - size_t _num_blocks_per_descriptor_y; - size_t _block_stride_width; - size_t _block_stride_height; - size_t _detection_window_width; - size_t _detection_window_height; - size_t _max_num_detection_windows; - arm_compute::Mutex _mutex; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEHOGDETECTORKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h b/arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h deleted file mode 100644 index 084dd7deba..0000000000 --- a/arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h +++ /dev/null @@ -1,105 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. 
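The detector kernel above slides a detection window across the grid of normalised HOG blocks, takes the dot product of the window's descriptor with the linear SVM weights plus a bias, and records windows whose score exceeds the threshold. A scalar sketch of scoring one window position; the descriptor layout and the DetectionWindow bookkeeping are deliberately left out.

#include <cstddef>

// Score one detection window: dot product of the window's HOG descriptor with
// the linear SVM weights, plus bias. The caller keeps the window only if the
// score exceeds the configured threshold.
float hog_window_score(const float *descriptor, const float *svm_weights,
                       size_t descriptor_size, float bias)
{
    float score = bias;
    for(size_t i = 0; i < descriptor_size; ++i)
    {
        score += descriptor[i] * svm_weights[i];
    }
    return score;
}

The _mutex member listed above suggests that pushes into the shared IDetectionWindowArray are serialised, since several threads may find detections at the same time.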
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEHARRISCORNERSKERNEL_H -#define ARM_COMPUTE_NEHARRISCORNERSKERNEL_H - -#include "arm_compute/core/CPP/kernels/CPPCornerCandidatesKernel.h" -#include "arm_compute/core/CPP/kernels/CPPSortEuclideanDistanceKernel.h" -#include "arm_compute/core/IArray.h" -#include "arm_compute/core/NEON/INEKernel.h" - -#include - -namespace arm_compute -{ -class ITensor; -using IImage = ITensor; - -/** Common interface for all Harris Score kernels */ -class INEHarrisScoreKernel : public INEKernel -{ -public: - /** Default constructor */ - INEHarrisScoreKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - INEHarrisScoreKernel(const INEHarrisScoreKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - INEHarrisScoreKernel &operator=(const INEHarrisScoreKernel &) = delete; - /** Allow instances of this class to be moved */ - INEHarrisScoreKernel(INEHarrisScoreKernel &&) = default; - /** Allow instances of this class to be moved */ - INEHarrisScoreKernel &operator=(INEHarrisScoreKernel &&) = default; - /** Default destructor */ - ~INEHarrisScoreKernel() = default; - -public: - /** Setup the kernel parameters - * - * @param[in] input1 Source image (gradient X). Data types supported: S16/S32 - * @param[in] input2 Source image (gradient Y). Data types supported: same as @ input1 - * @param[out] output Destination image (harris score). Data types supported: F32 - * @param[in] norm_factor Normalization factor to use accordingly with the gradient size (Must be different from 0) - * @param[in] strength_thresh Minimum threshold with which to eliminate Harris Corner scores (computed using the normalized Sobel kernel). - * @param[in] sensitivity Sensitivity threshold k from the Harris-Stephens equation - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 
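These Harris score kernels consume the two gradient images Gx and Gy and, for every pixel, accumulate the products gx*gx, gx*gy and gy*gy over a block_size x block_size window before applying the Harris-Stephens response. A scalar sketch for a single pixel, assuming the classic formulation score = det(M) - k * trace(M)^2, with norm_factor applied to the gradients and scores at or below strength_thresh suppressed to zero.

#include <cstddef>
#include <cstdint>

// Harris response at (x, y) for a block_size x block_size window.
// gx/gy are the gradient images (stride in elements); x and y must be at least
// block_size / 2 away from the image border.
float harris_score(const int16_t *gx, const int16_t *gy, size_t stride,
                   int x, int y, int block_size,
                   float norm_factor, float sensitivity, float strength_thresh)
{
    const int half = block_size / 2;
    float a = 0.f, b = 0.f, c = 0.f; // sum gx^2, sum gx*gy, sum gy^2
    for(int j = -half; j <= half; ++j)
    {
        for(int i = -half; i <= half; ++i)
        {
            const float dx = gx[(y + j) * stride + (x + i)] * norm_factor;
            const float dy = gy[(y + j) * stride + (x + i)] * norm_factor;
            a += dx * dx;
            b += dx * dy;
            c += dy * dy;
        }
    }
    const float det   = a * c - b * b;
    const float trace = a + c;
    const float score = det - sensitivity * trace * trace;
    return (score > strength_thresh) ? score : 0.f;
}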
- */ - virtual void configure(const IImage *input1, const IImage *input2, IImage *output, float norm_factor, float strength_thresh, float sensitivity, bool border_undefined) = 0; - -protected: - const IImage *_input1; /**< Source image - Gx component */ - const IImage *_input2; /**< Source image - Gy component */ - IImage *_output; /**< Source image - Harris score */ - float _sensitivity; /**< Sensitivity value */ - float _strength_thresh; /**< Threshold value */ - float _norm_factor; /**< Normalization factor */ - BorderSize _border_size; /**< Border size */ -}; - -/** Template NEON kernel to perform Harris Score. - * The implementation supports 3, 5, and 7 for the block_size - */ -template -class NEHarrisScoreKernel : public INEHarrisScoreKernel -{ -public: - const char *name() const override - { - return "NEHarrisScoreKernel"; - } - /** Default constructor */ - NEHarrisScoreKernel(); - // Inherited methods overridden: - void configure(const IImage *input1, const IImage *input2, IImage *output, float norm_factor, float strength_thresh, float sensitivity, bool border_undefined) override; - BorderSize border_size() const override; - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Common signature for all the specialised harris score functions */ - using HarrisScoreFunction = void(const void *__restrict input1_ptr, const void *__restrict input2_ptr, void *__restrict output_ptr, int32_t input_stride, - float norm_factor, float sensitivity, float strength_thresh); - /** Harris Score function to use for the particular image types passed to configure() */ - HarrisScoreFunction *_func; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEHARRISCORNERSKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEHeightConcatenateLayerKernel.h b/arm_compute/core/NEON/kernels/NEHeightConcatenateLayerKernel.h deleted file mode 100644 index 8a5e86acc4..0000000000 --- a/arm_compute/core/NEON/kernels/NEHeightConcatenateLayerKernel.h +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Copyright (c) 2019-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef ARM_COMPUTE_NEHEIGHTCONCATENATELAYERKERNEL_H -#define ARM_COMPUTE_NEHEIGHTCONCATENATELAYERKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -// Forward declarations -class ITensor; - -/** Interface for the height concatenate kernel. 
- * The input tensor will be concatenated into the output tensor. - */ -class NEHeightConcatenateLayerKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEHeightConcatenateLayerKernel"; - } - /** Default constructor */ - NEHeightConcatenateLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEHeightConcatenateLayerKernel(const NEHeightConcatenateLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEHeightConcatenateLayerKernel &operator=(const NEHeightConcatenateLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - NEHeightConcatenateLayerKernel(NEHeightConcatenateLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - NEHeightConcatenateLayerKernel &operator=(NEHeightConcatenateLayerKernel &&) = default; - /** Default destructor */ - ~NEHeightConcatenateLayerKernel() = default; - /** Initialise the kernel's inputs and output - * - * @param[in] input Input tensor info. Data types supported: All - * @param[in] height_offset The starting offset on the Y axis for the output tensor. - * @param[in,out] output Output tensor info. Data types supported: Same as @p input. - * - */ - void configure(const ITensorInfo *input, unsigned int height_offset, ITensorInfo *output); - /** Static function to check if given info will lead to a valid configuration of @ref NEHeightConcatenateLayerKernel - * - * @param[in] input Input tensor info. Data types supported: All - * @param[in] height_offset The starting offset on the Y axis for the output tensor. - * @param[in] output Output tensor info. Data types supported: Same as @p input. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, unsigned int height_offset, const ITensorInfo *output); - - // Inherited methods overridden: - void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override; - -private: - unsigned int _height_offset; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEHEIGHTCONCATENATELAYERKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEHistogramKernel.h b/arm_compute/core/NEON/kernels/NEHistogramKernel.h deleted file mode 100644 index 6e5b92273b..0000000000 --- a/arm_compute/core/NEON/kernels/NEHistogramKernel.h +++ /dev/null @@ -1,135 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEHISTOGRAMKERNEL_H -#define ARM_COMPUTE_NEHISTOGRAMKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -#include "support/Mutex.h" - -#include -#include - -namespace arm_compute -{ -class IDistribution1D; -class ITensor; -using IImage = ITensor; - -/** Interface for the histogram kernel */ -class NEHistogramKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEHistogramKernel"; - } - /** Default constructor */ - NEHistogramKernel(); - /** Default destructor */ - ~NEHistogramKernel() = default; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEHistogramKernel(const NEHistogramKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEHistogramKernel &operator=(const NEHistogramKernel &) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEHistogramKernel(NEHistogramKernel &&) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEHistogramKernel &operator=(NEHistogramKernel &&) = delete; - - /** Set the input image and the distribution output. - * - * @param[in] input Source image. Data type supported: U8. - * @param[out] output Destination distribution. - * @param[in,out] local_hist Array that the threads use to save their local histograms. - * It's size should be equal to (number_of_threads * num_bins), - * and the Window::thread_id() is used to determine the part of the array - * used by each thread. - * @param[out] window_lut LUT with pre-calculated possible window values. - * The size of the LUT should be equal to max_range_size and it will be filled - * during the configure stage, while it re-used in every run, therefore can be - * safely shared among threads. - */ - void configure(const IImage *input, IDistribution1D *output, uint32_t *local_hist, uint32_t *window_lut); - /** Set the input image and the distribution output. - * - * @note Used for histogram of fixed size equal to 256 - * - * @param[in] input Source image. Data type supported: U8. - * @param[out] output Destination distribution which must be of 256 bins.. - */ - void configure(const IImage *input, IDistribution1D *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Function to merge multiple partial histograms. - * - * @param[out] global_hist Pointer to the final histogram. - * @param[in] local_hist Pointer to the partial histograms. - * @param[in] bins Number of bins. - */ - void merge_histogram(uint32_t *global_hist, const uint32_t *local_hist, size_t bins); - /** Function to merge multiple minimum values of partial histograms. - * - * @param[out] global_min Pointer to the global min value. - * @param[in] local_min Local min value. 
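Each thread accumulates into its own slice of local_hist and the partial histograms are merged into the destination distribution at the end, so the hot counting loop never contends on shared bins. A minimal sketch of that merge; the kernel documented above merges one partial histogram at a time under its mutex, whereas this illustration folds all of them in a single call.

#include <cstddef>
#include <cstdint>

// Add each thread's partial histogram into the global one.
// local_hist holds num_threads consecutive blocks of `bins` counters.
void merge_histograms(uint32_t *global_hist, const uint32_t *local_hist,
                      size_t num_threads, size_t bins)
{
    for(size_t t = 0; t < num_threads; ++t)
    {
        const uint32_t *partial = local_hist + t * bins;
        for(size_t b = 0; b < bins; ++b)
        {
            global_hist[b] += partial[b];
        }
    }
}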
- */ - void merge_min(uint8_t *global_min, const uint8_t &local_min); - /** Function to perform histogram on the given window - * - * @param[in] win Region on which to execute the kernel - * @param[in] info Info about the executing thread - */ - void histogram_U8(Window win, const ThreadInfo &info); - /** Function to perform histogram on the given window where histogram is - * of fixed size 256 without ranges and offsets. - * - * @param[in] win Region on which to execute the kernel - * @param[in] info Info about the executing thread - */ - void histogram_fixed_U8(Window win, const ThreadInfo &info); - /** Pre-calculate the pixel windowing for every possible pixel - * - * Calculate (V - offset) * numBins / range where V is every possible pixel value. - * - * @note We currently support U8 image thus possible pixel values are between 0 and 255 - */ - void calculate_window_lut() const; - /** Common signature for all the specialised Histogram functions - * - * @param[in] window Region on which to execute the kernel. - */ - using HistogramFunctionPtr = void (NEHistogramKernel::*)(Window window, const ThreadInfo &info); - - HistogramFunctionPtr _func; ///< Histogram function to use for the particular image types passed to configure() - const IImage *_input; - IDistribution1D *_output; - uint32_t *_local_hist; - uint32_t *_window_lut; - arm_compute::Mutex _hist_mtx; - static constexpr unsigned int _max_range_size{ 256 }; ///< 256 possible pixel values as we handle only U8 images -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEHISTOGRAMKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEIm2ColKernel.h b/arm_compute/core/NEON/kernels/NEIm2ColKernel.h deleted file mode 100644 index 95825ade18..0000000000 --- a/arm_compute/core/NEON/kernels/NEIm2ColKernel.h +++ /dev/null @@ -1,139 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEIM2COLKERNEL_H -#define ARM_COMPUTE_NEIM2COLKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; -class Size2D; - -/** Interface for the im2col reshape kernel. - * - * Rearranges image blocks into columns. It is used to strip out each convolution block to a single column. - * It is used to transform a convolution to a plain matrix multiplication. 
- * - * For example taking into account the image below and assuming 3x3 image blocks with stride of 1 we have: - * - * @f[ - * \left( \begin{array}{cccc} - * a00 & a01 & a02 & a03 \\ - * a10 & a11 & a12 & a13 \\ - * a20 & a21 & a22 & a23 \\ - * a30 & a31 & a32 & a33 \\ - * \end{array} \right) - * \rightarrow - * \left( \begin{array}{ccccccccc} - * a00 & a01 & a02 & a10 & a11 & a12 & a20 & a21 & a22 \\ - * a01 & a02 & a03 & a11 & a12 & a13 & a21 & a22 & a23 \\ - * a10 & a11 & a12 & a20 & a21 & a22 & a30 & a31 & a32 \\ - * a11 & a12 & a13 & a21 & a22 & a23 & a31 & a32 & a33 \\ - * \end{array} \right) - * @f] - */ -class NEIm2ColKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEIm2ColKernel"; - } - /** Default constructor */ - NEIm2ColKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEIm2ColKernel(const NEIm2ColKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEIm2ColKernel &operator=(const NEIm2ColKernel &) = delete; - /** Allow instances of this class to be moved */ - NEIm2ColKernel(NEIm2ColKernel &&) = default; - /** Allow instances of this class to be moved */ - NEIm2ColKernel &operator=(NEIm2ColKernel &&) = default; - /** Default destructor */ - ~NEIm2ColKernel() = default; - - /** Set the input and output of the kernel. - * - * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM], - * while every optional dimension from 4 and above represent a batch of inputs. - * Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32 - * Note: QASYMM8/QASYMM8_SIGNED works only for has_bias = false - * @param[out] output The output tensor. Data types supported: Same as @p input - * @param[in] kernel_dims The kernel dimensions (width and height). - * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. - * @param[in] has_bias In case biases are provided expands the matrix with 1. - * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). - * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is not supported - */ - void configure(const ITensor *input, ITensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, - bool has_bias, const Size2D &dilation = Size2D(1U, 1U), unsigned int num_groups = 1); - /** Static function to check if given info will lead to a valid configuration of @ref NEIm2ColKernel - * - * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM], - * while every optional dimension from 4 and above represent a batch of inputs. - * Data types supported: QASYMM8/QASYMM8_SIGNED/BFLOAT16/F16/F32 - * Note: QASYMM8/QASYMM8_SIGNED works only for has_bias = false - * @param[in] output The output tensor. Data types supported: Same as @p input - * @param[in] kernel_dims The kernel dimensions (width and height). - * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. - * @param[in] has_bias In case biases are provided expands the matrix with 1. - * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). - * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. 
num_groups != 1 is not supported - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, - bool has_bias, const Size2D &dilation = Size2D(1U, 1U), unsigned int num_groups = 1); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Template function to run im2col - * - * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). - */ - template - void run_im2col(const Window &window); - - /** Common signature for all the specialised im2col functions - * - * @param[in] window Region on which to execute the kernel. - */ - using Im2ColFunctionPtr = void (NEIm2ColKernel::*)(const Window &window); - - Im2ColFunctionPtr _func; - const ITensor *_input; - ITensor *_output; - std::pair _convolved_dims; - PadStrideInfo _conv_info; - unsigned int _kernel_width; - unsigned int _kernel_height; - bool _has_bias; - Size2D _dilation; - DataLayout _data_layout; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEIM2COLKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEInstanceNormalizationLayerKernel.h b/arm_compute/core/NEON/kernels/NEInstanceNormalizationLayerKernel.h deleted file mode 100644 index a5bd453ac7..0000000000 --- a/arm_compute/core/NEON/kernels/NEInstanceNormalizationLayerKernel.h +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Copyright (c) 2019-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
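The im2col kernel removed above rearranges convolution patches into matrix rows exactly as in the 4x4 input / 3x3 block / stride-1 example shown in its class comment, and when has_bias is set it expands each row with a trailing 1 so the bias folds into the subsequent matrix multiply. A single-channel sketch of that rearrangement, assuming no padding and plain float buffers.

#include <cstddef>
#include <vector>

// Single-channel im2col: every output row is one flattened k x k patch taken in
// row-major order; if has_bias is set, a trailing 1 is appended to each row.
std::vector<float> im2col(const std::vector<float> &img, size_t w, size_t h,
                          size_t k, size_t stride, bool has_bias)
{
    const size_t out_w = (w - k) / stride + 1;
    const size_t out_h = (h - k) / stride + 1;
    const size_t cols  = k * k + (has_bias ? 1 : 0);
    std::vector<float> out(out_w * out_h * cols);

    size_t row = 0;
    for(size_t y = 0; y + k <= h; y += stride)
    {
        for(size_t x = 0; x + k <= w; x += stride, ++row)
        {
            float *dst = &out[row * cols];
            for(size_t j = 0; j < k; ++j)
            {
                for(size_t i = 0; i < k; ++i)
                {
                    *dst++ = img[(y + j) * w + (x + i)];
                }
            }
            if(has_bias)
            {
                *dst = 1.f;
            }
        }
    }
    return out;
}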
- */ -#ifndef ARM_COMPUTE_NEINSTANCENORMALIZATIONLAYERKERNEL_H -#define ARM_COMPUTE_NEINSTANCENORMALIZATIONLAYERKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; -struct InstanceNormalizationLayerKernelInfo; - -/** Interface for performing an instance normalization */ -class NEInstanceNormalizationLayerKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEInstanceNormalizationLayerKernel"; - } - /** Default constructor */ - NEInstanceNormalizationLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEInstanceNormalizationLayerKernel(const NEInstanceNormalizationLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEInstanceNormalizationLayerKernel &operator=(const NEInstanceNormalizationLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - NEInstanceNormalizationLayerKernel(NEInstanceNormalizationLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - NEInstanceNormalizationLayerKernel &operator=(NEInstanceNormalizationLayerKernel &&) = default; - /** Default destructor */ - ~NEInstanceNormalizationLayerKernel() = default; - /** Set the input and output tensors. - * - * @param[in, out] input Source tensor. Data types supported: F16/F32. Data layout supported: NCHW - * In case of @p output tensor = nullptr this tensor will store the result of the normalization. - * @param[out] output Destination tensor. Data types and data layouts supported: same as @p input. - * @param[in] info Kernel meta-data descriptor - */ - void configure(ITensor *input, ITensor *output, const InstanceNormalizationLayerKernelInfo &info); - - /** Static function to check if given info will lead to a valid configuration of @ref NEInstanceNormalizationLayer. - * - * @param[in] input Source tensor info. Data types supported: F16/F32. Data layout supported: NCHW - * @param[in] output Destination tensor info. Data types and data layouts supported: same as @p input. - * @param[in] info Kernel meta-data descriptor - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const InstanceNormalizationLayerKernelInfo &info); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Common signature for all the specialized instance normalization functions - * - * @param[in, out] input An input tensor. In case of @p output tensor = nullptr this tensor will store the result of the normalization. - * @param[out] output The output tensor. - * @param[in] gamma The scale scalar value applied to the normalized tensor. Defaults to 1.0 - * @param[in] beta The offset scalar value applied to the normalized tensor. Defaults to 0.0 - * @param[in] epsilon Lower bound value for the normalization. 
Defaults to 1e-12 - */ - using NormalizationFunction = void(ITensor *input, ITensor *output, float gamma, float beta, float epsilon, const Window &window); - - NormalizationFunction *_func; - ITensor *_input; - ITensor *_output; - float _gamma; - float _beta; - float _epsilon; - bool _use_mixed_precision{ true }; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEINSTANCENORMALIZATIONLAYERKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEIntegralImageKernel.h b/arm_compute/core/NEON/kernels/NEIntegralImageKernel.h deleted file mode 100644 index 57f24befdb..0000000000 --- a/arm_compute/core/NEON/kernels/NEIntegralImageKernel.h +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright (c) 2016-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEINTEGRALIMAGEKERNEL_H -#define ARM_COMPUTE_NEINTEGRALIMAGEKERNEL_H - -#include "arm_compute/core/NEON/INESimpleKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Kernel to perform an image integral on an image */ -class NEIntegralImageKernel : public INESimpleKernel -{ -public: - const char *name() const override - { - return "NEIntegralImageKernel"; - } - /** Set the source, destination and border mode of the kernel - * - * @param[in] input Source tensor. Data type supported: U8 - * @param[out] output Destination tensor. Data type supported: U32 - */ - void configure(const ITensor *input, ITensor *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; - bool is_parallelisable() const override; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEINTEGRALIMAGEKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEL2NormalizeLayerKernel.h b/arm_compute/core/NEON/kernels/NEL2NormalizeLayerKernel.h deleted file mode 100644 index 302d04e9f3..0000000000 --- a/arm_compute/core/NEON/kernels/NEL2NormalizeLayerKernel.h +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Copyright (c) 2017-2019 Arm Limited. 
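Instance normalization, as configured above, normalises every H x W plane of every sample independently and then applies the gamma scale and beta offset, with epsilon as the lower bound inside the square root. A sketch for one plane of an NCHW tensor; iteration over samples and channels, and the mixed-precision path hinted at by _use_mixed_precision, are omitted.

#include <cmath>
#include <cstddef>

// Instance normalization of one H x W plane (one channel of one sample, NCHW):
// out = gamma * (x - mean) / sqrt(var + epsilon) + beta.
void instance_norm_plane(const float *in, float *out, size_t hw,
                         float gamma, float beta, float epsilon)
{
    float sum = 0.f, sum_sq = 0.f;
    for(size_t i = 0; i < hw; ++i)
    {
        sum    += in[i];
        sum_sq += in[i] * in[i];
    }
    const float mean = sum / hw;
    const float var  = sum_sq / hw - mean * mean;
    const float inv  = 1.f / std::sqrt(var + epsilon);
    for(size_t i = 0; i < hw; ++i)
    {
        out[i] = gamma * (in[i] - mean) * inv + beta;
    }
}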
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEL2NORMALIZELAYERKERNEL_H -#define ARM_COMPUTE_NEL2NORMALIZELAYERKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for performing a L2 normalize on a given axis given the square sum of it in this axis */ -class NEL2NormalizeLayerKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEL2NormalizeLayerKernel"; - } - /** Default constructor */ - NEL2NormalizeLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEL2NormalizeLayerKernel(const NEL2NormalizeLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEL2NormalizeLayerKernel &operator=(const NEL2NormalizeLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - NEL2NormalizeLayerKernel(NEL2NormalizeLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - NEL2NormalizeLayerKernel &operator=(NEL2NormalizeLayerKernel &&) = default; - /** Default destructor */ - ~NEL2NormalizeLayerKernel() = default; - /** Set the input and output tensors. - * - * @param[in] input Source tensor. Data types supported: F16/F32. - * @param[in] sum Sum values tensor. Data types supported: same as @p input. - * Sum will have the same number of dimensions as input. - * @param[out] output Destination tensor. Data types and data layouts supported: same as @p input. - * Output will have the same number of dimensions as input. - * @param[in] axis Axis along which to reduce. Negative values wrap around. Maximum supported actual reduction axis : 2 - * @param[in] epsilon Lower bound value for the normalization. - */ - void configure(const ITensor *input, const ITensor *sum, ITensor *output, int axis, float epsilon); - - /** Static function to check if given info will lead to a valid configuration of @ref NEL2NormalizeLayerKernel. - * - * @param[in] input Source tensor info. Data types supported: F16/F32. - * @param[in] sum Sum values tensor info. Data types supported: same as @p input. - * Sum will have the same number of dimensions as input. - * @param[in] output Destination tensor info. Data types and data layouts supported: same as @p input. - * Output will have the same number of dimensions as input. - * @param[in] axis Axis along which to reduce. 
Negative values wrap around. Maximum supported actual reduction axis : 2 - * @param[in] epsilon Lower bound value for the normalization. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *sum, const ITensorInfo *output, int axis, float epsilon); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - const ITensor *_input; - const ITensor *_sum; - ITensor *_output; - unsigned int _actual_axis; - float _epsilon; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEL2NORMALIZELAYERKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NELKTrackerKernel.h b/arm_compute/core/NEON/kernels/NELKTrackerKernel.h deleted file mode 100644 index 90e5f41f8a..0000000000 --- a/arm_compute/core/NEON/kernels/NELKTrackerKernel.h +++ /dev/null @@ -1,149 +0,0 @@ -/* - * Copyright (c) 2016-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_LKTRACKERKERNEL_H -#define ARM_COMPUTE_LKTRACKERKERNEL_H - -#include "arm_compute/core/IArray.h" -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/Types.h" - -#include -#include -#include -#include - -namespace arm_compute -{ -class ITensor; - -/** Internal keypoint class for Lucas-Kanade Optical Flow */ -struct NELKInternalKeypoint -{ - float x{ 0.f }; /**< x coordinate of the keypoint */ - float y{ 0.f }; /**< y coordinate of the keypoint */ - bool tracking_status{ false }; /**< the tracking status of the keypoint */ -}; - -/** Interface for NEON Array of Internal Key Points. 
*/ -using INELKInternalKeypointArray = IArray; - -/** Interface for the Lucas-Kanade tracker kernel */ -class NELKTrackerKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NELKTrackerKernel"; - } - /** Default constructor */ - NELKTrackerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NELKTrackerKernel(const NELKTrackerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NELKTrackerKernel &operator=(const NELKTrackerKernel &) = delete; - /** Allow instances of this class to be moved */ - NELKTrackerKernel(NELKTrackerKernel &&) = default; - /** Allow instances of this class to be moved */ - NELKTrackerKernel &operator=(NELKTrackerKernel &&) = default; - /** Default destructor */ - ~NELKTrackerKernel() = default; - - /** Initialise the kernel input and output - * - * @param[in] input_old Pointer to the input old tensor. Data type supported: U8 - * @param[in] input_new Pointer to the input new tensor. Data type supported. U8 - * @param[in] old_scharr_gx Pointer to the input scharr X tensor. Data type supported: S16 - * @param[in] old_scharr_gy Pointer to the input scharr Y tensor. Data type supported: S16 - * @param[in] old_points Pointer to the IKeyPointArray storing old key points - * @param[in] new_points_estimates Pointer to the IKeyPointArray storing new estimates key points - * @param[out] new_points Pointer to the IKeyPointArray storing new key points - * @param[in, out] old_points_internal Pointer to the array of NELKInternalKeypoint for old points - * @param[out] new_points_internal Pointer to the array of NELKInternalKeypoint for new points - * @param[in] termination The criteria to terminate the search of each keypoint. 
- * @param[in] use_initial_estimate The flag to indicate whether the initial estimated position should be used - * @param[in] epsilon The error for terminating the algorithm - * @param[in] num_iterations The maximum number of iterations before terminate the algorithm - * @param[in] window_dimension The size of the window on which to perform the algorithm - * @param[in] level The pyramid level - * @param[in] num_levels The number of pyramid levels - * @param[in] pyramid_scale Scale factor used for generating the pyramid - */ - void configure(const ITensor *input_old, const ITensor *input_new, const ITensor *old_scharr_gx, const ITensor *old_scharr_gy, - const IKeyPointArray *old_points, const IKeyPointArray *new_points_estimates, IKeyPointArray *new_points, - INELKInternalKeypointArray *old_points_internal, INELKInternalKeypointArray *new_points_internal, - Termination termination, bool use_initial_estimate, float epsilon, unsigned int num_iterations, size_t window_dimension, - size_t level, size_t num_levels, float pyramid_scale); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; - -private: - /** Initialise the array of keypoints in the provide range - * - * @param[in] start Index of first element in the keypoints array to be initialised - * @param[in] end Index after last elelemnt in the keypoints array to be initialised - */ - void init_keypoints(int start, int end); - /** Compute the structure tensor A^T * A based on the scharr gradients I_x and I_y - * - * @param[in] keypoint Keypoint for which gradients are computed - * @param[out] bilinear_ix Intermediate interpolated data for X gradient - * @param[out] bilinear_iy Intermediate interpolated data for Y gradient - * - * @return Values A11, A12, A22 - */ - std::tuple compute_spatial_gradient_matrix(const NELKInternalKeypoint &keypoint, int32_t *bilinear_ix, int32_t *bilinear_iy); - /** Compute the vector A^T * b, i.e. 
-sum(I_d * I_t) for d in {x,y} - * - * @param[in] old_keypoint Old keypoint for which gradient is computed - * @param[in] new_keypoint New keypoint for which gradient is computed - * @param[in] bilinear_ix Intermediate interpolated data for X gradient - * @param[in] bilinear_iy Intermediate interpolated data for Y gradient - * - * @return Values b1, b2 - */ - std::pair<float, float> compute_image_mismatch_vector(const NELKInternalKeypoint &old_keypoint, const NELKInternalKeypoint &new_keypoint, const int32_t *bilinear_ix, const int32_t *bilinear_iy); - - const ITensor *_input_old; - const ITensor *_input_new; - const ITensor *_old_scharr_gx; - const ITensor *_old_scharr_gy; - IKeyPointArray *_new_points; - const IKeyPointArray *_new_points_estimates; - const IKeyPointArray *_old_points; - INELKInternalKeypointArray *_old_points_internal; - INELKInternalKeypointArray *_new_points_internal; - Termination _termination; - bool _use_initial_estimate; - float _pyramid_scale; - float _epsilon; - unsigned int _num_iterations; - int _window_dimension; - unsigned int _level; - unsigned int _num_levels; - ValidRegion _valid_region; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NELKTRACKERKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.h b/arm_compute/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.h deleted file mode 100644 index ba14598135..0000000000 --- a/arm_compute/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.h +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Copyright (c) 2017-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NELOCALLYCONNECTEDMATRIXMULTIPLYKERNEL_H -#define ARM_COMPUTE_NELOCALLYCONNECTEDMATRIXMULTIPLYKERNEL_H -
-#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** NEON kernel to multiply each row of the first tensor with the low 2 dimensions of the second tensor.
*/ -class NELocallyConnectedMatrixMultiplyKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NELocallyConnectedMatrixMultiplyKernel"; - } - /** Default constructor */ - NELocallyConnectedMatrixMultiplyKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NELocallyConnectedMatrixMultiplyKernel(const NELocallyConnectedMatrixMultiplyKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NELocallyConnectedMatrixMultiplyKernel &operator=(const NELocallyConnectedMatrixMultiplyKernel &) = delete; - /** Allow instances of this class to be moved */ - NELocallyConnectedMatrixMultiplyKernel(NELocallyConnectedMatrixMultiplyKernel &&) = default; - /** Allow instances of this class to be moved */ - NELocallyConnectedMatrixMultiplyKernel &operator=(NELocallyConnectedMatrixMultiplyKernel &&) = default; - /** Initialise the kernel's input and output - * - * @param[in] input0 First input tensor. Data types supported: F16, F32 - * @param[in] input1 Second input tensor containing the Matrix B. Data type supported: same as @p input0 - * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0 - */ - void configure(const ITensor *input0, const ITensor *input1, ITensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref NELocallyConnectedMatrixMultiplyKernel - * - * @param[in] input0 First input tensor info. Data types supported: F16, F32 - * @param[in] input1 Second input tensor info. Data type supported: same as @p input0 - * @param[in] output Output tensor info. Data type supported: same as @p input0 - * - * @return a status - */ - static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - const ITensor *_input0; - const ITensor *_input1; - ITensor *_output; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NELOCALLYCONNECTEDMATRIXMULTIPLYKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEMagnitudePhaseKernel.h b/arm_compute/core/NEON/kernels/NEMagnitudePhaseKernel.h deleted file mode 100644 index ea42a38994..0000000000 --- a/arm_compute/core/NEON/kernels/NEMagnitudePhaseKernel.h +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Copyright (c) 2016-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEMAGNITUDEPHASEKERNEL_H -#define ARM_COMPUTE_NEMAGNITUDEPHASEKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class ITensor; - -/** Template interface for the kernel to compute magnitude and phase */ -template -class NEMagnitudePhaseKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEMagnitudePhaseKernel"; - } - /** Default constructor */ - NEMagnitudePhaseKernel(); - /** Destructor */ - ~NEMagnitudePhaseKernel() = default; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEMagnitudePhaseKernel(const NEMagnitudePhaseKernel &) = delete; - /** Default move constructor */ - NEMagnitudePhaseKernel(NEMagnitudePhaseKernel &&) = default; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEMagnitudePhaseKernel &operator=(const NEMagnitudePhaseKernel &) = delete; - /** Default move assignment operator */ - NEMagnitudePhaseKernel &operator=(NEMagnitudePhaseKernel &&) = default; - - /** Initialise the kernel's input, output. - * - * @note At least one of out1 or out2 must be set - * - * @param[in] gx Gradient X tensor. Data type supported: S16. - * @param[in] gy Gradient Y tensor. Data type supported: S16. - * @param[out] magnitude (Optional) The output tensor - Magnitude. Data type supported: S16. - * @param[out] phase (Optional) The output tensor - Phase. Data type supported: U8. - */ - void configure(const ITensor *gx, const ITensor *gy, ITensor *magnitude, ITensor *phase); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Function to perform magnitude on the given window - * - * @param[in] window Region on which to execute the kernel - */ - void magnitude(const Window &window); - /** Function to perform phase on the given window - * - * @param[in] window Region on which to execute the kernel - */ - void phase(const Window &window); - /** Function to perform magnitude and phase on the given window - * - * @param[in] window Region on which to execute the kernel - */ - void magnitude_phase(const Window &window); - -private: - /** Common signature for all the specialised MagnitudePhase functions - * - * @param[in] window Region on which to execute the kernel. - */ - using MagnitudePhaseFunctionPtr = void (NEMagnitudePhaseKernel::*)(const Window &window); - /** MagnitudePhase function to use for the particular formats passed to configure() */ - MagnitudePhaseFunctionPtr _func; - const ITensor *_gx; /**< Input gradient X */ - const ITensor *_gy; /**< Input gradient Y */ - ITensor *_magnitude; /**< Output - Magnitude */ - ITensor *_phase; /**< Output - Phase */ -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEMAGNITUDEPHASEKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEMaxUnpoolingLayerKernel.h b/arm_compute/core/NEON/kernels/NEMaxUnpoolingLayerKernel.h deleted file mode 100644 index f3ea049a87..0000000000 --- a/arm_compute/core/NEON/kernels/NEMaxUnpoolingLayerKernel.h +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Copyright (c) 2020 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEMAXUNPOOLINGLAYERKERNEL_H -#define ARM_COMPUTE_NEMAXUNPOOLINGLAYERKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the pooling layer kernel */ -class NEMaxUnpoolingLayerKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEMaxUnpoolingLayerKernel"; - } - /** Default constructor */ - NEMaxUnpoolingLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEMaxUnpoolingLayerKernel(const NEMaxUnpoolingLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEMaxUnpoolingLayerKernel &operator=(const NEMaxUnpoolingLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - NEMaxUnpoolingLayerKernel(NEMaxUnpoolingLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - NEMaxUnpoolingLayerKernel &operator=(NEMaxUnpoolingLayerKernel &&) = default; - /** Default destructor */ - ~NEMaxUnpoolingLayerKernel() = default; - /** Set the input and output tensors. - * - * @note Output shape must be equal to the shape of the original input to pool. - * - * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] indices Tensor containing the offset to store the input elements in the output tensor. - * @ref NEPoolingLayerKernel with indices should precede this function in order to - * properly reconstruct the output tensor. - * The tensor shape of this tensor has to be equal to the input tensor shape. Data type supported: U32. - * @param[out] output Destination tensor. Data types supported: Same as @p input. - * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo. - */ - void configure(const ITensor *input, const ITensor *indices, ITensor *output, const PoolingLayerInfo &pool_info); - /** Static function to check if given info will lead to a valid configuration of @ref NEMaxUnpoolingLayerKernel - * - * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] output Destination tensor info. Data types supported: Same as @p input. - * @param[in] indices Tensor info of the indices of the maximal values. Data type supported: U32. 
- * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *indices, const ITensorInfo *output, const PoolingLayerInfo &pool_info); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Function to perform 2x2 pooling and compute the pooling indices. The indices can be used for max unpool. - * - * @param[in] window_input Input region on which to execute the kernel. - */ - template - void unpooling2(const Window &window_input); - - using UnpoolingFunction = void (NEMaxUnpoolingLayerKernel::*)(const Window &window); - -private: - UnpoolingFunction _func; - const ITensor *_input; - ITensor *_output; - const ITensor *_indices; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEMAXUNPOOLINGLAYERKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEMeanStdDevKernel.h b/arm_compute/core/NEON/kernels/NEMeanStdDevKernel.h deleted file mode 100644 index eef0e2b586..0000000000 --- a/arm_compute/core/NEON/kernels/NEMeanStdDevKernel.h +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEMEANSTDDEVKERNEL_H -#define ARM_COMPUTE_NEMEANSTDDEVKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -#include "support/Mutex.h" - -#include - -namespace arm_compute -{ -class ITensor; -using IImage = ITensor; - -/** Interface for the kernel to calculate mean and standard deviation of input image pixels. 
*/ -class NEMeanStdDevKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEMeanStdDevKernel"; - } - /** Default constructor */ - NEMeanStdDevKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEMeanStdDevKernel(const NEMeanStdDevKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEMeanStdDevKernel &operator=(const NEMeanStdDevKernel &) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEMeanStdDevKernel(NEMeanStdDevKernel &&) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEMeanStdDevKernel &operator=(NEMeanStdDevKernel &&) = delete; - /** Default destructor */ - ~NEMeanStdDevKernel() = default; - - /** Initialise the kernel's input and outputs. - * - * @param[in] input Input image. Data type supported: U8. - * @param[out] mean Input average pixel value. - * @param[out] global_sum Keeps global sum of pixel values. - * @param[out] stddev (Optional) Output standard deviation of pixel values. - * @param[out] global_sum_squared (Optional if stddev is not set, required if stddev is set) Keeps global sum of squared pixel values. - */ - void configure(const IImage *input, float *mean, uint64_t *global_sum, float *stddev = nullptr, uint64_t *global_sum_squared = nullptr); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - - BorderSize border_size() const override; - -private: - const IImage *_input; - float *_mean; - float *_stddev; - uint64_t *_global_sum; - uint64_t *_global_sum_squared; - arm_compute::Mutex _mtx; - BorderSize _border_size; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEMEANSTDDEVKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEMeanStdDevNormalizationKernel.h b/arm_compute/core/NEON/kernels/NEMeanStdDevNormalizationKernel.h deleted file mode 100644 index 66b907541e..0000000000 --- a/arm_compute/core/NEON/kernels/NEMeanStdDevNormalizationKernel.h +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Copyright (c) 2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEMEANSTDDEVNORMALIZATIONKERNEL_H -#define ARM_COMPUTE_NEMEANSTDDEVNORMALIZATIONKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -#include -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ - -namespace arm_compute -{ -class ITensor; - -/** Interface for the kernel to normalize the input 2D tensor across the first dimension with respect to mean and standard deviation of the same dimension. */ -class NEMeanStdDevNormalizationKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEMeanStdDevNormalizationKernel"; - } - /** Default constructor */ - NEMeanStdDevNormalizationKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEMeanStdDevNormalizationKernel(const NEMeanStdDevNormalizationKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEMeanStdDevNormalizationKernel &operator=(const NEMeanStdDevNormalizationKernel &) = delete; - /** Allow instances of this class to be moved */ - NEMeanStdDevNormalizationKernel(NEMeanStdDevNormalizationKernel &&) = default; - /** Allow instances of this class to be moved */ - NEMeanStdDevNormalizationKernel &operator=(NEMeanStdDevNormalizationKernel &&) = default; - /** Default destructor */ - ~NEMeanStdDevNormalizationKernel() = default; - /** Initialise the kernel's input and outputs. - * - * @note If the output tensor is a nullptr, the normalization will be performed in-place. - * - * @param[in, out] input Source tensor with 2 dimensions. In case of @p output tensor = nullptr, - * this tensor will store the result of the normalization. Data types supported: F16/F32. - * @param[out] output (Optional) Destination tensor. It can be nullptr in case of in-place computation. Data type supported: same as @p input - * @param[in] epsilon (Optional) Small float to avoid division by zero in case of zero standard deviation. Defaults to 1e-8. - */ - void configure(ITensor *input, ITensor *output = nullptr, float epsilon = 1e-8f); - /** Static function to check if given info will lead to a valid configuration of @ref NEMeanStdDevNormalizationKernel - * - * @param[in] input Source tensor info with 2 dimensions. In case of @p output tensor info = nullptr, - * this tensor will store the result of the normalization. Data types supported: F16/F32. - * @param[in] output (Optional) Destination tensor info. It can be nullptr in case of in-place computation. Data type supported: same as @p input - * @param[in] epsilon (Optional) Small float to avoid division by zero in case of zero standard deviation. Defaults to 1e-8. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output = nullptr, float epsilon = 1e-8f); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Normalizes the input with respect to mean and standard deviation. - * - * @param[in] window Region on which to execute the kernel. 
- */ - template - void mean_stddev_normalization(const Window &window); - - ITensor *_input; - ITensor *_output; - float _epsilon; - - using MeanStdDevNormFunction = void (NEMeanStdDevNormalizationKernel::*)(const Window &window); - - MeanStdDevNormFunction _func; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEMEANSTDDEVNORMALIZATIONKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEMedian3x3Kernel.h b/arm_compute/core/NEON/kernels/NEMedian3x3Kernel.h deleted file mode 100644 index f2871e2ab5..0000000000 --- a/arm_compute/core/NEON/kernels/NEMedian3x3Kernel.h +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Copyright (c) 2016-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEMEDIAN3x3KERNEL_H -#define ARM_COMPUTE_NEMEDIAN3x3KERNEL_H - -#include "arm_compute/core/NEON/INESimpleKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Kernel to perform a median filter on a tensor */ -class NEMedian3x3Kernel : public INESimpleKernel -{ -public: - const char *name() const override - { - return "NEMedian3x3Kernel"; - } - /** Set the source, destination and border mode of the kernel - * - * @param[in] input Source tensor. Data type supported: U8 - * @param[out] output Destination tensor. Data type supported: U8 - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ITensor *input, ITensor *output, bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEMEDIAN3x3KERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEMemsetKernel.h b/arm_compute/core/NEON/kernels/NEMemsetKernel.h deleted file mode 100644 index f9a1914360..0000000000 --- a/arm_compute/core/NEON/kernels/NEMemsetKernel.h +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2018-2019 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEMEMSETKERNEL_H -#define ARM_COMPUTE_NEMEMSETKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/PixelValue.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -// Forward declarations -class ITensor; - -/** Interface for filling the planes of a tensor */ -class NEMemsetKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEMemsetKernel"; - } - /** Default constructor */ - NEMemsetKernel(); - /** Default destructor */ - ~NEMemsetKernel() = default; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEMemsetKernel(const NEMemsetKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEMemsetKernel &operator=(const NEMemsetKernel &) = delete; - /** Allow instances of this class to be moved */ - NEMemsetKernel(NEMemsetKernel &&) = default; - /** Allow instances of this class to be moved */ - NEMemsetKernel &operator=(NEMemsetKernel &&) = default; - /** Initialise the kernel's tensor and filling value - * - * @param[in,out] tensor Input tensor to fill. Supported data types: All - * @param[in] constant_value The value used to fill the planes of the tensor - */ - void configure(ITensor *tensor, const PixelValue &constant_value); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - ITensor *_tensor; - PixelValue _constant_value; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEMEMSETKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEMinMaxLayerKernel.h b/arm_compute/core/NEON/kernels/NEMinMaxLayerKernel.h deleted file mode 100644 index e7e87e9339..0000000000 --- a/arm_compute/core/NEON/kernels/NEMinMaxLayerKernel.h +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef ARM_COMPUTE_NEMINMAXLAYERKERNEL_H -#define ARM_COMPUTE_NEMINMAXLAYERKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -#include "support/Mutex.h" - -#include - -namespace arm_compute -{ -class ITensor; - -/** Interface for the kernel to perform min max search on a 3D tensor. */ -class NEMinMaxLayerKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEMinMaxLayerKernel"; - } - /** Default constructor */ - NEMinMaxLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEMinMaxLayerKernel(const NEMinMaxLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEMinMaxLayerKernel &operator=(const NEMinMaxLayerKernel &) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEMinMaxLayerKernel(NEMinMaxLayerKernel &&) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEMinMaxLayerKernel &operator=(NEMinMaxLayerKernel &&) = delete; - /** Default destructor */ - ~NEMinMaxLayerKernel() = default; - - /** Initialise the kernel's input and outputs. - * - * @note output[0] = minimum - * @note output[1] = maximum - * - * @param[in] input Input tensor with at least 3 dimensions. The dimensions over the third will be interpreted as batches. Data type supported: F32. - * @param[out] output Output tensor with shape [2, batches, ...] which stores the minimum and maximum value for each 3D input tensor. - * The dimensions over the second must match the batched dimensions of the input tensor. Data types supported: F32 - */ - void configure(const ITensor *input, ITensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref CLMinMaxLayerKernel - * - * @param[in] input Input tensor info. Data types supported: F32. - * @param[in] output Output tensor info with shape [2, batches, ...] which stores the minimum and maximum values for each 3D input tensor. - * The dimensions over the second must match the batched dimensions of the input tensor. Data types supported: F32. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output); - /** Resets global minimum and maximum. 
*/ - void reset(); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - void update_min_max(float *out_ptr, float min, float max); - const ITensor *_input; - ITensor *_output; - arm_compute::Mutex _mtx; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEMINMAXLAYERKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEMinMaxLocationKernel.h b/arm_compute/core/NEON/kernels/NEMinMaxLocationKernel.h deleted file mode 100644 index 83f5afce72..0000000000 --- a/arm_compute/core/NEON/kernels/NEMinMaxLocationKernel.h +++ /dev/null @@ -1,171 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEMINMAXLOCATIONKERNEL_H -#define ARM_COMPUTE_NEMINMAXLOCATIONKERNEL_H - -#include "arm_compute/core/IArray.h" -#include "arm_compute/core/NEON/INEKernel.h" -#include "support/Mutex.h" - -#include - -namespace arm_compute -{ -class ITensor; -using IImage = ITensor; - -/** Interface for the kernel to perform min max search on an image. */ -class NEMinMaxKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEMinMaxKernel"; - } - /** Default constructor */ - NEMinMaxKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEMinMaxKernel(const NEMinMaxKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEMinMaxKernel &operator=(const NEMinMaxKernel &) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEMinMaxKernel(NEMinMaxKernel &&) = delete; - /** Prevent instances of this class from being moved (As this class contains non movable objects) */ - NEMinMaxKernel &operator=(NEMinMaxKernel &&) = delete; - /** Default destructor */ - ~NEMinMaxKernel() = default; - - /** Initialise the kernel's input and outputs. - * - * @param[in] input Input Image. Data types supported: U8/S16/F32. - * @param[out] min Minimum value of image. Data types supported: S32 if input type is U8/S16, F32 if input type is F32. - * @param[out] max Maximum value of image. Data types supported: S32 if input type is U8/S16, F32 if input type is F32. - */ - void configure(const IImage *input, void *min, void *max); - /** Resets global minimum and maximum. 
*/ - void reset(); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Performs the min/max algorithm on U8 images on a given window. - * - * @param win The window to run the algorithm on. - */ - void minmax_U8(Window win); - /** Performs the min/max algorithm on S16 images on a given window. - * - * @param win The window to run the algorithm on. - */ - void minmax_S16(Window win); - /** Performs the min/max algorithm on F32 images on a given window. - * - * @param win The window to run the algorithm on. - */ - void minmax_F32(Window win); - /** Common signature for all the specialised MinMax functions - * - * @param[in] window Region on which to execute the kernel. - */ - using MinMaxFunction = void (NEMinMaxKernel::*)(Window window); - /** MinMax function to use for the particular image types passed to configure() */ - MinMaxFunction _func; - /** Helper to update min/max values **/ - template - void update_min_max(T min, T max); - - const IImage *_input; /**< Input image. */ - void *_min; /**< Minimum value. */ - void *_max; /**< Maximum value. */ - arm_compute::Mutex _mtx; /**< Mutex used for result reduction. */ -}; - -/** Interface for the kernel to find min max locations of an image. */ -class NEMinMaxLocationKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEMinMaxLocationKernel"; - } - /** Default constructor */ - NEMinMaxLocationKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEMinMaxLocationKernel(const NEMinMaxLocationKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEMinMaxLocationKernel &operator=(const NEMinMaxLocationKernel &) = delete; - /** Allow instances of this class to be moved */ - NEMinMaxLocationKernel(NEMinMaxLocationKernel &&) = default; - /** Allow instances of this class to be moved */ - NEMinMaxLocationKernel &operator=(NEMinMaxLocationKernel &&) = default; - /** Default destructor */ - ~NEMinMaxLocationKernel() = default; - - /** Initialise the kernel's input and outputs. - * - * @param[in] input Input Image. Data types supported: U8/S16/F32. - * @param[out] min Minimum value of image. Data types supported: S32 if input type is U8/S16, F32 if input type is F32. - * @param[out] max Maximum value of image. Data types supported: S32 if input type is U8/S16, F32 if input type is F32. - * @param[out] min_loc Array of minimum value locations. - * @param[out] max_loc Array of maximum value locations. - * @param[out] min_count Number of minimum value encounters. - * @param[out] max_count Number of maximum value encounters. - */ - void configure(const IImage *input, void *min, void *max, - ICoordinates2DArray *min_loc = nullptr, ICoordinates2DArray *max_loc = nullptr, - uint32_t *min_count = nullptr, uint32_t *max_count = nullptr); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - bool is_parallelisable() const override; - -private: - /** Performs the min/max location algorithm on T type images on a given window. - * - * @param win The window to run the algorithm on. - */ - template - void minmax_loc(const Window &win); - /** Common signature for all the specialised MinMaxLoc functions - * - * @param[in] window Region on which to execute the kernel. 
- */ - using MinMaxLocFunction = void (NEMinMaxLocationKernel::*)(const Window &window); - /** MinMaxLoc function to use for the particular image types passed to configure() */ - MinMaxLocFunction _func; - /** Helper to create a function pointer table for the parameterized MinMaxLocation functions. */ - template - struct create_func_table; - - const IImage *_input; /**< Input image. */ - void *_min; /**< Minimum value. */ - void *_max; /**< Maximum value. */ - uint32_t *_min_count; /**< Count of minimum value encounters. */ - uint32_t *_max_count; /**< Count of maximum value encounters. */ - ICoordinates2DArray *_min_loc; /**< Locations of minimum values. */ - ICoordinates2DArray *_max_loc; /**< Locations of maximum values. */ -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEMINMAXLOCATIONKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NENonLinearFilterKernel.h b/arm_compute/core/NEON/kernels/NENonLinearFilterKernel.h deleted file mode 100644 index 5fc225c910..0000000000 --- a/arm_compute/core/NEON/kernels/NENonLinearFilterKernel.h +++ /dev/null @@ -1,151 +0,0 @@ -/* - * Copyright (c) 2016-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NENONLINEARFILTERKERNEL_H -#define ARM_COMPUTE_NENONLINEARFILTERKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/Types.h" - -#include - -namespace arm_compute -{ -class ITensor; - -/** Interface for the kernel to apply a non-linear filter */ -class NENonLinearFilterKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NENonLinearFilterKernel"; - } - /** Default constructor */ - NENonLinearFilterKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NENonLinearFilterKernel(NENonLinearFilterKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NENonLinearFilterKernel &operator=(NENonLinearFilterKernel &) = delete; - /** Allow instances of this class to be moved */ - NENonLinearFilterKernel(NENonLinearFilterKernel &&) = default; - /** Allow instances of this class to be moved */ - NENonLinearFilterKernel &operator=(NENonLinearFilterKernel &&) = default; - /** Set the source, destination and border mode of the kernel - * - * @param[in] input Source tensor. Data type supported: U8 - * @param[out] output Destination tensor. 
Data type supported: U8 - * @param[in] function Non linear function to perform - * @param[in] mask_size Mask size. Supported sizes: 3, 5 - * @param[in] pattern Mask pattern - * @param[in] mask The given mask. Will be used only if pattern is specified to PATTERN_OTHER - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ITensor *input, ITensor *output, NonLinearFilterFunction function, unsigned int mask_size, MatrixPattern pattern, const uint8_t *mask, bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; - -private: - /** Fill mask with the corresponding given pattern. - * - * @param[in,out] mask Mask to be filled according to pattern - * @param[in] cols Columns (width) of mask - * @param[in] rows Rows (height) of mask - * @param[in] pattern Pattern to fill the mask according to - */ - void fill_mask(uint8_t *mask, int cols, int rows, MatrixPattern pattern); - /** Apply a median filter when given mask pattern is defined as box. - * - * @param[in] win Window to apply the filter on. - */ - template - void median_filter_box(const Window &win); - /** Apply a min filter when given mask pattern is defined as box. - * - * @param[in] win Window to apply the filter on. - */ - template - void min_filter_box(const Window &win); - /** Apply a max filter when given mask pattern is defined as box. - * - * @param[in] win Window to apply the filter on. - */ - template - void max_filter_box(const Window &win); - /** Apply a median filter when given mask pattern is defined as cross. - * - * @param[in] win Window to apply the filter on. - */ - template - void median_filter_cross(const Window &win); - /** Apply a min filter when given mask pattern is defined as cross. - * - * @param[in] win Window to apply the filter on. - */ - template - void min_filter_cross(const Window &win); - /** Apply a max filter when given mask pattern is defined as cross. - * - * @param[in] win Window to apply the filter on. - */ - template - void max_filter_cross(const Window &win); - /** Apply a median filter when given mask pattern is defined as disk. - * - * @param[in] win Window to apply the filter on. - */ - template - void median_filter_disk(const Window &win); - /** Apply a min filter when given mask pattern is defined as disk. - * - * @param[in] win Window to apply the filter on. - */ - template - void min_filter_disk(const Window &win); - /** Apply a max filter when given mask pattern is defined as disk. - * - * @param[in] win Window to apply the filter on. - */ - template - void max_filter_disk(const Window &win); - /** Apply a non-linear filter when given mask has user-defined pattern. - * - * @param[in] win Window to apply the filter on. 
- */ - template - void non_linear_filter_generic(const Window &win); - -private: - unsigned int _border_width; - const ITensor *_input; - ITensor *_output; - const uint8_t *_mask; - MatrixPattern _pattern; - NonLinearFilterFunction _function; - unsigned int _func_idx; - BorderSize _border_size; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NENONLINEARFILTERKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h b/arm_compute/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h deleted file mode 100644 index bf5c520978..0000000000 --- a/arm_compute/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Copyright (c) 2016-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NENONMAXIMASUPPRESSION3x3KERNEL_H -#define ARM_COMPUTE_NENONMAXIMASUPPRESSION3x3KERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -#include - -namespace arm_compute -{ -class ITensor; - -/** Interface to perform Non-Maxima suppression over a 3x3 window using NEON - * - * @note Used by @ref NEFastCorners and @ref NEHarrisCorners - */ -class NENonMaximaSuppression3x3Kernel : public INEKernel -{ -public: - const char *name() const override - { - return "NENonMaximaSuppression3x3Kernel"; - } - /** Default constructor */ - NENonMaximaSuppression3x3Kernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NENonMaximaSuppression3x3Kernel(const NENonMaximaSuppression3x3Kernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NENonMaximaSuppression3x3Kernel &operator=(const NENonMaximaSuppression3x3Kernel &) = delete; - /** Allow instances of this class to be moved */ - NENonMaximaSuppression3x3Kernel(NENonMaximaSuppression3x3Kernel &&) = default; - /** Allow instances of this class to be moved */ - NENonMaximaSuppression3x3Kernel &operator=(NENonMaximaSuppression3x3Kernel &&) = default; - /** Default destructor */ - ~NENonMaximaSuppression3x3Kernel() = default; - - /** Initialise the kernel's sources, destinations and border mode. - * - * @param[in] input Source tensor. Data types supported: U8/F32 - * @param[out] output Destination tensor. Data types supported: same as @p input - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 
- */ - void configure(const ITensor *input, ITensor *output, bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; - -protected: - /** Common signature for all the specialised non-maxima suppression 3x3 functions - * - * @param[in] input_ptr Pointer to the input tensor. - * @param[out] output_ptr Pointer to the output tensor - * @param[in] input_stride Stride of the input tensor - */ - using NonMaxSuppr3x3Function = void(const void *__restrict input_ptr, void *__restrict output_ptr, const uint32_t input_stride); - - NonMaxSuppr3x3Function *_func; /**< Non-Maxima suppression function to use for the particular tensor types passed to configure() */ - const ITensor *_input; /**< Source tensor */ - ITensor *_output; /**< Destination tensor */ -}; - -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -/** NEON kernel to perform Non-Maxima suppression 3x3 with intermediate results in FP16 if the input data type is FP32 - */ -class NENonMaximaSuppression3x3FP16Kernel : public NENonMaximaSuppression3x3Kernel -{ -public: - const char *name() const override - { - return "NENonMaximaSuppression3x3FP16Kernel"; - } - /** Initialise the kernel's sources, destinations and border mode. - * - * @param[in] input Source tensor. Data types supported: U8/F32. - * @param[out] output Destination tensor. Data types supported: same as @p input - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ITensor *input, ITensor *output, bool border_undefined); -}; -#else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ -/** NEON kernel to perform Non-Maxima suppression 3x3 with intermediate results in FP16 if the input data type is FP32 */ -using NENonMaximaSuppression3x3FP16Kernel = NENonMaximaSuppression3x3Kernel; -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ -} // namespace arm_compute -#endif /* _ARM_COMPUTE_NENONMAXIMASUPPRESSION3x3KERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NENormalizationLayerKernel.h b/arm_compute/core/NEON/kernels/NENormalizationLayerKernel.h deleted file mode 100644 index 665b10244d..0000000000 --- a/arm_compute/core/NEON/kernels/NENormalizationLayerKernel.h +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NENORMALIZATIONLAYERKERNEL_H -#define ARM_COMPUTE_NENORMALIZATIONLAYERKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the normalization layer kernel. - */ -class NENormalizationLayerKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NENormalizationLayerKernel"; - } - /** Default constructor */ - NENormalizationLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NENormalizationLayerKernel(const NENormalizationLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NENormalizationLayerKernel &operator=(const NENormalizationLayerKernel &) = delete; - /** Default Move Constructor. */ - NENormalizationLayerKernel(NENormalizationLayerKernel &&) = default; - /** Default move assignment operator */ - NENormalizationLayerKernel &operator=(NENormalizationLayerKernel &&) = default; - /** Default destructor */ - ~NENormalizationLayerKernel() = default; - /** Set the input and output tensors. - * - * @param[in] input Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM], - * and an optional 4th dimension for batch of inputs. Data types supported: FP16/F32. Data layouts supported: NCHW/NHWC. - * @param[in] input_squared Source with each element has been squared. 3 lower dims represent a single input with dimensions [width, height, IFM], - * Data type and layout supported: same as @p input. - * @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data type and layout supported: same as @p input. - * @param[in] norm_info Normalization layer information like the normalization type, normalization size and other parameters. - */ - void configure(const ITensor *input, const ITensor *input_squared, ITensor *output, NormalizationLayerInfo norm_info); - /** Static function to check if given info will lead to a valid configuration of @ref NENormalizationLayerKernel - * - * @param[in] input Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM], - * and an optional 4th dimension for batch of inputs. Data types supported: FP16/F32. Data layouts supported: NCHW/NHWC. - * @param[in] input_squared Source with each element has been squared. 3 lower dims represent a single input with dimensions [width, height, IFM], - * Data type and layout supported: same as @p input. - * @param[in] output Destination tensor. Output will have the same number of dimensions as input. Data type and layout supported: same as @p input. - * @param[in] norm_info Normalization layer information like the normalization type, normalization size and other parameters. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *input_squared, const ITensorInfo *output, NormalizationLayerInfo norm_info); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Function to perform normalization depending on the given template - * dimension. The second template parameter specifies whether the - * normalization has to be 1D or 2D. - * - * @note Only supported normalizations are: - * - 1D over X or Z - * - 2D over X and Y - * - * @param[in] window Region on which to execute the kernel. 
- */ - template - void normalize_float(const Window &window); - - /** Common signature for all the specialised normalization functions - * - * @param[in] window Region on which to execute the kernel. - */ - using NormalizationFunction = void (NENormalizationLayerKernel::*)(const Window &window); - -private: - NormalizationFunction _func; - const ITensor *_input; - const ITensor *_input_squared; - ITensor *_output; - NormalizationLayerInfo _norm_info; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NENORMALIZATIONLAYERKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEPadLayerKernel.h b/arm_compute/core/NEON/kernels/NEPadLayerKernel.h deleted file mode 100644 index 80daabb349..0000000000 --- a/arm_compute/core/NEON/kernels/NEPadLayerKernel.h +++ /dev/null @@ -1,113 +0,0 @@ -/* - * Copyright (c) 2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEPADLAYERKERNEL_H -#define ARM_COMPUTE_NEPADLAYERKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** NEON kernel to add padding to a tensor - * - * Add padding given padding information - */ -class NEPadLayerKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEPadLayerKernel"; - } - /** Default constructor */ - NEPadLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEPadLayerKernel(const NEPadLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEPadLayerKernel &operator=(const NEPadLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - NEPadLayerKernel(NEPadLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - NEPadLayerKernel &operator=(NEPadLayerKernel &&) = default; - /** Default destructor */ - ~NEPadLayerKernel() = default; - - /** Initialize the function - * - * @param[in] input Source tensor. Data types supported: All. - * @param[out] output Output tensor. Data type supported: same as @p input - * @param[in] padding The padding for each spatial dimension of the input tensor. The pair padding[i] - * specifies the front and the end padding in the i-th dimension. - * @param[in] constant_value (Optional) Constant value to be used for the padding - * @param[in] mode (Optional) Controls whether the padding should be filled with @p constant_value using CONSTANT. 
- * Only CONSTANT padding mode is currently supported - */ - void configure(ITensor *input, ITensor *output, const PaddingList &padding, const PixelValue constant_value = PixelValue(), const PaddingMode mode = PaddingMode::CONSTANT); - /** Static function to check if given info will lead to a valid configuration of @ref NEPadLayer. - * - * @param[in] input Source tensor info. Data types supported: All. - * @param[in] output Output tensor info. Data type supported: same as @p input - * @param[in] padding The padding for each spatial dimension of the input tensor. The pair padding[i] - * specifies the front and the end padding in the i-th dimension. - * @param[in] constant_value (Optional) Constant value to be used for the padding - * @param[in] mode (Optional) Controls whether the padding should be filled with @p constant_value using CONSTANT. - * Only CONSTANT padding mode is currently supported - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PaddingList &padding, const PixelValue constant_value = PixelValue(), const PaddingMode mode = PaddingMode::CONSTANT); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Template function to run the padding function with constant padding - * - * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). - */ - template - void run_pad_constant(const Window &window); - - /** Function to run the padding function with constant padding for 3D input and 1D, 2D, 3D padding - * - * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). - */ - void run_pad_constant_uint8_3Dinput_3Dpad(const Window &window); - - /** Common signature for all the specialised permute functions - * - * @param[in] window Region on which to execute the kernel. - */ - using PadFunctionPtr = void (NEPadLayerKernel::*)(const Window &window); - - PadFunctionPtr _func; - const ITensor *_input; - ITensor *_output; - PaddingList _padding; - PixelValue _constant_value; - PaddingMode _mode; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEPADLAYERKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEPermuteKernel.h b/arm_compute/core/NEON/kernels/NEPermuteKernel.h deleted file mode 100644 index 2f8af9373d..0000000000 --- a/arm_compute/core/NEON/kernels/NEPermuteKernel.h +++ /dev/null @@ -1,102 +0,0 @@ -/* - * Copyright (c) 2018-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEPERMUTEKERNEL_H -#define ARM_COMPUTE_NEPERMUTEKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -// Forward declarations -class ITensor; - -/** NEON kernel to perform tensor permutation. - * - * Permutes a tensor according to a given permutation vector - */ -class NEPermuteKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEPermuteKernel"; - } - /** Default constructor */ - NEPermuteKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEPermuteKernel(const NEPermuteKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEPermuteKernel &operator=(const NEPermuteKernel &) = delete; - /** Allow instances of this class to be moved */ - NEPermuteKernel(NEPermuteKernel &&) = default; - /** Allow instances of this class to be moved */ - NEPermuteKernel &operator=(NEPermuteKernel &&) = default; - /** Default destructor */ - ~NEPermuteKernel() = default; - - /** Set the input and output of the kernel. - * - * @note Arbitrary permutation vectors are supported with rank not greater than 4 - * - * @param[in] input The input tensor to permute. Data types supported: All - * @param[out] output The output tensor. Data types supported: Same as @p input - * @param[in] perm Permutation vector - */ - void configure(const ITensor *input, ITensor *output, const PermutationVector &perm); - /** Static function to check if given info will lead to a valid configuration of @ref NEPermuteKernel - * - * @note Arbitrary permutation vectors are supported with rank not greater than 4 - * - * @param[in] input The input tensor to permute. Data types supported: All - * @param[in] output The output tensor. Data types supported: Same as @p input - * @param[in] perm Permutation vector - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PermutationVector &perm); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Template function to run the permute - * - * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). - */ - template <typename T> - void run_permute(const Window &window); - - /** Common signature for all the specialised permute functions - * - * @param[in] window Region on which to execute the kernel. - */ - using PermuteFunctionPtr = void (NEPermuteKernel::*)(const Window &window); - - PermuteFunctionPtr _func; - const ITensor *_input; - ITensor *_output; - PermutationVector _perm; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEPERMUTEKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h b/arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h deleted file mode 100644 index 6221d61f49..0000000000 --- a/arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h +++ /dev/null @@ -1,186 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited.
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEPIXELWISEMULTIPLICATIONKERNEL_H -#define ARM_COMPUTE_NEPIXELWISEMULTIPLICATIONKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the kernel to perform addition between two tensors */ -class NEPixelWiseMultiplicationKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEPixelWiseMultiplicationKernel"; - } - /** Default constructor */ - NEPixelWiseMultiplicationKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEPixelWiseMultiplicationKernel(const NEPixelWiseMultiplicationKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEPixelWiseMultiplicationKernel &operator=(const NEPixelWiseMultiplicationKernel &) = delete; - /** Allow instances of this class to be moved */ - NEPixelWiseMultiplicationKernel(NEPixelWiseMultiplicationKernel &&) = default; - /** Allow instances of this class to be moved */ - NEPixelWiseMultiplicationKernel &operator=(NEPixelWiseMultiplicationKernel &&) = default; - /** Default destructor */ - ~NEPixelWiseMultiplicationKernel() = default; - /** Initialise the kernel's input, output and border mode. - * - * Valid configurations (Input1,Input2) -> Output : - * - * Support: Broadcast? Scale=1/255? - * - (U8,U8) -> U8, S16 N Y - * - (U8,S16) -> S16 N Y - * - (S16,U8) -> S16 N Y - * - (S16,S16) -> S16 N Y - * - (S32,S32) -> S32 Y N - * - (F16,F16) -> F16 N Y - * - (F32,F32) -> F32 Y Y - * - (QASYMM8,QASYMM8) -> QASYMM8 Y Y - * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED Y Y - * - (QSYMM16,QSYMM16) -> QSYMM16, S32 N Y - * - * @note For @p scale equal to 1/255 only round to nearest even (implemented as round half up) is supported. - * For all other scale values only round to zero (implemented as round towards minus infinity) is supported. - * - * @param[in] input1 First input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/S32/QSYMM16/F16/F32 - * @param[in] input2 Second input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/S32/QSYMM16/F16/F32 - * @param[out] output Output tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/S32/QSYMM16/F16/F32 - * @param[in] scale Scale to apply after multiplication. 
- * Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15. - * If both @p input1, @p input2 and @p output are of datatype S32, scale cannot be 1/255 - * @param[in] overflow_policy Overflow policy. ConvertPolicy cannot be WRAP if any of the inputs is of quantized datatype - * @param[in] rounding_policy Rounding policy. - */ - void configure(ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output, float scale, ConvertPolicy overflow_policy, RoundingPolicy rounding_policy); - /** Static function to check if given info will lead to a valid configuration of @ref NEPixelWiseMultiplicationKernel - * - * Valid configurations (Input1,Input2) -> Output : - * Support: Broadcast? Scale=1/255? - * - (U8,U8) -> U8, S16 N Y - * - (U8,S16) -> S16 N Y - * - (S16,U8) -> S16 N Y - * - (S16,S16) -> S16 N Y - * - (S32,S32) -> S32 Y N - * - (F16,F16) -> F16 N Y - * - (F32,F32) -> F32 Y Y - * - (QASYMM8,QASYMM8) -> QASYMM8 Y Y - * - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED Y Y - * - (QSYMM16,QSYMM16) -> QSYMM16, S32 N Y - * - * @note For @p scale equal to 1/255 only round to nearest even (implemented as round half up) is supported. - * For all other scale values only round to zero (implemented as round towards minus infinity) is supported. - * - * @param[in] input1 First input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/S32/QSYMM16/F16/F32 - * @param[in] input2 Second input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/S32/QSYMM16/F16/F32 - * @param[in] output Output tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/S32/QSYMM16/F16/F32 - * @param[in] scale Scale to apply after multiplication. - * Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15. - * If both @p input1, @p input2 and @p output are of datatype S32, scale cannot be 1/255 - * @param[in] overflow_policy Overflow policy. ConvertPolicy cannot be WRAP if any of the inputs is of quantized datatype - * @param[in] rounding_policy Rounding policy. - * - * @return a status - */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, float scale, ConvertPolicy overflow_policy, RoundingPolicy rounding_policy); - - // Inherited methods overridden - void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override; - -private: - /** Common signature for all the specialised multiplication functions with integer scaling factor - * - * @param[in] in1 Input1 tensor object. - * @param[in] in2 Input2 tensor object. - * @param[out] out Output tensor object. - * @param[in] window Region on which to execute the kernel - * @param[in] scale Integer scale factor. - */ - using MulFunctionInt = void(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window, int scale); - /** Common signature for all the specialised multiplication functions with float scaling factor - * - * @param[in] in1 Input1 tensor object. - * @param[in] in2 Input2 tensor object. - * @param[out] out Output tensor object. - * @param[in] window Region on which to execute the kernel - * @param[in] scale Float scale factor. - */ - using MulFunctionFloat = void(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window, float scale); - /** Common signature for all the specialised QASYMM8 multiplication functions with float scaling factor - * - * @param[in] in1 Input1 tensor object. - * @param[in] in2 Input2 tensor object. 
- * @param[out] out Output tensor object. - * @param[in] window Region on which to execute the kernel - * @param[in] scale Float scale factor. - * - */ - using MulFunctionQuantized = void(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window, float scale); - - MulFunctionFloat *_func_float; - MulFunctionInt *_func_int; - MulFunctionQuantized *_func_quantized; - -private: - float _scale; - int _scale_exponent; -}; - -/** Interface for the complex pixelwise multiplication kernel. */ -class NEComplexPixelWiseMultiplicationKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEComplexPixelWiseMultiplicationKernel"; - } - /** Initialise the kernel's input, output and border mode. - * - * @param[in] input1 An input tensor. Data types supported: F32. Number of channels supported: 2 (complex tensor). - * @param[in] input2 An input tensor. Data types supported: same as @p input1. Number of channels supported: same as @p input1. - * @param[out] output The output tensor, Data types supported: same as @p input1. Number of channels supported: same as @p input1. - */ - void configure(ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output); - /** Static function to check if given info will lead to a valid configuration of @ref NEComplexPixelWiseMultiplicationKernel - * - * @param[in] input1 An input tensor info. Data types supported: F32. Number of channels supported: 2 (complex tensor). - * @param[in] input2 An input tensor info. Data types supported: same as @p input1. Number of channels supported: same as @p input1. - * @param[in] output The output tensor info. Data types supported: same as @p input1. Number of channels supported: same as @p input1. - * - * @return a status - */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output); - - // Inherited methods overridden: - void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override; -}; - -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEPIXELWISEMULTIPLICATIONKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h b/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h deleted file mode 100644 index 2be25080cd..0000000000 --- a/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h +++ /dev/null @@ -1,229 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEPOOLINGLAYERKERNEL_H -#define ARM_COMPUTE_NEPOOLINGLAYERKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the pooling layer kernel */ -class NEPoolingLayerKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEPoolingLayerKernel"; - } - /** Default constructor */ - NEPoolingLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEPoolingLayerKernel(const NEPoolingLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEPoolingLayerKernel &operator=(const NEPoolingLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - NEPoolingLayerKernel(NEPoolingLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - NEPoolingLayerKernel &operator=(NEPoolingLayerKernel &&) = default; - /** Default destructor */ - ~NEPoolingLayerKernel() = default; - /** Set the input and output tensors. - * - * @note F16 are supported for pool sizes 2 and 3 only - * - * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[out] output Destination tensor. Data types supported: Same as @p input. - * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo. - * @param[out] indices (optional) The indices of the maximal values. Data type supported: U32. - */ - void configure(const ITensor *input, ITensor *output, const PoolingLayerInfo &pool_info, ITensor *indices = nullptr); - /** Static function to check if given info will lead to a valid configuration of @ref NEPoolingLayerKernel - * - * @note F16 are supported for pool sizes 2 and 3 only - * - * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] output Destination tensor. Data types supported: Same as @p input. - * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo. - * @param[in] indices (optional) The indices of the maximal values. Data type supported: U32. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info, const ITensorInfo *indices = nullptr); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; - -private: - /** Function to perform 2x2 pooling. - * - * @param[in] window_input Input region on which to execute the kernel. - * @param[in] window Output region on which to execute the kernel. - * @param[in] pooling_type Pooling operation to be computed. - * @param[in] exclude_padding Flag to specify exclusion of padding from the operation. - */ - void pooling2_f32_nchw(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false); - /** Function to perform 2x2 pooling and compute the pooling indices. The indices can be used for max unpool. - * - * @param[in] window_input Input region on which to execute the kernel. - * @param[in] window Output region on which to execute the kernel. - */ - void pooling2_f32_nhwc_maxpool_indices(const Window &window_input, const Window &window); - /** Function to perform MxN pooling for 32-bit floating point values. - * - * @param[in] window_input Input region on which to execute the kernel. 
- * @param[in] window Output region on which to execute the kernel. - * @param[in] pooling_type Pooling operation to be computed. - * @param[in] exclude_padding Flag to specify exclusion of padding from the operation. - */ - void poolingMxN_f32_nchw(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false); - /** Function to perform MxN pooling for 32-bit floating point values (NHWC). - * - * @param[in] window_input Input region on which to execute the kernel. - * @param[in] window Output region on which to execute the kernel. - * @param[in] pooling_type Pooling operation to be computed. - * @param[in] exclude_padding Flag to specify exclusion of padding from the operation. - */ - void poolingMxN_f32_nhwc(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false); - /** Function to perform 7x7 pooling. - * - * @param[in] window_input Input region on which to execute the kernel. - * @param[in] window Output region on which to execute the kernel. - * @param[in] pooling_type Pooling operation to be computed. - * @param[in] exclude_padding Flag to specify exclusion of padding from the operation. - */ - void pooling7_f32_nchw(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false); - /** Function to perform 3x3 pooling. - * - * @param[in] window_input Input region on which to execute the kernel. - * @param[in] window Output region on which to execute the kernel. - * @param[in] pooling_type Pooling operation to be computed. - * @param[in] exclude_padding Flag to specify exclusion of padding from the operation. - */ - void pooling3_f32_nchw(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false); - /** Function to perform 2x2 pooling for float16_t. - * - * @param[in] window_input Input region on which to execute the kernel. - * @param[in] window Output region on which to execute the kernel. - * @param[in] pooling_type Pooling operation to be computed. - * @param[in] exclude_padding Flag to specify exclusion of padding from the operation. - */ - void pooling2_f16_nchw(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false); - /** Function to perform 2x2 pooling and compute the pooling indices for FP32/FP16. The indices can be used for max unpool. - * - * @param[in] window_input Input region on which to execute the kernel. - * @param[in] window Output region on which to execute the kernel. - */ - template - void pooling2_nchw_maxpool_indices(const Window &window_input, const Window &window); - /** Function to perform 2x2 pooling and compute the pooling indices. The indices can be used for max unpool. - * - * @param[in] window_input Input region on which to execute the kernel. - * @param[in] window Output region on which to execute the kernel. - */ - void pooling2_f16_nhwc_maxpool_indices(const Window &window_input, const Window &window); - /** Function to perform 3x3 pooling. - * - * @param[in] window_input Input region on which to execute the kernel. - * @param[in] window Output region on which to execute the kernel. - * @param[in] pooling_type Pooling operation to be computed. - * @param[in] exclude_padding Flag to specify exclusion of padding from the operation. 
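 *
 * Illustrative usage sketch (editorial addition, not part of the original header). It assumes
 * F32 NHWC tensors with shapes consistent with 2x2/stride-2 pooling, and the post-move kernel
 * header location; tensor names are hypothetical:
 * @code
 * #include "src/core/NEON/kernels/NEPoolingLayerKernel.h" // header location after this patch
 * #include "arm_compute/core/Error.h"
 * #include "arm_compute/runtime/NEON/NEScheduler.h"
 *
 * void max_pool_2x2(arm_compute::ITensor *src, arm_compute::ITensor *dst)
 * {
 *     using namespace arm_compute;
 *     const PoolingLayerInfo pool_info(PoolingType::MAX, 2, DataLayout::NHWC, PadStrideInfo(2, 2, 0, 0));
 *     NEPoolingLayerKernel   pool_kernel;
 *     ARM_COMPUTE_ERROR_THROW_ON(NEPoolingLayerKernel::validate(src->info(), dst->info(), pool_info));
 *     pool_kernel.configure(src, dst, pool_info); // optional indices output omitted
 *     NEScheduler::get().schedule(&pool_kernel, Window::DimY);
 * }
 * @endcode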
- */ - void pooling3_f16_nchw(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false); - /** Function to perform MxN pooling for 16-bit floating point values. - * - * @param[in] window_input Input region on which to execute the kernel. - * @param[in] window Output region on which to execute the kernel. - * @param[in] pooling_type Pooling operation to be computed. - * @param[in] exclude_padding Flag to specify exclusion of padding from the operation. - */ - void poolingMxN_f16_nchw(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false); - /** Function to perform MxN pooling for 16-bit floating point values. (NHWC) - * - * @param[in] window_input Input region on which to execute the kernel. - * @param[in] window Output region on which to execute the kernel. - * @param[in] pooling_type Pooling operation to be computed. - * @param[in] exclude_padding Flag to specify exclusion of padding from the operation. - */ - void poolingMxN_f16_nhwc(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false); - /** Template function to perform 2x2 pooling for 8bit quantized fixed point. (NCHW) - * - * @param[in] window_input Input region on which to execute the kernel. - * @param[in] window Output region on which to execute the kernel. - * @param[in] pooling_type Pooling operation to be computed. - * @param[in] exclude_padding Flag to specify exclusion of padding from the operation. - */ - template - void pooling2_q8_nchw(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false); - /** Template function to perform 3x3 pooling for 8bit quantized fixed point. (NCHW) - * - * @param[in] window_input Input region on which to execute the kernel. - * @param[in] window Output region on which to execute the kernel. - * @param[in] pooling_type Pooling operation to be computed. - * @param[in] exclude_padding Flag to specify exclusion of padding from the operation. - */ - template - void pooling3_q8_nchw(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false); - /** Template function to perform MxN pooling for 8-bit quantized. (NCHW) - * - * @param[in] window_input Input region on which to execute the kernel. - * @param[in] window Output region on which to execute the kernel. - * @param[in] pooling_type Pooling operation to be computed. - * @param[in] exclude_padding Flag to specify exclusion of padding from the operation. - */ - template - void poolingMxN_q8_nchw(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false); - /** Template function to perform MxN pooling for 8-bit quantized. (NHWC) - * - * @param[in] window_input Input region on which to execute the kernel. - * @param[in] window Output region on which to execute the kernel. - * @param[in] pooling_type Pooling operation to be computed. - * @param[in] exclude_padding Flag to specify exclusion of padding from the operation. - */ - template - void poolingMxN_q8_nhwc(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false); - /** Common signature for all the specialised Pooling functions - * - * @param[in] window_input Input region on which to execute the kernel. - * @param[in] window Output region on which to execute the kernel. - * @param[in] pooling_type Pooling operation to be computed. 
- * @param[in] exclude_padding Flag to specify exclusion of padding from the operation. - */ - using PoolingFunction = void (NEPoolingLayerKernel::*)(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding); - -private: - PoolingFunction _func; - const ITensor *_input; - ITensor *_output; - ITensor *_indices; - PoolingLayerInfo _pool_info; - DataLayout _data_layout; - unsigned int _num_elems_processed_per_iteration; - BorderSize _border_size; - bool _is_square; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEPOOLINGLAYERKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEPriorBoxLayerKernel.h b/arm_compute/core/NEON/kernels/NEPriorBoxLayerKernel.h deleted file mode 100644 index 84db99100b..0000000000 --- a/arm_compute/core/NEON/kernels/NEPriorBoxLayerKernel.h +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Copyright (c) 2018-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEPRIORBOXLAYERKERNEL_H -#define ARM_COMPUTE_NEPRIORBOXLAYERKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the kernel to calculate prior boxes */ -class NEPriorBoxLayerKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEPriorBoxLayerKernel"; - } - /** Default constructor */ - NEPriorBoxLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEPriorBoxLayerKernel(const NEPriorBoxLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEPriorBoxLayerKernel &operator=(const NEPriorBoxLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - NEPriorBoxLayerKernel(NEPriorBoxLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - NEPriorBoxLayerKernel &operator=(NEPriorBoxLayerKernel &&) = default; - /** Set the input and output tensors. - * - * @param[in] input1 First source tensor. Data types supported: F32. Data layouts supported: NCHW/NHWC. - * @param[in] input2 Second source tensor. Data types and layouts supported: same as @p input1 - * @param[out] output Destination tensor. Output dimensions are [W * H * num_priors * 4, 2]. Data type supported: same as @p input - * @param[in] info Prior box layer info. 
- */ - void configure(const ITensor *input1, const ITensor *input2, ITensor *output, const PriorBoxLayerInfo &info); - /** Static function to check if given info will lead to a valid configuration of @ref NEPriorBoxLayerKernel - * - * @param[in] input1 First source tensor info. Data types supported: F32. Data layouts supported: NCHW/NHWC. - * @param[in] input2 Second source tensor info. Data types and layouts supported: same as @p input1 - * @param[in] output Destination tensor info. Output dimensions are [W * H * num_priors * 4, 2]. Data type supported: same as @p input - * @param[in] info Prior box layer info. - * - * @return a status - */ - static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, const PriorBoxLayerInfo &info); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Stores the coordinates of the calculated prior boxes. - * - * @param[out] out Output pointer. - * @param[in] offset Output offset to write to. - * @param[in] center_x Center pixel value on x-axis. - * @param[in] center_y Center pixel value on y-axis. - * @param[in] box_width Prior box width. - * @param[in] box_height Prior box height. - * @param[in] width Input width. - * @param[in] height Input height. - */ - void store_coordinates(float *out, const int offset, const float center_x, const float center_y, const float box_width, const float box_height, const int width, const int height); - /** Function to calculate prior boxes. - * - * @param[in] window Input region on which to execute the kernel. - */ - void calculate_prior_boxes(const Window &window); - - const ITensor *_input1; - const ITensor *_input2; - ITensor *_output; - PriorBoxLayerInfo _info; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEPRIORBOXLAYERKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEQLSTMLayerNormalizationKernel.h b/arm_compute/core/NEON/kernels/NEQLSTMLayerNormalizationKernel.h deleted file mode 100644 index 86c9e1d3af..0000000000 --- a/arm_compute/core/NEON/kernels/NEQLSTMLayerNormalizationKernel.h +++ /dev/null @@ -1,137 +0,0 @@ -/* - * Copyright (c) 2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEQLSTMLAYERNORMALIZATIONKERNEL_H -#define ARM_COMPUTE_NEQLSTMLAYERNORMALIZATIONKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -#include - -namespace arm_compute -{ -class ITensor; - -/** NEON kernel to perform layer normalization */ -class NEQLSTMLayerNormalizationKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEQLSTMLayerNormalizationKernel"; - } - /** Default constructor */ - NEQLSTMLayerNormalizationKernel() = default; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEQLSTMLayerNormalizationKernel(const NEQLSTMLayerNormalizationKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEQLSTMLayerNormalizationKernel &operator=(const NEQLSTMLayerNormalizationKernel &) = delete; - /** Default Move Constructor. */ - NEQLSTMLayerNormalizationKernel(NEQLSTMLayerNormalizationKernel &&) = default; - /** Default move assignment operator */ - NEQLSTMLayerNormalizationKernel &operator=(NEQLSTMLayerNormalizationKernel &&) = default; - /** Default destructor */ - ~NEQLSTMLayerNormalizationKernel() = default; - - /** Set the input and output tensors. - * - * @param[in] input Source tensor. Data types supported: QSYMM16. - * @param[out] output Destination tensor. Data types supported: Same as @p input. - * @param[in] weight Weight tensor. Data types supported: Same as @p input. - * @param[in] bias Bias tensor. Data types supported: S32 - */ - void configure(const ITensor *input, ITensor *output, const ITensor *weight, const ITensor *bias); - /** Static function to check if given info will lead to a valid configuration of @ref NEQLSTMLayerNormalizationKernel - * - * @param[in] input Source tensor info. Data types supported: QSYMM16. - * @param[in] output Destination tensor info. Data types supported: Same as @p input. - * @param[in] weight Weight tensor info. Data types supported: Same as @p input. - * @param[in] bias Bias tensor info. 
Data types supported: S32 - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *weight, const ITensorInfo *bias); - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - // constants - static constexpr uint32_t max_input_dimension{ 2 }; /**< The maximum input dimension supported */ - static constexpr uint32_t max_weight_dimension{ 1 }; /**< The maximum weight dimension supported */ - static constexpr uint32_t max_bias_dimension{ 1 }; /**< The maximum bias dimension supported */ - static constexpr uint32_t vector_size_byte{ 16 }; /**< Computation vector size in byte */ - - using ComputeFuncType = std::function; - - ComputeFuncType _fn{}; /**< Function pointer to computation function */ - - const ITensor *_input{ nullptr }; /**< Input tensor */ - const ITensor *_weight{ nullptr }; /**< Weight tensor */ - const ITensor *_bias{ nullptr }; /**< Bias tensor */ - ITensor *_output{ nullptr }; /**< Output tensor */ - - int32_t _output_multiplier{}; /**< Multiplier for output values */ - int32_t _output_shift{}; /**< Shift value for output values */ - - int32_t _window_start_x{}; /**< The beginning of x-axis iteration */ - int32_t _window_end_x{}; /**< The end of x-axis iteration */ - int32_t _window_step_x{}; /**< The size of x-axis iteration's step */ - - Window _inout_window{}; /**< Window for input and output tensor */ - Window _weight_window{}; /**< Window for weight and bias tensor */ - - /** Function to configure initial windows for destination of computation - * - * @param[in] Target destination tensor to use for output window - * - * @return configured window - */ - Window configure_window(ITensor *target); - // Function to compute for data type QSYMM16 - void compute_qsymm16(); - /** Function to compute summation and summation of squared input of the given input pointer - * - * @param[in] Input_ptr pointer to input array - * - */ - std::pair sum_qsymm16(const int16_t *input_ptr); - /** Function to normalize values using computed mean and standard deviation - * - * @param[in] input_ptr Pointer to input array - * @param[in] output_ptr Pointer to output array - * @param[in] weight_ptr Pointer to weight array - * @param[in] bias_ptr Pointer to bias array - * @param[in] mean Mean value - * @param[in] inv_std_mul Quantized multiplier for standard deviation - * @param[in] inv_std_shift Shift for standard deviation - * - */ - void normalize_qasymm16(const int16_t *input_ptr, - int16_t *output_ptr, - const int16_t *weight_ptr, - const int32_t *bias_ptr, - int32_t mean, int32_t inv_std_mul, int32_t inv_std_shift); - /** Function to compute output quantization information */ - QuantizationInfo compute_output_qinfo(); -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEQLSTMLAYERNORMALIZATIONKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEQuantizationLayerKernel.h b/arm_compute/core/NEON/kernels/NEQuantizationLayerKernel.h deleted file mode 100644 index d35e027ff5..0000000000 --- a/arm_compute/core/NEON/kernels/NEQuantizationLayerKernel.h +++ /dev/null @@ -1,102 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEQUANTIZATIONLAYERKERNEL_H -#define ARM_COMPUTE_NEQUANTIZATIONLAYERKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the quantization layer kernel. - * - * @note The implementation supports only 3D input tensors - * - */ -class NEQuantizationLayerKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEQuantizationLayerKernel"; - } - /** Default constructor */ - NEQuantizationLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEQuantizationLayerKernel(const NEQuantizationLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEQuantizationLayerKernel &operator=(const NEQuantizationLayerKernel &) = delete; - /** Default Move Constructor. */ - NEQuantizationLayerKernel(NEQuantizationLayerKernel &&) = default; - /** Default move assignment operator */ - NEQuantizationLayerKernel &operator=(NEQuantizationLayerKernel &&) = default; - /** Default destructor */ - ~NEQuantizationLayerKernel() = default; - /** Set the input, output. - * - * @param[in] input Source tensor. The dimensions over the third will be interpreted as batches. Data types supported: QASYMM8/QASYMM8_SIGNED/F32/F16. - * @param[out] output Destination tensor with the same dimensions of input. Data types supported: QASYMM8/QASYMM8_SIGNED/QASYMM16. - * - * @note Output auto initialization is not supported by this kernel - */ - void configure(const ITensor *input, ITensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref NEQuantizationLayerKernel - * - * @param[in] input Input tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F32/F16. - * @param[in] output Output tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/QASYMM16. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Common signature for all the specialised @ref NEQuantizationLayerKernel functions - * - * @param[in] window Region on which to execute the kernel. 
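 *
 * Illustrative usage sketch (editorial addition, not part of the original header). Because output
 * auto-initialization is not supported (see the note above), the destination metadata, including
 * its quantization info, is set explicitly; the shapes, scale and offset are hypothetical, and the
 * post-move kernel header location is assumed:
 * @code
 * #include "src/core/NEON/kernels/NEQuantizationLayerKernel.h" // header location after this patch
 * #include "arm_compute/runtime/NEON/NEScheduler.h"
 * #include "arm_compute/runtime/Tensor.h"
 *
 * void quantize_to_qasymm8(arm_compute::Tensor &src_f32)
 * {
 *     using namespace arm_compute;
 *     Tensor dst;
 *     dst.allocator()->init(TensorInfo(src_f32.info()->tensor_shape(), 1, DataType::QASYMM8, QuantizationInfo(1.f / 255.f, 0)));
 *     dst.allocator()->allocate();
 *     NEQuantizationLayerKernel quant_kernel;
 *     quant_kernel.configure(&src_f32, &dst);
 *     NEScheduler::get().schedule(&quant_kernel, Window::DimY);
 * }
 * @endcode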
- */ - using QuantizationFunctionExecutorPtr = void (NEQuantizationLayerKernel::*)(const Window &window); - /** Function to apply QASYMM8 or QASYMM8_SIGNED quantization on a tensor. - * - * @param[in] window Region on which to execute the kernel. - */ - template - void run_quantize_qasymm8(const Window &window); - /** Function to apply QASYMM16 quantization on a tensor. - * - * @param[in] window Region on which to execute the kernel. - */ - template - void run_quantize_qasymm16(const Window &window); - - const ITensor *_input; - ITensor *_output; - - QuantizationFunctionExecutorPtr _func; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEQUANTIZATIONLAYERKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEROIAlignLayerKernel.h b/arm_compute/core/NEON/kernels/NEROIAlignLayerKernel.h deleted file mode 100644 index 66ebb5e261..0000000000 --- a/arm_compute/core/NEON/kernels/NEROIAlignLayerKernel.h +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Copyright (c) 2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEROIALIGNLAYERKERNEL_H -#define ARM_COMPUTE_NEROIALIGNLAYERKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the RoIAlign kernel. - */ -class NEROIAlignLayerKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEROIAlignLayerKernel"; - } - - /** Constructor */ - NEROIAlignLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEROIAlignLayerKernel(const NEROIAlignLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEROIAlignLayerKernel &operator=(const NEROIAlignLayerKernel &) = delete; - /** Default Move Constructor. */ - NEROIAlignLayerKernel(NEROIAlignLayerKernel &&) = default; - /** Default move assignment operator. */ - NEROIAlignLayerKernel &operator=(NEROIAlignLayerKernel &&) = default; - /** Default destructor */ - ~NEROIAlignLayerKernel() = default; - - /** Set the input and output tensors. - * - * @param[in] input Source tensor. Data types supported: QASYMM8/F16/F32. - * @param[in] rois ROIs tensor, it is a 2D tensor of size [5, N] (where N is the number of ROIs) containing top left and bottom right corner - * as coordinate of an image and batch_id of ROI [ batch_id, x1, y1, x2, y2 ]. 
- * Data types supported: QASYMM16 with scale of 0.125 and 0 offset if @p input is QASYMM8, otherwise same as @p input - * @param[out] output Destination tensor. Data types supported: Same as @p input. - * @param[in] pool_info Contains pooling operation information described in @ref ROIPoolingLayerInfo. - * - * @note The x and y dimensions of @p output tensor must be the same as @p pool_info 's pooled - * width and pooled height. - * @note The z dimensions of @p output tensor and @p input tensor must be the same. - * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array. - */ - void configure(const ITensor *input, const ITensor *rois, ITensor *output, const ROIPoolingLayerInfo &pool_info); - /** Static function to check if given info will lead to a valid configuration of @ref NEROIAlignLayerKernel - * - * @param[in] input Source tensor info. Data types supported: QASYMM8/F16/F32. - * @param[in] rois ROIs tensor info. Data types supported: QASYMM16 with scale of 0.125 and 0 offset if @p input is QASYMM8, - * otherwise same as @p input - * @param[in] output Destination tensor info. Data types supported: Same as @p input. - * @param[in] pool_info Contains pooling operation information described in @ref ROIPoolingLayerInfo. - * - * @note The x and y dimensions of @p output tensor must be the same as @p pool_info 's pooled - * width and pooled height. - * @note The z dimensions of @p output tensor and @p input tensor must be the same. - * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array. - * - * @return a Status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *rois, ITensorInfo *output, const ROIPoolingLayerInfo &pool_info); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - template - void internal_run(const Window &window, const ThreadInfo &info); - - const ITensor *_input; - ITensor *_output; - const ITensor *_rois; - ROIPoolingLayerInfo _pool_info; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEROIALIGNLAYERKERNEL_H*/ diff --git a/arm_compute/core/NEON/kernels/NEROIPoolingLayerKernel.h b/arm_compute/core/NEON/kernels/NEROIPoolingLayerKernel.h deleted file mode 100644 index fa9685bc6b..0000000000 --- a/arm_compute/core/NEON/kernels/NEROIPoolingLayerKernel.h +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Copyright (c) 2017-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEROIPOOLINGLAYERKERNEL_H -#define ARM_COMPUTE_NEROIPOOLINGLAYERKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -#include "arm_compute/core/IArray.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the ROI pooling layer kernel */ -class NEROIPoolingLayerKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEROIPoolingLayerKernel"; - } - /** Default constructor */ - NEROIPoolingLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEROIPoolingLayerKernel(const NEROIPoolingLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEROIPoolingLayerKernel &operator=(const NEROIPoolingLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - NEROIPoolingLayerKernel(NEROIPoolingLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - NEROIPoolingLayerKernel &operator=(NEROIPoolingLayerKernel &&) = default; - /** Default destructor */ - ~NEROIPoolingLayerKernel() = default; - - /** Set the input and output tensors. - * - * @param[in] input Source tensor. Data types supported: F32. - * @param[in] rois ROIs tensor, it is a 2D tensor of size [5, N] (where N is the number of ROIs) containing top left and bottom right corner - * as coordinate of an image and batch_id of ROI [ batch_id, x1, y1, x2, y2 ]. Data types supported: U16 - * @param[out] output Destination tensor. Data types supported: Same as @p input. - * @param[in] pool_info Contains pooling operation information described in @ref ROIPoolingLayerInfo. - * - * @note The x and y dimensions of @p output tensor must be the same as that specified by @p pool_info 's pooled - * width and pooled height. - * @note The z dimensions of @p output tensor and @p input tensor must be the same. - * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois tensor. - */ - void configure(const ITensor *input, const ITensor *rois, ITensor *output, const ROIPoolingLayerInfo &pool_info); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - const ITensor *_input; - const ITensor *_rois; - ITensor *_output; - ROIPoolingLayerInfo _pool_info; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEROIPOOLINGLAYERKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NERangeKernel.h b/arm_compute/core/NEON/kernels/NERangeKernel.h deleted file mode 100644 index 84ebd53b1b..0000000000 --- a/arm_compute/core/NEON/kernels/NERangeKernel.h +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Copyright (c) 2018-2019 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NERANGEKERNEL_H -#define ARM_COMPUTE_NERANGEKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class ITensor; - -/** Kernel class for Range - * - * range generates a 1-D tensor containing a sequence of numbers that begins at 'start' and extends by increments - * of 'step' up to but not including 'end'. - */ -class NERangeKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NERangeKernel"; - } - /** Default constructor */ - NERangeKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NERangeKernel(const NERangeKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NERangeKernel &operator=(const NERangeKernel &) = delete; - /** Allow instances of this class to be moved */ - NERangeKernel(NERangeKernel &&) = default; - /** Allow instances of this class to be moved */ - NERangeKernel &operator=(NERangeKernel &&) = default; - /** Default destructor */ - ~NERangeKernel() = default; - /** Initialize the kernel's output tensor, start, end and step of the sequence. - * - * @param[out] output Output tensor. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32. - * @param[in] start The starting value of the sequence. - * @param[in] end The ending (not including) value of the sequence. - * @param[in] step The gap between each pair of values in the sequence. - */ - void configure(ITensor *output, float start, float end, float step); - /** Static function to check if given info will lead to a valid configuration of @ref NERangeKernel - * - * @param[in] output Output tensor info. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32. - * @param[in] start The starting value of the sequence. - * @param[in] end The ending (not including) value of the sequence. - * @param[in] step The gap between each pair of values in the sequence. 
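 *
 * Illustrative usage sketch (editorial addition, not part of the original header). It generates
 * the sequence 0, 2, 4, ..., 18 into a 1-D F32 tensor of 10 elements; the tensor name is
 * hypothetical and the post-move kernel header location is assumed:
 * @code
 * #include "src/core/NEON/kernels/NERangeKernel.h" // header location after this patch
 * #include "arm_compute/runtime/NEON/NEScheduler.h"
 * #include "arm_compute/runtime/Tensor.h"
 *
 * void fill_range()
 * {
 *     using namespace arm_compute;
 *     Tensor output;
 *     output.allocator()->init(TensorInfo(TensorShape(10U), 1, DataType::F32)); // (20 - 0) / 2 = 10 values
 *     output.allocator()->allocate();
 *     NERangeKernel range_kernel;
 *     range_kernel.configure(&output, 0.f, 20.f, 2.f);
 *     NEScheduler::get().schedule(&range_kernel, Window::DimX);
 * }
 * @endcode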
- * - * @return a status - */ - static Status validate(const ITensorInfo *output, float start, float end, float step); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - using RangeFunction = void(ITensor *output, float start, float step, const Window &window); - - RangeFunction *_func; /**< Range function to be called */ - float _start; /**< Start of sequence */ - float _end; /**< End of sequence */ - float _step; /**< Increment/step value */ - ITensor *_output; /**< Destination tensor */ -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NERANGEKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEReductionOperationKernel.h b/arm_compute/core/NEON/kernels/NEReductionOperationKernel.h deleted file mode 100644 index b913ea4e90..0000000000 --- a/arm_compute/core/NEON/kernels/NEReductionOperationKernel.h +++ /dev/null @@ -1,92 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEREDUCTIONOPERATIONKERNEL_H -#define ARM_COMPUTE_NEREDUCTIONOPERATIONKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** NEON kernel to perform a reduction operation - * - * @note For ARG_MIN/ARG_MAX reduction, the default data type for an uninitialized - * output tensor is signed 32-bit integer (S32). It is the user's responsibility - * to check that the results do not overflow because the indices are computed - * in unsigned 32-bit (U32). 
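 *
 * Illustrative sketch of that convention (editorial addition, not part of the original header):
 * a metadata-only check of an ARG_IDX_MAX reduction along axis 0 with an explicit S32 index
 * output; the shapes are hypothetical and the post-move kernel header location is assumed.
 * @code
 * #include "src/core/NEON/kernels/NEReductionOperationKernel.h" // header location after this patch
 * #include "arm_compute/core/Error.h"
 * #include "arm_compute/core/TensorInfo.h"
 *
 * void check_argmax_metadata()
 * {
 *     using namespace arm_compute;
 *     const TensorInfo input(TensorShape(128U, 24U, 8U), 1, DataType::F32);
 *     const TensorInfo indices(TensorShape(1U, 24U, 8U), 1, DataType::S32); // axis 0 collapses to 1
 *     ARM_COMPUTE_ERROR_THROW_ON(NEReductionOperationKernel::validate(&input, &indices, 0, ReductionOperation::ARG_IDX_MAX));
 * }
 * @endcode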
- */ -class NEReductionOperationKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEReductionOperationKernel"; - } - /** Default constructor */ - NEReductionOperationKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEReductionOperationKernel(const NEReductionOperationKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEReductionOperationKernel &operator=(const NEReductionOperationKernel &) = delete; - /** Allow instances of this class to be moved */ - NEReductionOperationKernel(NEReductionOperationKernel &&) = default; - /** Allow instances of this class to be moved */ - NEReductionOperationKernel &operator=(NEReductionOperationKernel &&) = default; - /** Default destructor */ - ~NEReductionOperationKernel() = default; - - /** Set the source and destination of the kernel - * - * @param[in] input Source tensor. Data type supported: QASYMM8_SIGNED/QASYMM8/F16/F32/S32. Data layouts supported: NCHW. - * @param[out] output Destination tensor. Data types and data layouts supported: same as @p input, S32 for ARG_MIN/ARG_MAX. - * Output will have the same number of dimensions as input. - * @param[in] axis Axis along which to reduce. Supported reduction axis: 0 - * @param[in] op Reduction operation to perform. - */ - void configure(const ITensor *input, ITensor *output, unsigned int axis, ReductionOperation op); - - /** Static function to check if given info will lead to a valid configuration of @ref NEReductionOperationKernel. - * - * @param[in] input Source tensor info. Data type supported: QASYMM8_SIGNED/QASYMM8/F16/F32/S32. Data layouts supported: NCHW. - * @param[in] output Destination tensor info. Data types and data layouts supported: same as @p input, S32 for ARG_MIN/ARG_MAX. - * Output will have the same number of dimensions as input. - * @param[in] axis Axis along which to reduce. Supported reduction axis: 0 - * @param[in] op Reduction operation to perform. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int axis, ReductionOperation op); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - const ITensor *_input; - ITensor *_output; - unsigned int _reduction_axis; - ReductionOperation _op; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEREDUCTIONOPERATIONKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NERemapKernel.h b/arm_compute/core/NEON/kernels/NERemapKernel.h deleted file mode 100644 index 34c80a38d9..0000000000 --- a/arm_compute/core/NEON/kernels/NERemapKernel.h +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Copyright (c) 2016-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software.
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEREMAPKERNEL_H -#define ARM_COMPUTE_NEREMAPKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class ITensor; - -/** NEON kernel to perform a remap on a tensor */ -class NERemapKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NERemapKernel"; - } - /** Default constructor */ - NERemapKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NERemapKernel(const NERemapKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NERemapKernel &operator=(const NERemapKernel &) = delete; - /** Allow instances of this class to be moved */ - NERemapKernel(NERemapKernel &&) = default; - /** Allow instances of this class to be moved */ - NERemapKernel &operator=(NERemapKernel &&) = default; - /** Default destructor */ - ~NERemapKernel() = default; - - /** Initialize the kernel's input, output and border mode. - * - * @param[in] input Source tensor. Data type supported: U8. - * @param[in] map_x Map for X coordinates. Data type supported: F32. - * @param[in] map_y Map for Y coordinates. Data type supported: F32. - * @param[out] output Destination tensor. Data types supported: U8. All but the lowest two dimensions must be the same size as in the input tensor, i.e. remapping is only performed within the XY-plane. - * @param[in] policy The interpolation type. - */ - void configure(const ITensor *input, const ITensor *map_x, const ITensor *map_y, ITensor *output, InterpolationPolicy policy); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; - -private: - /** function to perform nearest interpolation on the given window */ - void remap_nearest(const Window &window); - /** function to perform bilinear interpolation on the given window */ - void remap_bilinear(const Window &window); - /** Remap function to use for the particular interpolation type passed to configure() */ - void (NERemapKernel::*_func)(const Window &window); - - const ITensor *_input; /**< Input image */ - ITensor *_output; /**< Output image */ - const ITensor *_map_x; /**< Input remap x coordinates */ - const ITensor *_map_y; /**< Input remap y coordinates */ -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEREMAPKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEReorgLayerKernel.h b/arm_compute/core/NEON/kernels/NEReorgLayerKernel.h deleted file mode 100644 index d751a6b24c..0000000000 --- a/arm_compute/core/NEON/kernels/NEReorgLayerKernel.h +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Copyright (c) 2018-2019 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEREORGLAYERKERNEL_H -#define ARM_COMPUTE_NEREORGLAYERKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -// Forward declarations -class ITensor; - -/** Interface for the kernel to perform tensor re-organization */ -class NEReorgLayerKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEReorgLayerKernel"; - } - /** Default constructor */ - NEReorgLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEReorgLayerKernel(const NEReorgLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEReorgLayerKernel &operator=(const NEReorgLayerKernel &) = delete; - /** Default Move Constructor. */ - NEReorgLayerKernel(NEReorgLayerKernel &&) = default; - /** Default move assignment operator */ - NEReorgLayerKernel &operator=(NEReorgLayerKernel &&) = default; - /** Default destructor */ - ~NEReorgLayerKernel() = default; - /** Set the input and output of the kernel - * - * @param[in] input Source tensor. Data type supported: All - * @param[out] output Destination tensor. Data type supported: Same as @p input - * @param[in] stride Stride to be used during data re-organization. - * It defines the spatial distance between 2 consecutive pixels in the x and y direction - */ - void configure(const ITensor *input, ITensor *output, int32_t stride); - - /** Static function to check if given info will lead to a valid configuration of @ref NEReorgLayerKernel - * - * @param[in] input Source tensor info. Data type supported: All - * @param[in] output Destination tensor info.
Data type supported: Same as @p input - * @param[in] stride Stride to be used during data re-organization - * It defines the spatial distance between 2 consecutive pixels in the x and y direction - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, int32_t stride); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - const ITensor *_input; - ITensor *_output; - int32_t _stride; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEREORGLAYERKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEReshapeLayerKernel.h b/arm_compute/core/NEON/kernels/NEReshapeLayerKernel.h deleted file mode 100644 index a4b8426e41..0000000000 --- a/arm_compute/core/NEON/kernels/NEReshapeLayerKernel.h +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NERESHAPELAYERKERNEL_H -#define ARM_COMPUTE_NERESHAPELAYERKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/NEON/INESimpleKernel.h" - -namespace arm_compute -{ -// Forward declarations -class ITensor; - -/** Interface for the kernel to perform tensor reshaping */ -class NEReshapeLayerKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEReshapeLayerKernel"; - } - /** Set the input and output info of the kernel - * - * @param[in] input Source tensor info. Data type supported: All - * @param[out] output Destination tensor info. Data type supported: Same as @p input - */ - void configure(const ITensorInfo *input, ITensorInfo *output); - - /** Static function to check if given info will lead to a valid configuration of @ref NEReshapeLayerKernel - * - * @param[in] input Source tensor info. Data type supported: All - * @param[in] output Destination tensor info. 
Data type supported: Same as @p input - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output); - - // Inherited methods overridden: - void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NERESHAPELAYERKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEReverseKernel.h b/arm_compute/core/NEON/kernels/NEReverseKernel.h deleted file mode 100644 index fda79154a0..0000000000 --- a/arm_compute/core/NEON/kernels/NEReverseKernel.h +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Copyright (c) 2018-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEREVERSEKERNEL_H -#define ARM_COMPUTE_NEREVERSEKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the reverse layer kernel. */ -class NEReverseKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEReverseKernel"; - } - /** Default constructor */ - NEReverseKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEReverseKernel(const NEReverseKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEReverseKernel &operator=(const NEReverseKernel &) = delete; - /** Allow instances of this class to be moved */ - NEReverseKernel(NEReverseKernel &&) = default; - /** Allow instances of this class to be moved */ - NEReverseKernel &operator=(NEReverseKernel &&) = default; - /** Default destructor */ - ~NEReverseKernel() = default; - /** Initialise the kernel's inputs and output - * - * @param[in] input Input tensor. Data types supported: All - * @param[out] output Output tensor. Data type supported: Same as @p input - * @param[in] axis Axis tensor. Contains the indices of the dimensions to reverse. Data type supported: U32 - */ - void configure(const ITensor *input, ITensor *output, const ITensor *axis); - - /** Static function to check if given info will lead to a valid configuration of @ref NEReverseKernel - * - * @param[in] input Input tensor info. Data types supported: All - * @param[in] output Output tensor info. Data type supported: Same as @p input - * @param[in] axis Axis tensor info. Contains the indices of the dimensions to reverse. 
Data type supported: U32 - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *axis); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - const ITensor *_input; - ITensor *_output; - const ITensor *_axis; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEREVERSEKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEScaleKernel.h b/arm_compute/core/NEON/kernels/NEScaleKernel.h deleted file mode 100644 index b35bb72741..0000000000 --- a/arm_compute/core/NEON/kernels/NEScaleKernel.h +++ /dev/null @@ -1,127 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NESCALEKERNEL_H -#define ARM_COMPUTE_NESCALEKERNEL_H - -#include "arm_compute/core/KernelDescriptors.h" -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** NEON kernel to perform scaling on a tensor */ -class NEScaleKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEScaleKernel"; - } - /** Default constructor */ - NEScaleKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEScaleKernel(const NEScaleKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEScaleKernel &operator=(const NEScaleKernel &) = delete; - /** Allow instances of this class to be moved */ - NEScaleKernel(NEScaleKernel &&) = default; - /** Allow instances of this class to be moved */ - NEScaleKernel &operator=(NEScaleKernel &&) = default; - /** Default destructor */ - ~NEScaleKernel() = default; - - /** Initialise the kernel's inputs, output and interpolation policy - * - * @note dx, dy and offsets have the same dimensions (width and height) of the output tensor - * @note Using @p policy Area only supports data layout NCHW and input data type U8. - * - * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/U8/S16/F16/F32. - * @param[in] dx Pixel's distance between the X real coordinate and the smallest X following integer. Data type supported: F32 - * @param[in] dy Pixel's distance between the Y real coordinate and the smallest Y following integer. 
Data type supported: F32 - * @param[in] offsets Offset to access the pixel with NEAREST interpolation or the top-left pixel with BILINEAR interpolation in the input tensor. Data type supported: S32. - * @param[out] output Destination tensor. Data types supported: Same as @p input. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane. - * @param[in] info @ref ScaleKernelInfo to use for configuration - */ - void configure(const ITensor *input, const ITensor *dx, const ITensor *dy, const ITensor *offsets, ITensor *output, - const ScaleKernelInfo &info); - /** Static function to check if given info will lead to a valid configuration of @ref NEScaleKernel - * - * @note dx, dy and offsets have the same dimensions (width and height) of the output tensor - * @note Using @p policy Area only supports data layout NCHW and input data type U8. - * - * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/U8/S16/F16/F32. - * @param[in] dx Pixel's distance between the X real coordinate and the smallest X following integer. Data type supported: F32 - * @param[in] dy Pixel's distance between the Y real coordinate and the smallest Y following integer. Data type supported: F32 - * @param[in] offsets Offset to access the pixel with NEAREST interpolation or the top-left pixel with BILINEAR interpolation in the input tensor. Data type supported: S32. - * @param[in] output Destination tensor. Data types supported: Same as @p input. All but the lowest two dimensions must be the same size as in the input tensor, i.e. scaling is only performed within the XY-plane. - * @param[in] info @ref ScaleKernelInfo to use for validation - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *dx, const ITensorInfo *dy, const ITensorInfo *offsets, ITensorInfo *output, - const ScaleKernelInfo &info); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** function to perform scale using area interpolation on the given window - * - * @note Used only in case down-sampling. 
- */ - void scale_area_nchw_u8(const Window &window); - - /** function to perform scale using bilinear interpolation on the given window */ - template - void scale_bilinear_nchw(const Window &window); - /** function to perform scale using bilinear interpolation on the given window */ - template - void scale_bilinear_nhwc(const Window &window); - /** function to perform scale using bilinear interpolation on the given window */ - template - void scale_bilinear_qasymm(const Window &window); - - /** function to perform scale using nearest neighbour on the given window */ - template - void scale_nearest_nchw(const Window &window); - /** function to perform scale using nearest neighbour on the given window */ - template - void scale_nearest_nhwc(const Window &window); - - /** Scale function to use for the particular function to use */ - using ScaleFunctionPtr = void (NEScaleKernel::*)(const Window &window); - - ScaleFunctionPtr _func; - const ITensor *_offsets; - const ITensor *_dx; - const ITensor *_dy; - const ITensor *_input; - ITensor *_output; - InterpolationPolicy _policy; - BorderMode _border_mode; - PixelValue _constant_border_value; - float _sampling_offset; - bool _align_corners; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NESCALEKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEScharr3x3Kernel.h b/arm_compute/core/NEON/kernels/NEScharr3x3Kernel.h deleted file mode 100644 index 7e1fdb5d9e..0000000000 --- a/arm_compute/core/NEON/kernels/NEScharr3x3Kernel.h +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright (c) 2016-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NESCHARR3x3KERNEL_H -#define ARM_COMPUTE_NESCHARR3x3KERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the kernel to run a 3x3 Scharr filter on a tensor. 
- * -* @f[ -* \mathbf{G}_x=\begin{vmatrix} -* -3 & 0 & +3\\ -* -10& 0 & +10\\ -* -3 & 0 & +3 -* \end{vmatrix} -* @f] -*/ -class NEScharr3x3Kernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEScharr3x3Kernel"; - } - /** Default constructor */ - NEScharr3x3Kernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEScharr3x3Kernel(const NEScharr3x3Kernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEScharr3x3Kernel &operator=(const NEScharr3x3Kernel &) = delete; - /** Allow instances of this class to be moved */ - NEScharr3x3Kernel(NEScharr3x3Kernel &&) = default; - /** Allow instances of this class to be moved */ - NEScharr3x3Kernel &operator=(NEScharr3x3Kernel &&) = default; - /** Default destructor */ - ~NEScharr3x3Kernel() = default; - - /** Initialise the kernel's source, destination and border. - * - * @note At least one of output_x or output_y must be set. - * - * @param[in] input Source tensor. Data type supported: U8. - * @param[out] output_x (Optional) Destination tensor for the X gradient. Data type supported: S16. - * @param[out] output_y (Optional) Destination tensor for the Y gradient. Data type supported: S16. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ITensor *input, ITensor *output_x, ITensor *output_y, bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; - -private: - bool _run_scharr_x; /**< Do we need to run Scharr X ? */ - bool _run_scharr_y; /**< Do we need to run Scharr Y ? */ - const ITensor *_input; /**< Input tensor */ - ITensor *_output_x; /**< Output tensor for scharr X */ - ITensor *_output_y; /**< Output tensor for scharr Y */ -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NESCHARR3x3KERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NESelectKernel.h b/arm_compute/core/NEON/kernels/NESelectKernel.h deleted file mode 100644 index bb8695f598..0000000000 --- a/arm_compute/core/NEON/kernels/NESelectKernel.h +++ /dev/null @@ -1,103 +0,0 @@ -/* - * Copyright (c) 2018-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NESELECTKERNEL_H -#define ARM_COMPUTE_NESELECTKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -// Forward declarations -class ITensor; - -/** Interface for the select kernel - * - * Select is computed by: - * @f[ output(i) = condition(i) ? x(i) : y(i) @f] - * - */ -class NESelectKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NESelectKernel"; - } - /** Default constructor */ - NESelectKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NESelectKernel(const NESelectKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NESelectKernel &operator=(const NESelectKernel &) = delete; - /** Allow instances of this class to be moved */ - NESelectKernel(NESelectKernel &&) = default; - /** Allow instances of this class to be moved */ - NESelectKernel &operator=(NESelectKernel &&) = default; - /** Default destructor */ - ~NESelectKernel() = default; - - /** Initialise the kernel's inputs and output - * - * @param[in] c Condition input tensor. Data types supported: U8. - * @param[in] x First input tensor. Data types supported: All. - * @param[in] y Second input tensor. Data types supported: Same as @p x - * @param[out] output Output tensor. Data types supported: Same as @p x - */ - void configure(const ITensor *c, const ITensor *x, const ITensor *y, ITensor *output); - - /** Validate the arguments passed to the kernel - * - * @param[in] c Condition input tensor. Data types supported: U8. - * @param[in] x First input tensor. Data types supported: All. - * @param[in] y Second input tensor. Data types supported: Same as @p x - * @param[in] output Output tensor. Data types supported: Same as @p x. - * - * @return a status - */ - static Status validate(const ITensorInfo *c, const ITensorInfo *x, const ITensorInfo *y, const ITensorInfo *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Common signature for all the specialised select functions - * - * @param[in] c Condition input tensor. Data types supported: U8. - * @param[in] x First input tensor. Data types supported: All. - * @param[in] y Second input tensor. Data types supported: Same as @p x - * @param[in] output Output tensor. Data types supported: Same as @p x. - */ - using SelectFunction = void(const ITensor *c, const ITensor *x, const ITensor *y, ITensor *output, const Window &window); - - /** Select function to use for the particular tensor types passed to configure() */ - SelectFunction *_function; - const ITensor *_c; /**< Condition tensor */ - const ITensor *_x; /**< Source tensor 1 */ - const ITensor *_y; /**< Source tensor 2 */ - ITensor *_output; /**< Destination tensor */ - bool _has_same_rank; /**< Flag that indicates if condition tensor and other inputs have the same rank */ -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NESELECTKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NESobel3x3Kernel.h b/arm_compute/core/NEON/kernels/NESobel3x3Kernel.h deleted file mode 100644 index 66a13c4c26..0000000000 --- a/arm_compute/core/NEON/kernels/NESobel3x3Kernel.h +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright (c) 2016-2019 Arm Limited.
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NESOBEL3x3KERNEL_H -#define ARM_COMPUTE_NESOBEL3x3KERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the kernel to run a 3x3 Sobel X filter on a tensor. - * - * @f[ - * \mathbf{G}_x=\begin{vmatrix} - * -1 & 0 & +1\\ - * -2 & 0 & +2\\ - * -1 & 0 & +1 - * \end{vmatrix} - * @f] -*/ -class NESobel3x3Kernel : public INEKernel -{ -public: - const char *name() const override - { - return "NESobel3x3Kernel"; - } - /** Default constructor */ - NESobel3x3Kernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NESobel3x3Kernel(const NESobel3x3Kernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NESobel3x3Kernel &operator=(const NESobel3x3Kernel &) = delete; - /** Allow instances of this class to be moved */ - NESobel3x3Kernel(NESobel3x3Kernel &&) = default; - /** Allow instances of this class to be moved */ - NESobel3x3Kernel &operator=(NESobel3x3Kernel &&) = default; - /** Default destructor */ - ~NESobel3x3Kernel() = default; - - /** Initialise the kernel's source, destination and border mode. - * - * @note At least one of output_x or output_y must be set. - * - * @param[in] input Source tensor. Data type supported: U8. - * @param[out] output_x (Optional) Destination tensor for the X gradient. Data type supported: S16. - * @param[out] output_y (Optional) Destination tensor for the Y gradient. Data type supported: S16. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ITensor *input, ITensor *output_x, ITensor *output_y, bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; - -private: - bool _run_sobel_x; /**< Do we need to run Sobel X ? */ - bool _run_sobel_y; /**< Do we need to run Sobel Y ? 
*/ - const ITensor *_input; /**< Input tensor */ - ITensor *_output_x; /**< Output tensor for sobel X */ - ITensor *_output_y; /**< Output tensor for sobel Y */ -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NESOBEL3x3KERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NESobel5x5Kernel.h b/arm_compute/core/NEON/kernels/NESobel5x5Kernel.h deleted file mode 100644 index 02029b6a47..0000000000 --- a/arm_compute/core/NEON/kernels/NESobel5x5Kernel.h +++ /dev/null @@ -1,126 +0,0 @@ -/* - * Copyright (c) 2016-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NESOBEL5x5KERNEL_H -#define ARM_COMPUTE_NESOBEL5x5KERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the kernel to run the horizontal pass of 5x5 Sobel filter on a tensor. - * - */ -class NESobel5x5HorKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NESobel5x5HorKernel"; - } - /** Default constructor */ - NESobel5x5HorKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NESobel5x5HorKernel(const NESobel5x5HorKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NESobel5x5HorKernel &operator=(const NESobel5x5HorKernel &) = delete; - /** Allow instances of this class to be moved */ - NESobel5x5HorKernel(NESobel5x5HorKernel &&) = default; - /** Allow instances of this class to be moved */ - NESobel5x5HorKernel &operator=(NESobel5x5HorKernel &&) = default; - /** Default destructor */ - ~NESobel5x5HorKernel() = default; - - /** Initialise the kernel's source, destination and border mode. - * - * @note At least one of output_x or output_y must be set - * - * @param[in] input Source tensor. Data type supported: U8. - * @param[out] output_x (Optional) Destination tensor for the X gradient. Data type supported: S16. - * @param[out] output_y (Optional) Destination tensor for the Y gradient. Data type supported: S16. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. 
- */ - void configure(const ITensor *input, ITensor *output_x, ITensor *output_y, bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; - -private: - const ITensor *_input; /**< Input tensor */ - ITensor *_output_x; /**< X output of horizontal pass */ - ITensor *_output_y; /**< Y output of horizontal pass */ - bool _run_sobel_x; /**< Do we need to run Sobel X? */ - bool _run_sobel_y; /**< Do we need to run Sobel Y? */ - BorderSize _border_size; /**< Border size */ -}; - -/** Interface for the kernel to run the vertical pass of 5x5 Sobel Y filter on a tensor. - * -*/ -class NESobel5x5VertKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NESobel5x5VertKernel"; - } - /** Default constructor */ - NESobel5x5VertKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NESobel5x5VertKernel(const NESobel5x5VertKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NESobel5x5VertKernel &operator=(const NESobel5x5VertKernel &) = delete; - /** Allow instances of this class to be moved */ - NESobel5x5VertKernel(NESobel5x5VertKernel &&) = default; - /** Allow instances of this class to be moved */ - NESobel5x5VertKernel &operator=(NESobel5x5VertKernel &&) = default; - /** Default destructor */ - ~NESobel5x5VertKernel() = default; - - /** Initialise the kernel's source, destination and border mode. - * - * @param[in] input_x Input for X (X output of hor pass). Data type supported: S16. - * @param[in] input_y Input for Y (Y output of hor pass). Data type supported: S16. - * @param[out] output_x Destination tensor for the X gradient. Data type supported: S16. - * @param[out] output_y Destination tensor for the Y gradient. Data type supported: S16. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(ITensor *input_x, ITensor *input_y, ITensor *output_x, ITensor *output_y, bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; - -private: - ITensor *_input_x; /**< X input (X output of the hor pass) */ - ITensor *_input_y; /**< Y input (Y output of the hor pass) */ - ITensor *_output_x; /**< X output of sobel */ - ITensor *_output_y; /**< Y output of sobel */ - bool _run_sobel_x; /**< Do we need to run sobel X? */ - bool _run_sobel_y; /**< Do we need to run sobel Y? */ -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NESOBEL5x5KERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NESobel7x7Kernel.h b/arm_compute/core/NEON/kernels/NESobel7x7Kernel.h deleted file mode 100644 index 0e8b82c96a..0000000000 --- a/arm_compute/core/NEON/kernels/NESobel7x7Kernel.h +++ /dev/null @@ -1,130 +0,0 @@ -/* - * Copyright (c) 2016-2019 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NESOBEL7x7KERNEL_H -#define ARM_COMPUTE_NESOBEL7x7KERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the kernel to run the horizontal pass of 7x7 Sobel filter on a tensor. - * - */ -class NESobel7x7HorKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NESobel7x7HorKernel"; - } - /** Default constructor */ - NESobel7x7HorKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NESobel7x7HorKernel(const NESobel7x7HorKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NESobel7x7HorKernel &operator=(const NESobel7x7HorKernel &) = delete; - /** Allow instances of this class to be moved */ - NESobel7x7HorKernel(NESobel7x7HorKernel &&) = default; - /** Allow instances of this class to be moved */ - NESobel7x7HorKernel &operator=(NESobel7x7HorKernel &&) = default; - /** Default destructor */ - ~NESobel7x7HorKernel() = default; - - /** Initialise the kernel's source, destination and border mode. - * - * @note At least one of output_x or output_y must be set. - * - * @param[in] input Source tensor. Data type supported: U8. - * @param[out] output_x (Optional) Destination tensor for the X gradient. Data type supported: S32. - * @param[out] output_y (Optional) Destination tensor for the Y gradient. Data type supported: S32. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ITensor *input, ITensor *output_x, ITensor *output_y, bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; - -private: - const ITensor *_input; /**< Input tensor */ - ITensor *_output_x; /**< X output of horizontal pass */ - ITensor *_output_y; /**< Y output of horizontal pass */ - bool _run_sobel_x; /**< Do we need to run Sobel X? */ - bool _run_sobel_y; /**< Do we need to run Sobel Y? */ - BorderSize _border_size; /**< Border size */ -}; - -/** Interface for the kernel to run the vertical pass of 7x7 Sobel Y filter on a tensor. 
- * -*/ -class NESobel7x7VertKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NESobel7x7VertKernel"; - } - /** Default constructor */ - NESobel7x7VertKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NESobel7x7VertKernel(const NESobel7x7VertKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NESobel7x7VertKernel &operator=(const NESobel7x7VertKernel &) = delete; - /** Allow instances of this class to be moved */ - NESobel7x7VertKernel(NESobel7x7VertKernel &&) = default; - /** Allow instances of this class to be moved */ - NESobel7x7VertKernel &operator=(NESobel7x7VertKernel &&) = default; - /** Default destructor */ - ~NESobel7x7VertKernel() = default; - - /** Initialise the kernel's source, destination and border mode. - * - * @note At least one of output_x or output_y must be set - * @note If output_x is set then input_x must be set too - * @note If output_y is set then input_y must be set too - * - * @param[in] input_x (Optional) Input for X (X output of hor pass). Data type supported: S32. - * @param[in] input_y (Optional) Input for Y (Y output of hor pass). Data type supported: S32. - * @param[out] output_x (Optional) Destination tensor for the X gradient. Data type supported: S32. - * @param[out] output_y (Optional) Destination tensor for the Y gradient. Data type supported: S32. - * @param[in] border_undefined True if the border mode is undefined. False if it's replicate or constant. - */ - void configure(const ITensor *input_x, const ITensor *input_y, ITensor *output_x, ITensor *output_y, bool border_undefined); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; - -private: - const ITensor *_input_x; /**< X input (X output of the hor pass) */ - const ITensor *_input_y; /**< Y input (Y output of the hor pass) */ - ITensor *_output_x; /**< X output of sobel */ - ITensor *_output_y; /**< Y output of sobel */ - bool _run_sobel_x; /**< Do we need to run sobel X? */ - bool _run_sobel_y; /**< Do we need to run sobel Y? */ -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NESOBEL7x7KERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NESoftmaxLayerKernel.h b/arm_compute/core/NEON/kernels/NESoftmaxLayerKernel.h deleted file mode 100644 index e80cd222c5..0000000000 --- a/arm_compute/core/NEON/kernels/NESoftmaxLayerKernel.h +++ /dev/null @@ -1,139 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NESOFTMAXLAYERKERNEL_H -#define ARM_COMPUTE_NESOFTMAXLAYERKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/NEON/INESimpleKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the identifying the max value of 1D Logits */ -class NELogits1DMaxKernel : public INESimpleKernel -{ -public: - const char *name() const override - { - return "NELogits1DMaxKernel"; - } - /** Default constructor */ - NELogits1DMaxKernel(); - /** Set the input and output tensors. - * - * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[out] output Destination tensor. Data types supported: same as @p input - */ - void configure(const ITensor *input, ITensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref NELogits1DMaxKernel - * - * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] output Destination tensor. Data types supported: same as @p input - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - BorderSize border_size() const override; - -private: - using Logits1DMaxFunction = void(const ITensor &in, ITensor &out, const Window &window); - -private: - Logits1DMaxFunction *_func; - BorderSize _border_size; -}; - -/** Interface for softmax computation for QASYMM8 with pre-computed max. */ -template -class NELogits1DSoftmaxKernel : public INEKernel -{ -public: - const char *name() const override - { - if(IS_LOG) - { - return "NELogits1DSoftmaxKernel"; - } - else - { - return "NELogits1DLogSoftmaxKernel"; - } - } - /** Default constructor */ - NELogits1DSoftmaxKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NELogits1DSoftmaxKernel(const NELogits1DSoftmaxKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NELogits1DSoftmaxKernel &operator=(const NELogits1DSoftmaxKernel &) = delete; - /** Allow instances of this class to be moved */ - NELogits1DSoftmaxKernel(NELogits1DSoftmaxKernel &&) = default; - /** Allow instances of this class to be moved */ - NELogits1DSoftmaxKernel &operator=(NELogits1DSoftmaxKernel &&) = default; - /** Default destructor */ - ~NELogits1DSoftmaxKernel() = default; - /** Set the input and output tensors. - * - * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] max Max values tensor. Same shape as input with dimension 0 set to 1. - * Data types supported: same as @p input. - * @param[out] output Destination tensor. Data types supported: same as @p input. - * @param[in] beta A scaling factor for the exponent. - * - * @param tmp Auxiliary tensor. Must be type F32 and same shape as the input. - */ - void configure(const ITensor *input, const ITensor *max, ITensor *output, const float beta, ITensor *tmp); - /** Static function to check if given info will lead to a valid configuration of @ref NELogits1DSoftmaxKernel - * - * @param[in] input Source tensor info. 
Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] max Max values tensor info. Same shape as input with dimension 0 set to 1. - * Data types supported: same as @p input. - * @param[in] output Destination tensor info. Data types supported: same as @p input. - * @param[in] beta A scaling factor for the exponent. - * @param[in] tmp Tensor info of auxiliary. Must be type F32 and same shape as the input. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *max, - const ITensorInfo *output, const float beta, const ITensorInfo *tmp); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - using LogitsSoftmaxFunction = void(const ITensor &in, const ITensor &max, void *const tmp, ITensor &out, const float beta, - const Window &window); - - LogitsSoftmaxFunction *_func; - const ITensor *_input; - const ITensor *_max; - ITensor *_output; - float _beta; - ITensor *_tmp; //Temporary. Used internally -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NESOFTMAXLAYERKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NESpaceToBatchLayerKernel.h b/arm_compute/core/NEON/kernels/NESpaceToBatchLayerKernel.h deleted file mode 100644 index b5d7c692f0..0000000000 --- a/arm_compute/core/NEON/kernels/NESpaceToBatchLayerKernel.h +++ /dev/null @@ -1,111 +0,0 @@ -/* - * Copyright (c) 2019-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NESPACETOBATCHLAYERKERNEL_H -#define ARM_COMPUTE_NESPACETOBATCHLAYERKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -// Forward declaration -class ITensor; - -/** Interface for the space to batch kernel */ -class NESpaceToBatchLayerKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NESpaceToBatchLayerKernel"; - } - /** Default constructor */ - NESpaceToBatchLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NESpaceToBatchLayerKernel(const NESpaceToBatchLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NESpaceToBatchLayerKernel &operator=(const NESpaceToBatchLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - NESpaceToBatchLayerKernel(NESpaceToBatchLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - NESpaceToBatchLayerKernel &operator=(NESpaceToBatchLayerKernel &&) = default; - /** Default destructor */ - ~NESpaceToBatchLayerKernel() = default; - /** Initialise the kernel's inputs and output. - * - * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. - * @param[in] block_shape 1-D tensor with shape [M]. Data types supported: S32 - * @param[in] paddings 2-D tensor with shape [2, M]. Data types supported: S32 - * @param[out] output Tensor output. Data types supported: same as @p input - */ - void configure(const ITensor *input, const ITensor *block_shape, const ITensor *paddings, ITensor *output); - /** Initialise the kernel's input and output. (Static block shape and paddings) - * - * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. - * @param[in] block_shape_x Block shape x value. - * @param[in] block_shape_y Block shape y value. - * @param[in] padding_left The left padding of the output tensor. - * @param[in] padding_right The right padding of the output tensor. - * @param[out] output Tensor output. Data types supported: same as @p input - */ - void configure(const ITensor *input, const int block_shape_x, const int block_shape_y, const Size2D &padding_left, const Size2D &padding_right, ITensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref NESpaceToBatchLayerKernel - * - * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. - * @param[in] block_shape 1-D tensor with shape [M]. Data types supported: S32 - * @param[in] paddings 2-D tensor with shape [2, M]. Data types supported: S32 - * @param[in] output Tensor output. Data types supported: same as @p input - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *block_shape, const ITensorInfo *paddings, const ITensorInfo *output); - /** Static function to check if given info will lead to a valid configuration of @ref NESpaceToBatchLayerKernel (Static block shape and paddings) - * - * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. - * @param[in] block_shape_x Block shape x value. - * @param[in] block_shape_y Block shape y value. - * @param[in] padding_left The left padding of the output tensor. - * @param[in] padding_right The right padding of the output tensor. - * @param[in] output Tensor output. 
Data types supported: same as @p input - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const int block_shape_x, const int block_shape_y, const Size2D &padding_left, const Size2D &padding_right, const ITensorInfo *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - const ITensor *_input; /**< Source tensor */ - const ITensor *_block_shape; /**< Block shape tensor */ - const ITensor *_paddings; /**< Paddings tensor */ - ITensor *_output; /**< Destination tensor */ - DataLayout _data_layout; /**< Data layout to be used at run-time */ - - Size2D _padding_left; - int _block_shape_x; - int _block_shape_y; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NESPACETOBATCHLAYERKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NESpaceToDepthLayerKernel.h b/arm_compute/core/NEON/kernels/NESpaceToDepthLayerKernel.h deleted file mode 100644 index 11443e02c5..0000000000 --- a/arm_compute/core/NEON/kernels/NESpaceToDepthLayerKernel.h +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Copyright (c) 2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NESPACETODEPTHLAYERKERNEL_H -#define ARM_COMPUTE_NESPACETODEPTHLAYERKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the space to depth kernel */ -class NESpaceToDepthLayerKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NESpaceToDepthLayerKernel"; - } - /** Default constructor */ - NESpaceToDepthLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NESpaceToDepthLayerKernel(const NESpaceToDepthLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NESpaceToDepthLayerKernel &operator=(const NESpaceToDepthLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - NESpaceToDepthLayerKernel(NESpaceToDepthLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - NESpaceToDepthLayerKernel &operator=(NESpaceToDepthLayerKernel &&) = default; - /** Default destructor */ - ~NESpaceToDepthLayerKernel() = default; - /** Initialise the kernel's inputs and output. - * - * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. 
- * @param[out] output Tensor output. Data types supported: same as @p input - * @param[in] block_shape Block shape value - */ - void configure(const ITensor *input, ITensor *output, int32_t block_shape); - /** Static function to check if given info will lead to a valid configuration of @ref NESpaceToDepthLayerKernel - * - * @param[in] input Tensor input info. Supported tensor rank: 4. Data types supported: All. - * @param[in] output Tensor output info. Data types supported: same as @p input - * @param[in] block_shape Block shape value - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, int32_t block_shape); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - const ITensor *_input; /**< Source tensor */ - ITensor *_output; /**< Destination tensor */ - int32_t _block_shape; /**< Block shape */ - DataLayout _data_layout; /**< Data layout of the operation */ -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NESPACETODEPTHLAYERKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEStackLayerKernel.h b/arm_compute/core/NEON/kernels/NEStackLayerKernel.h deleted file mode 100644 index 710a6be7f4..0000000000 --- a/arm_compute/core/NEON/kernels/NEStackLayerKernel.h +++ /dev/null @@ -1,93 +0,0 @@ -/* - * Copyright (c) 2018-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#ifndef ARM_COMPUTE_NESTACKLAYERKERNEL_H -#define ARM_COMPUTE_NESTACKLAYERKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class ITensor; - -/** NEON kernel to stacks a rank-R tensor into one with rank-(R+1) along the axis dimension.*/ -class NEStackLayerKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEStackLayerKernel"; - } - /** Default constructor */ - NEStackLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEStackLayerKernel(const NEStackLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEStackLayerKernel &operator=(const NEStackLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - NEStackLayerKernel(NEStackLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - NEStackLayerKernel &operator=(NEStackLayerKernel &&) = default; - /** Default destructor */ - ~NEStackLayerKernel() = default; - /** Initialise the kernel's inputs and output - * - * @note Supported input tensor rank: up to 4 - * - * @param[in] input Input tensor. Data types supported: All - * @param[in] axis The dimension to stack the tensors along. It must be smaller than the number of input dimensions. - * @param[in] idx_input Index of the input tensor in the list of tensors to stack. - * All tensors in the list must have the same shape - * @param[in] num_tensors Number of tensors to stack - * @param[out] output Output tensor. Data types supported: Same as @p input. - * - */ - void configure(const ITensor *input, unsigned int axis, unsigned int idx_input, unsigned int num_tensors, ITensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref NEStackLayerKernel - * - * @note Supported input tensor rank: up to 4 - * - * @param[in] input Input tensor info. Data types supported: All - * @param[in] axis The dimension to stack the tensors along. It must be smaller than the number of input dimensions. - * @param[in] idx_input Index of the input tensor in the list of tensors to stack - * All tensors in the list must have the same shape - * @param[in] num_tensors Number of tensors to stack - * @param[in] output Output tensor info. Data types supported: Same as @p input. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, unsigned int axis, unsigned int idx_input, unsigned int num_tensors, const ITensorInfo *output); - - // Inherited methods overridden - void run(const Window &window, const ThreadInfo &info) override; - -private: - const ITensor *_input; - ITensor *_output; - unsigned int _axis; - unsigned int _idx_input; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NESTACKLAYERKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEStridedSliceKernel.h b/arm_compute/core/NEON/kernels/NEStridedSliceKernel.h deleted file mode 100644 index be55fd75de..0000000000 --- a/arm_compute/core/NEON/kernels/NEStridedSliceKernel.h +++ /dev/null @@ -1,102 +0,0 @@ -/* - * Copyright (c) 2018-2020 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NE_STRIDED_SLICE_KERNEL_H -#define ARM_COMPUTE_NE_STRIDED_SLICE_KERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/Types.h" - -#include - -namespace arm_compute -{ -// Forward declarations -class ITensor; - -/** Interface for the kernel to perform tensor strided slicing */ -class NEStridedSliceKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEStridedSliceKernel"; - } - /** Default constructor */ - NEStridedSliceKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEStridedSliceKernel(const NEStridedSliceKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEStridedSliceKernel &operator=(const NEStridedSliceKernel &) = delete; - /** Allow instances of this class to be moved */ - NEStridedSliceKernel(NEStridedSliceKernel &&) = default; - /** Allow instances of this class to be moved */ - NEStridedSliceKernel &operator=(NEStridedSliceKernel &&) = default; - /** Default destructor */ - ~NEStridedSliceKernel() = default; - /** Configure kernel - * - * @note Supported tensor rank: up to 4 - * - * @param[in] input Source tensor info. Data type supported: All - * @param[out] output Destination tensor info. Data type supported: Same as @p input - * @param[in] starts The starts of the dimensions of the input tensor to be sliced. The length must be of rank(input). - * @param[in] ends The ends of the dimensions of the input tensor to be sliced. The length must be of rank(input). - * @param[in] strides The strides of the dimensions of the input tensor to be sliced. The length must be of rank(input). - * @param[in] begin_mask If the ith bit of begin_mask is set, starts[i] is ignored and the fullest possible range in that dimension is used instead. - * @param[in] end_mask If the ith bit of end_mask is set, ends[i] is ignored and the fullest possible range in that dimension is used instead. - * @param[in] shrink_axis_mask If the ith bit of shrink_axis_mask is set, it implies that the ith specification shrinks the dimensionality by 1. - * A slice of size 1 starting from starts[i] in the dimension must be preserved. 
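(Aside, illustrative only: the begin/end masks described above select the full extent of a dimension when the corresponding bit is set. A minimal sketch of that rule for positive strides; the helper name is assumed and is not part of the kernel API:)

    #include <cstdint>

    // If bit i of begin_mask is set, starts[i] is ignored and the slice begins at 0
    // (the fullest possible range for a positive stride); otherwise starts[i] is used.
    int effective_start(int i, int requested_start, int32_t begin_mask)
    {
        return ((begin_mask >> i) & 1) ? 0 : requested_start;
    }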
- */ - void configure(const ITensorInfo *input, ITensorInfo *output, - const Coordinates &starts, const Coordinates &ends, const BiStrides &strides, - int32_t begin_mask, int32_t end_mask, int32_t shrink_axis_mask); - - /** Static function to check if given info will lead to a valid configuration of @ref NEStridedSliceKernel - * - * @note Supported tensor rank: up to 4 - * - * @param[in] input Source tensor info. Data type supported: All - * @param[in] output Destination tensor info. Data type supported: Same as @p input - * @param[in] starts The starts of the dimensions of the input tensor to be sliced. The length must be of rank(input). - * @param[in] ends The ends of the dimensions of the input tensor to be sliced. The length must be of rank(input). - * @param[in] strides The strides of the dimensions of the input tensor to be sliced. The length must be of rank(input). - * @param[in] begin_mask If the ith bit of begin_mask is set, starts[i] is ignored and the fullest possible range in that dimension is used instead. - * @param[in] end_mask If the ith bit of end_mask is set, ends[i] is ignored and the fullest possible range in that dimension is used instead. - * @param[in] shrink_axis_mask If the ith bit of shrink_axis_mask is set, it implies that the ith specification shrinks the dimensionality by 1. - * A slice of size 1 starting from starts[i] in the dimension must be preserved. - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, - const Coordinates &starts, const Coordinates &ends, const BiStrides &strides, - int32_t begin_mask, int32_t end_mask, int32_t shrink_axis_mask); - - // Inherited methods overridden: - void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override; - -private: - Coordinates _starts_abs; /**< Absolute start coordinates */ - Coordinates _final_strides; /**< Final strides */ - int32_t _shrink_mask; /**< Shrink axis mask */ -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NE_STRIDED_SLICE_KERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NETableLookupKernel.h b/arm_compute/core/NEON/kernels/NETableLookupKernel.h deleted file mode 100644 index 58bfdbeec2..0000000000 --- a/arm_compute/core/NEON/kernels/NETableLookupKernel.h +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Copyright (c) 2016-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */
-#ifndef ARM_COMPUTE_NETABLELOOKUPKERNEL_H
-#define ARM_COMPUTE_NETABLELOOKUPKERNEL_H
-
-#include "arm_compute/core/NEON/INESimpleKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-class ILut;
-
-/** Interface for the kernel to perform table lookup calculations. */
-class NETableLookupKernel : public INESimpleKernel
-{
-public:
-    const char *name() const override
-    {
-        return "NETableLookupKernel";
-    }
-    /** Default constructor */
-    NETableLookupKernel();
-    /** Prevent instances of this class from being copied (As this class contains pointers) */
-    NETableLookupKernel(const NETableLookupKernel &) = delete;
-    /** Prevent instances of this class from being copied (As this class contains pointers) */
-    NETableLookupKernel &operator=(const NETableLookupKernel &) = delete;
-    /** Allow instances of this class to be moved */
-    NETableLookupKernel(NETableLookupKernel &&) = default;
-    /** Allow instances of this class to be moved */
-    NETableLookupKernel &operator=(NETableLookupKernel &&) = default;
-    /** Initialise the kernel's input, lut and output.
-     *
-     * @param[in]  input  An input tensor. Data types supported: U8/S16.
-     * @param[in]  lut    The input LUT.
-     * @param[out] output The output tensor. Data types supported: same as @p input
-     */
-    void configure(const ITensor *input, const ILut *lut, ITensor *output);
-
-    // Inherited methods overridden:
-    void run(const Window &window, const ThreadInfo &info) override;
-
-private:
-    /** Perform table lookup on a given window.
-     *
-     * @param[in] window Region on which to execute the kernel.
-     */
-    template <class T>
-    void tableLookup(const Window &window);
-    /** Common signature for all the specialised lut functions
-     *
-     * @param[in] window Region on which to execute the kernel.
-     */
-    using TableLookupFunction = void (NETableLookupKernel::*)(const Window &window);
-    /** Sub function to use for the particular tensor types passed to configure() */
-    TableLookupFunction _func;
-    const ILut         *_lut;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NETABLELOOKUPKERNEL_H */
diff --git a/arm_compute/core/NEON/kernels/NEThresholdKernel.h b/arm_compute/core/NEON/kernels/NEThresholdKernel.h
deleted file mode 100644
index daad47dbda..0000000000
--- a/arm_compute/core/NEON/kernels/NEThresholdKernel.h
+++ /dev/null
@@ -1,84 +0,0 @@
-/*
- * Copyright (c) 2016-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */ -#ifndef ARM_COMPUTE_NETHRESHOLDKERNEL_H -#define ARM_COMPUTE_NETHRESHOLDKERNEL_H - -#include "arm_compute/core/KernelDescriptors.h" -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the thresholding kernel */ -class NEThresholdKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEThresholdKernel"; - } - /** Constructor - * Initialize all the pointers to nullptr and parameters to zero. - */ - NEThresholdKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEThresholdKernel(const NEThresholdKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEThresholdKernel &operator=(const NEThresholdKernel &) = delete; - /** Initialise the kernel's input, output and threshold parameters. - * - * @param[in] input An input tensor. Data type supported: U8 - * @param[out] output The output tensor. Data type supported: U8. - * @param[in] info Threshold kernel descriptor - */ - void configure(const ITensor *input, ITensor *output, const ThresholdKernelInfo &info); - /** Static function to check if given info will lead to a valid configuration of @ref NEThresholdKernel - * - * @param[in] input Input tensor info. Data type supported: U8 - * @param[in] output Output tensor info. Data type supported: U8 - * @param[in] info Threshold kernel descriptor - * - * @return A status containing an error code in case of failure - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ThresholdKernelInfo &info); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** run binary thresholding on the given window */ - void run_binary(const Window &window); - /** run range thresholding on the given window */ - void run_range(const Window &window); - - void (NEThresholdKernel::*_func)(const Window &window); - - const ITensor *_input; /**< Input */ - ITensor *_output; /**< Output */ - ThresholdKernelInfo _info; /**< Threshold descriptor */ -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NETHRESHOLDKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NETileKernel.h b/arm_compute/core/NEON/kernels/NETileKernel.h deleted file mode 100644 index 7a3039adc9..0000000000 --- a/arm_compute/core/NEON/kernels/NETileKernel.h +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Copyright (c) 2018-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NETILEKERNEL_H -#define ARM_COMPUTE_NETILEKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** NEON kernel to perform a tile operation */ -class NETileKernel : public INEKernel -{ -public: - /** Default constructor */ - NETileKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers). */ - NETileKernel(const NETileKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers). */ - NETileKernel &operator=(const NETileKernel &) = delete; - /** Allow instances of this class to be moved */ - NETileKernel(NETileKernel &&) = default; - /** Allow instances of this class to be moved */ - NETileKernel &operator=(NETileKernel &&) = default; - const char *name() const override - { - return "NETileKernel"; - } - /** Set the source, destination of the kernel - * - * @param[in] input Source tensor. Data type supported: All. - * @param[out] output Destination tensor. Same as @p input - * @param[in] multiples Contains the number of times the input tensor should be replicated on the given dimension. - */ - void configure(const ITensor *input, ITensor *output, const Multiples &multiples); - /** Static function to check if given info will lead to a valid configuration of @ref NETileKernel - * - * @param[in] input Source tensor info. Data type supported: All. - * @param[in] output Destination tensor info. Same as @p input - * @param[in] multiples Contains the number of times the input tensor should be replicated on the given dimension. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Multiples &multiples); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - const ITensor *_input; - ITensor *_output; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NETILEKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NETransposeKernel.h b/arm_compute/core/NEON/kernels/NETransposeKernel.h deleted file mode 100644 index 1507a1c1a4..0000000000 --- a/arm_compute/core/NEON/kernels/NETransposeKernel.h +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Copyright (c) 2017-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NETRANSPOSEKERNEL_H -#define ARM_COMPUTE_NETRANSPOSEKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** NEON kernel which transposes the elements of a matrix. - * - * [width, height, batch] -> [height, width, batch] - * - */ -class NETransposeKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NETransposeKernel"; - } - /** Default constructor */ - NETransposeKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NETransposeKernel(const NETransposeKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NETransposeKernel &operator=(const NETransposeKernel &) = delete; - /** Allow instances of this class to be moved */ - NETransposeKernel(NETransposeKernel &&) = default; - /** Allow instances of this class to be moved */ - NETransposeKernel &operator=(NETransposeKernel &&) = default; - /** Default destructor */ - ~NETransposeKernel() = default; - - /** Initialise the kernel's input and output. - * - * @param[in] input Input tensor. Data types supported: All - * @param[out] output Output tensor. Data type supported: Same as @p input - */ - void configure(const ITensor *input, ITensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref NETransposeKernel - * - * @param[in] input Input tensor. Data types supported: All - * @param[in] output Output tensor. Data type supported: Same as @p input - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Common signature for all the transpose functions - * - * @param[in] input An input tensor. Data types supported: All - * @param[out] output The output tensor. Data type supported: same as @p input - * @param[in] window Region on which to execute the kernel. - */ - using TransposeFunction = void(const ITensor *input, ITensor *output, const Window &window); - /** Transpose function to use for the particular tensor types passed to configure() */ - TransposeFunction *_func; - const ITensor *_input; - ITensor *_output; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NETRANSPOSEKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEUpsampleLayerKernel.h b/arm_compute/core/NEON/kernels/NEUpsampleLayerKernel.h deleted file mode 100644 index a1278ea307..0000000000 --- a/arm_compute/core/NEON/kernels/NEUpsampleLayerKernel.h +++ /dev/null @@ -1,99 +0,0 @@ -/* - * Copyright (c) 2018-2020 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEUPSAMPLELAYERKERNEL_H -#define ARM_COMPUTE_NEUPSAMPLELAYERKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the Upsample layer kernel.*/ -class NEUpsampleLayerKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEUpsampleLayerKernel"; - } - /** Default constructor */ - NEUpsampleLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEUpsampleLayerKernel(const NEUpsampleLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEUpsampleLayerKernel &operator=(const NEUpsampleLayerKernel &) = delete; - /** Default Move Constructor. */ - NEUpsampleLayerKernel(NEUpsampleLayerKernel &&) = default; - /** Default move assignment operator */ - NEUpsampleLayerKernel &operator=(NEUpsampleLayerKernel &&) = default; - /** Default destructor */ - ~NEUpsampleLayerKernel() = default; - /** Set the input output tensors. - * - * @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[out] output Destination tensor. Data types supported: same as @p input. - * @param[in] info Contains stride information described in @ref Size2D. - * @param[in] policy Defines the policy to fill the intermediate pixels. - * - */ - void configure(const ITensor *input, ITensor *output, const Size2D &info, const InterpolationPolicy policy); - /** Static function to check if given info will lead to a valid configuration of @ref NEUpsampleLayerKernel - * - * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. - * @param[in] output Destination tensor info. Data types supported: same as @p input. - * @param[in] info Contains stride information described in @ref Size2D. - * @param[in] policy Defines the policy to fill the intermediate pixels. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &info, const InterpolationPolicy policy); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Function to run upsample layer (NCHW) - * - * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). 
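(Aside, illustrative only: a hedged usage sketch of the configure() call documented above, assuming src and dst are already created Tensor objects; a 2x2 stride with nearest-neighbour filling is the typical case:)

    NEUpsampleLayerKernel upsample;
    // src and dst are assumed, pre-configured tensors; the Size2D argument carries the
    // upsampling stride and the policy selects how the intermediate pixels are filled.
    upsample.configure(&src, &dst, Size2D(2U, 2U), InterpolationPolicy::NEAREST_NEIGHBOR);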
- */ - template - void upsample_nchw(const Window &window); - /** Function to run upsample layer (NHWC) - * - * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). - */ - template - void upsample_nhwc(const Window &window); - - using UpsampleFunctionPtr = void (NEUpsampleLayerKernel::*)(const Window &window); - -private: - UpsampleFunctionPtr _func; - const ITensor *_input; - ITensor *_output; - Size2D _info; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEUPSAMPLELAYERKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEWarpKernel.h b/arm_compute/core/NEON/kernels/NEWarpKernel.h deleted file mode 100644 index 21fc7b2df1..0000000000 --- a/arm_compute/core/NEON/kernels/NEWarpKernel.h +++ /dev/null @@ -1,129 +0,0 @@ -/* - * Copyright (c) 2016-2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEWARPKERNEL_H -#define ARM_COMPUTE_NEWARPKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/Types.h" - -#include -#include -namespace arm_compute -{ -class ITensor; - -/** Common interface for warp affine and warp perspective */ -class INEWarpKernel : public INEKernel -{ -public: - /** Default constructor */ - INEWarpKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - INEWarpKernel(const INEWarpKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - INEWarpKernel &operator=(const INEWarpKernel &) = delete; - /** Allow instances of this class to be moved */ - INEWarpKernel(INEWarpKernel &&) = default; - /** Allow instances of this class to be moved */ - INEWarpKernel &operator=(INEWarpKernel &&) = default; - /** Initialise the kernel's input, output and border mode. - * - * @param[in] input Source tensor. Data type supported: U8. - * @param[out] output Destination tensor. Data type supported: U8. - * @param[in] matrix The perspective or affine matrix to use. Must be 2x3 for affine and 3x3 for perspective of type float. - * The matrix argument requires 9 values, for the affine case the last 3 values are ignored. - * @param[in] border_mode Strategy to use for borders - * @param[in] constant_border_value Constant value used for filling the border. 
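(Aside, illustrative only: configure() above always takes 9 floats, even for the 2x3 affine case where the last three values are ignored. A minimal sketch of the packing, using the identity transform so the exact row/column ordering convention does not matter:)

    #include <array>

    // Identity warp: every output pixel samples the input pixel at the same coordinates.
    const std::array<float, 9> identity_matrix = {
        1.f, 0.f, 0.f,
        0.f, 1.f, 0.f,
        0.f, 0.f, 1.f // the last three values are ignored for warp affine
    };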
- */
-    virtual void configure(const ITensor *input, ITensor *output, const std::array<float, 9> &matrix, BorderMode border_mode, uint8_t constant_border_value);
-
-    // Inherited methods overridden:
-    void run(const Window &window, const ThreadInfo &info) override;
-
-    // Inherited methods overridden:
-    BorderSize border_size() const override;
-
-protected:
-    /** function to perform warp affine or warp perspective on the given window when border mode == UNDEFINED
-     *
-     * @param[in] window Region on which to execute the kernel
-     */
-    virtual void warp_undefined(const Window &window) = 0;
-    /** function to perform warp affine or warp perspective on the given window when border mode == CONSTANT
-     *
-     * @param[in] window Region on which to execute the kernel
-     */
-    virtual void warp_constant(const Window &window) = 0;
-    /** function to perform warp affine or warp perspective on the given window when border mode == REPLICATE
-     *
-     * @param[in] window Region on which to execute the kernel
-     */
-    virtual void warp_replicate(const Window &window) = 0;
-    /** Common signature for all the specialised warp functions
-     *
-     * @param[in] window Region on which to execute the kernel.
-     */
-    void (INEWarpKernel::*_func)(const Window &window);
-
-    const ITensor *_input;                 /**< Input Tensor */
-    ITensor       *_output;                /**< Output Tensor */
-    uint8_t        _constant_border_value; /**< Constant value used for filling the border. This value is used for those pixels out of the ROI when the border mode is CONSTANT */
-    std::array<float, 9> _matrix;          /**< The affine or perspective matrix. Must be 2x3 for warp affine or 3x3 for warp perspective of type float. */
-};
-
-/** Template interface for the kernel to compute warp affine
- *
- */
-template <InterpolationPolicy interpolation>
-class NEWarpAffineKernel : public INEWarpKernel
-{
-private:
-    const char *name() const override
-    {
-        return "NEWarpAffineKernel";
-    }
-    // Inherited methods overridden:
-    void warp_undefined(const Window &window) override;
-    void warp_constant(const Window &window) override;
-    void warp_replicate(const Window &window) override;
-};
-
-/** Template interface for the kernel to compute warp perspective
- *
- */
-template <InterpolationPolicy interpolation>
-class NEWarpPerspectiveKernel : public INEWarpKernel
-{
-private:
-    const char *name() const override
-    {
-        return "NEWarpPerspectiveKernel";
-    }
-    // Inherited methods overridden:
-    void warp_undefined(const Window &window) override;
-    void warp_constant(const Window &window) override;
-    void warp_replicate(const Window &window) override;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_NEWARPKERNEL_H */
diff --git a/arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h b/arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h
deleted file mode 100644
index 8cb3ed8796..0000000000
--- a/arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h
+++ /dev/null
@@ -1,109 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEWEIGHTSRESHAPEKERNEL_H -#define ARM_COMPUTE_NEWEIGHTSRESHAPEKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** NEON kernel to perform reshaping on the weights used by convolution and locally connected layer - * - * Rearranges each 3-dimensional kernel to a single row leading to a matrix with linearized kernels. - * In combination with the @ref NEIm2ColKernel can transform a convolution to a matrix multiplication. - * - * For example assuming a 3D weight kernel of 3x3 dimensions and depth of 2 we have: - * @f[ - * \left( \begin{array}{ccc} - * a000 & a001 & a002 \\ - * a010 & a011 & a012 \\ - * a020 & a021 & a022 \\ - * \end{array} \right) - * \left( \begin{array}{ccc} - * a100 & a101 & a102 \\ - * a110 & a111 & a112 \\ - * a120 & a121 & a122 \\ - * \end{array} \right) - * \rightarrow - * \left( \begin{array}{ccccccccc} - * a000 & a001 & a002 & a010 & a011 & a012 & a020 & a021 & a022 & a100 & a101 & a102 & a110 & a111 & a112 & a120 & a121 & a122 \\ - * \end{array} \right) - * @f] - */ -class NEWeightsReshapeKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEWeightsReshapeKernel"; - } - /** Constructor.*/ - NEWeightsReshapeKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEWeightsReshapeKernel(const NEWeightsReshapeKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEWeightsReshapeKernel &operator=(const NEWeightsReshapeKernel &) = delete; - /** Allow instances of this class to be moved */ - NEWeightsReshapeKernel(NEWeightsReshapeKernel &&) = default; - /** Allow instances of this class to be moved */ - NEWeightsReshapeKernel &operator=(NEWeightsReshapeKernel &&) = default; - /** Default destructor */ - ~NEWeightsReshapeKernel() = default; - /** Set the input and output of the kernel. - * - * @param[in] input The input tensor to convert. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] if shared, - * and 5D tensor with dimensions [kernel_x, kernel_y, IFM, OFM, num_patches] if unshared. - * Data types supported: All - * @param[in] bias The shared biases tensor to append. Bias is 1D tensor with dimensions [OFM] if shared and 2D tensor with - * dimensions [OFM, num_patches] if unshared. Data types supported: Same as @p input - * @warning Appending biases to weights reshaped matrix is not supported for quantized asymmetric types. - * @param[out] output The output tensor. Data types supported: Same as @p input - */ - void configure(const ITensor *input, const ITensor *bias, ITensor *output); - /** Static function to check if given info will lead to a valid configuration of @ref NEWeightsReshapeKernel - * - * @param[in] input The input tensor to convert. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] if shared, - * and 5D tensor with dimensions [kernel_x, kernel_y, IFM, OFM, num_patches] if unshared. 
- * Data types supported: All - * @param[in] biases The shared biases tensor to append. Bias is 1D tensor with dimensions [OFM] if shared and 2D tensor with - * dimensions [OFM, num_patches] if unshared. Data types supported: Same as @p input - * @warning Appending biases to weights reshaped matrix is not supported for quantized asymmetric types. - * @param[in] output The output tensor. Should be a 2D Tensor. Data types supported: Same as @p input - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *biases, const ITensorInfo *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - const ITensor *_input; - const ITensor *_bias; - ITensor *_output; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEWEIGHTSRESHAPEKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEWidthConcatenateLayerKernel.h b/arm_compute/core/NEON/kernels/NEWidthConcatenateLayerKernel.h deleted file mode 100644 index 64d741deab..0000000000 --- a/arm_compute/core/NEON/kernels/NEWidthConcatenateLayerKernel.h +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Copyright (c) 2018-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef ARM_COMPUTE_NEWIDTHCONCATENATELAYERKERNEL_H -#define ARM_COMPUTE_NEWIDTHCONCATENATELAYERKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -// Forward declarations -class ITensor; - -/** Interface for the width concatenate kernel. - * The input tensor will be concatenated into the output tensor. 
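(Aside, illustrative only: each width-concatenation kernel writes its input into the shared output starting at a caller-supplied X offset, as the configure() documentation below spells out. A hedged caller-side sketch of how such offsets would accumulate; the inputs container and kernel list are assumptions, not part of the patch:)

    // Offsets advance by each input's width (dimension 0 is the X axis).
    unsigned int width_offset = 0;
    for(const ITensorInfo *in : inputs) // 'inputs' is an assumed std::vector<const ITensorInfo *>
    {
        // kernels[i].configure(in, width_offset, output);
        width_offset += in->dimension(0);
    }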
- */ -class NEWidthConcatenateLayerKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEWidthConcatenateLayerKernel"; - } - /** Default constructor */ - NEWidthConcatenateLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEWidthConcatenateLayerKernel(const NEWidthConcatenateLayerKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEWidthConcatenateLayerKernel &operator=(const NEWidthConcatenateLayerKernel &) = delete; - /** Allow instances of this class to be moved */ - NEWidthConcatenateLayerKernel(NEWidthConcatenateLayerKernel &&) = default; - /** Allow instances of this class to be moved */ - NEWidthConcatenateLayerKernel &operator=(NEWidthConcatenateLayerKernel &&) = default; - /** Default destructor */ - ~NEWidthConcatenateLayerKernel() = default; - /** Initialise the kernel's inputs and output - * - * @param[in] input Input tensor info. Data types supported: All - * @param[in] width_offset The offset on the X axis. - * @param[in,out] output Output tensor info. Data types supported: Same as @p input. - */ - void configure(const ITensorInfo *input, unsigned int width_offset, ITensorInfo *output); - /** Static function to check if given info will lead to a valid configuration of @ref NEWidthConcatenateLayerKernel - * - * @param[in] input Input tensor info. Data types supported: All - * @param[in] width_offset The offset on the X axis. - * @param[in] output Output tensor info. Data types supported: Same as @p input. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, unsigned int width_offset, const ITensorInfo *output); - - // Inherited methods overridden: - void run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) override; - -private: - unsigned int _width_offset; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEWIDTHCONCATENATELAYERKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/NEYOLOLayerKernel.h b/arm_compute/core/NEON/kernels/NEYOLOLayerKernel.h deleted file mode 100644 index 8795e4aa56..0000000000 --- a/arm_compute/core/NEON/kernels/NEYOLOLayerKernel.h +++ /dev/null @@ -1,106 +0,0 @@ -/* - * Copyright (c) 2018-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_NEYOLOLAYERKERNEL_H -#define ARM_COMPUTE_NEYOLOLAYERKERNEL_H - -#include "arm_compute/core/NEON/INEKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** Interface for the YOLO layer kernel. */ -class NEYOLOLayerKernel : public INEKernel -{ -public: - const char *name() const override - { - return "NEYOLOLayerKernel"; - } - /** Constructor */ - NEYOLOLayerKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEYOLOLayerKernel(const NEYOLOLayerKernel &) = delete; - /** Default move constructor */ - NEYOLOLayerKernel(NEYOLOLayerKernel &&) = default; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEYOLOLayerKernel &operator=(const NEYOLOLayerKernel &) = delete; - /** Default move assignment operator */ - NEYOLOLayerKernel &operator=(NEYOLOLayerKernel &&) = default; - /** Default destructor */ - ~NEYOLOLayerKernel() = default; - /** Set the input and output tensor. - * - * @note If the output tensor is a nullptr or is equal to the input, the activation function will be performed in-place - * - * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result - * of the activation function. Data types supported: F16/F32. - * @param[out] output Destination tensor. Data type supported: same as @p input - * @param[in] act_info Activation layer parameters. - * @param[in] num_classes Number of classes to activate (must be submultiple of @p input channels) - */ - void configure(ITensor *input, ITensor *output, const ActivationLayerInfo &act_info, int32_t num_classes); - /** Static function to check if given info will lead to a valid configuration of @ref NEYOLOLayerKernel - * - * @param[in] input Source tensor info. In case of @p output tensor info = nullptr, this tensor will store the result - * of the activation function. Data types supported: F16/F32. - * @param[in] output Destination tensor info. Data type supported: same as @p input - * @param[in] act_info Activation layer information. - * @param[in] num_classes Number of classes to activate (must be submultiple of @p input channels) - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info, int32_t num_classes); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - -private: - /** Function to run YOLO layer - * - * @param[in] window Region on which to execute the kernel. - */ - template - void yolo_layer_nchw(const Window &window); - /** Function to run YOLO layer on tensors with NHWC format - * - * @param[in] window Region on which to execute the kernel. - */ - template - void yolo_layer_nhwc(const Window &window); - /** Common signature for all the yolo layer functions - * - * @param[in] window Region on which to execute the kernel. - */ - using YOLOFunctionPtr = void (NEYOLOLayerKernel::*)(const Window &window); - -private: - YOLOFunctionPtr _func; - ITensor *_input; - ITensor *_output; - ActivationLayerInfo _act_info; - int32_t _num_classes; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_NEYOLOLAYERKERNEL_H */ diff --git a/arm_compute/core/NEON/kernels/assembly/arm_gemm_local.hpp b/arm_compute/core/NEON/kernels/assembly/arm_gemm_local.hpp deleted file mode 100644 index de92cce653..0000000000 --- a/arm_compute/core/NEON/kernels/assembly/arm_gemm_local.hpp +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Copyright (c) 2018 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#pragma once - -/* This file is used to configure integration-specific aspects of arm_gemm into ACL */ - -#include "arm_compute/core/CPP/CPPTypes.h" - -namespace arm_gemm -{ -using CPUModel = arm_compute::CPUModel; -using CPUInfo = arm_compute::CPUInfo; -} // namespace arm_compute - - - diff --git a/arm_compute/core/Types.h b/arm_compute/core/Types.h index 42e42cc2d6..306bdc6706 100644 --- a/arm_compute/core/Types.h +++ b/arm_compute/core/Types.h @@ -2246,5 +2246,14 @@ struct IOFormatInfo /** Align columns */ bool align_columns; }; + +/** Internal keypoint class for Lucas-Kanade Optical Flow */ +struct NELKInternalKeypoint +{ + float x{ 0.f }; /**< x coordinate of the keypoint */ + float y{ 0.f }; /**< y coordinate of the keypoint */ + bool tracking_status{ false }; /**< the tracking status of the keypoint */ +}; + } // namespace arm_compute #endif /* ARM_COMPUTE_TYPES_H */ diff --git a/arm_compute/core/utils/misc/Traits.h b/arm_compute/core/utils/misc/Traits.h index 58fb1bff59..933922f63c 100644 --- a/arm_compute/core/utils/misc/Traits.h +++ b/arm_compute/core/utils/misc/Traits.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -24,6 +24,7 @@ #ifndef ARM_COMPUTE_UTILS_TRAITS_TRAITS_H #define ARM_COMPUTE_UTILS_TRAITS_TRAITS_H +#include "arm_compute/core/Types.h" #include namespace arm_compute diff --git a/arm_compute/runtime/CL/functions/CLHarrisCorners.h b/arm_compute/runtime/CL/functions/CLHarrisCorners.h index 90d8c8873f..326a895d39 100644 --- a/arm_compute/runtime/CL/functions/CLHarrisCorners.h +++ b/arm_compute/runtime/CL/functions/CLHarrisCorners.h @@ -29,13 +29,13 @@ #include "arm_compute/core/CL/ICLArray.h" #include "arm_compute/core/CL/kernels/CLFillBorderKernel.h" #include "arm_compute/core/CL/kernels/CLHarrisCornersKernel.h" -#include "arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h" +#include "arm_compute/core/CPP/kernels/CPPCornerCandidatesKernel.h" +#include "arm_compute/core/CPP/kernels/CPPSortEuclideanDistanceKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/CL/CLTensor.h" #include "arm_compute/runtime/CL/functions/CLNonMaximaSuppression3x3.h" #include "arm_compute/runtime/IMemoryManager.h" #include "arm_compute/runtime/MemoryGroup.h" - #include #include diff --git a/arm_compute/runtime/IOperator.h b/arm_compute/runtime/IOperator.h index e7952bb748..0097383115 100644 --- a/arm_compute/runtime/IOperator.h +++ b/arm_compute/runtime/IOperator.h @@ -24,6 +24,8 @@ #ifndef ARM_COMPUTE_IOPERATOR_H #define ARM_COMPUTE_IOPERATOR_H +#include "arm_compute/core/ITensorPack.h" +#include "arm_compute/core/experimental/Types.h" #include "arm_compute/runtime/IOperator.h" #include "arm_compute/runtime/IRuntimeContext.h" #include "arm_compute/runtime/Types.h" diff --git a/arm_compute/runtime/ITransformWeights.h b/arm_compute/runtime/ITransformWeights.h index 2e2e764c8e..9392be05e5 100644 --- a/arm_compute/runtime/ITransformWeights.h +++ b/arm_compute/runtime/ITransformWeights.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 Arm Limited. + * Copyright (c) 2019-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -25,6 +25,7 @@ #define ARM_COMPUTE_ITRANSFORMWEIGHTS_H #include +#include namespace arm_compute { @@ -124,4 +125,4 @@ protected: }; } // arm_compute -#endif /*ARM_COMPUTE_ITRANSFORMWEIGHTS_H */ \ No newline at end of file +#endif /*ARM_COMPUTE_ITRANSFORMWEIGHTS_H */ diff --git a/arm_compute/runtime/NEON/INEOperator.h b/arm_compute/runtime/NEON/INEOperator.h index 415e767eec..a5ffc74940 100644 --- a/arm_compute/runtime/NEON/INEOperator.h +++ b/arm_compute/runtime/NEON/INEOperator.h @@ -25,7 +25,6 @@ #define ARM_COMPUTE_INEOPERATOR_H #include "../../core/ITensor.h" -#include "arm_compute/core/NEON/INEKernel.h" #include "arm_compute/runtime/IOperator.h" #include "arm_compute/runtime/IRuntimeContext.h" #include "arm_compute/runtime/Types.h" @@ -34,6 +33,8 @@ namespace arm_compute { +class ICPPKernel; +using INEKernel = ICPPKernel; namespace experimental { /** Basic interface for functions which have a single async NEON kernel */ @@ -53,6 +54,8 @@ public: INEOperator &operator=(const INEOperator &) = delete; /** Default move assignment operator */ INEOperator &operator=(INEOperator &&) = default; + /** Default destructor */ + ~INEOperator(); // Inherited methods overridden: void run(ITensorPack &tensors) override; diff --git a/arm_compute/runtime/NEON/INESimpleFunction.h b/arm_compute/runtime/NEON/INESimpleFunction.h index 7f2ed2e16f..979a0f7f07 100644 --- a/arm_compute/runtime/NEON/INESimpleFunction.h +++ b/arm_compute/runtime/NEON/INESimpleFunction.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. 
+ * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,27 +24,38 @@ #ifndef ARM_COMPUTE_INESIMPLEFUNCTION_H #define ARM_COMPUTE_INESIMPLEFUNCTION_H -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" #include "arm_compute/runtime/IFunction.h" #include namespace arm_compute { +class ICPPKernel; +class NEFillBorderKernel; +using INEKernel = ICPPKernel; /** Basic interface for functions which have a single NEON kernel */ class INESimpleFunction : public IFunction { public: /** Constructor */ INESimpleFunction(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + INESimpleFunction(const INESimpleFunction &) = delete; + /** Default move constructor */ + INESimpleFunction(INESimpleFunction &&) = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + INESimpleFunction &operator=(const INESimpleFunction &) = delete; + /** Default move assignment operator */ + INESimpleFunction &operator=(INESimpleFunction &&) = default; + /** Default destructor */ + ~INESimpleFunction(); // Inherited methods overridden: void run() override final; protected: - std::unique_ptr _kernel; /**< Kernel to run */ - NEFillBorderKernel _border_handler; /**< Kernel to handle image borders */ + std::unique_ptr _kernel; /**< Kernel to run */ + std::unique_ptr _border_handler; /**< Kernel to handle image borders */ }; } #endif /*ARM_COMPUTE_INESIMPLEFUNCTION_H */ diff --git a/arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h b/arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h index 7d352eb82b..9df0d78526 100644 --- a/arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h +++ b/arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,7 +24,6 @@ #ifndef ARM_COMPUTE_INESIMPLEFUNCTIONNOBORDER_H #define ARM_COMPUTE_INESIMPLEFUNCTIONNOBORDER_H -#include "arm_compute/core/NEON/INEKernel.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/IRuntimeContext.h" @@ -32,6 +31,8 @@ namespace arm_compute { +class ICPPKernel; +using INEKernel = ICPPKernel; /** Basic interface for functions which have a single NEON kernel and no border */ class INESimpleFunctionNoBorder : public IFunction { @@ -49,6 +50,8 @@ public: INESimpleFunctionNoBorder &operator=(const INESimpleFunctionNoBorder &) = delete; /** Default move assignment operator */ INESimpleFunctionNoBorder &operator=(INESimpleFunctionNoBorder &&) = default; + /** Default destructor */ + ~INESimpleFunctionNoBorder(); // Inherited methods overridden: void run() override final; diff --git a/arm_compute/runtime/NEON/functions/NEAbsoluteDifference.h b/arm_compute/runtime/NEON/functions/NEAbsoluteDifference.h index 7b35e6db9e..df7dc2d980 100644 --- a/arm_compute/runtime/NEON/functions/NEAbsoluteDifference.h +++ b/arm_compute/runtime/NEON/functions/NEAbsoluteDifference.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -24,7 +24,7 @@ #ifndef ARM_COMPUTE_NEABSOLUTEDIFFERENCE_H #define ARM_COMPUTE_NEABSOLUTEDIFFERENCE_H -#include "arm_compute/runtime/NEON/INESimpleFunction.h" +#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" namespace arm_compute { @@ -35,9 +35,21 @@ class ITensor; * @note The image data type for the inputs must be U8 or S16 * @note The function calculates the absolute difference also when the 2 inputs have different image data types */ -class NEAbsoluteDifference : public INESimpleFunction +class NEAbsoluteDifference : public INESimpleFunctionNoBorder { public: + /** Default constructor */ + NEAbsoluteDifference() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEAbsoluteDifference(const NEAbsoluteDifference &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEAbsoluteDifference &operator=(const NEAbsoluteDifference &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEAbsoluteDifference(NEAbsoluteDifference &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEAbsoluteDifference &operator=(NEAbsoluteDifference &&) = delete; + /** Default destructor */ + ~NEAbsoluteDifference(); /** Set the inputs and output images * * @param[in] input1 Source tensor. Data types supported: U8/S16. diff --git a/arm_compute/runtime/NEON/functions/NEAccumulate.h b/arm_compute/runtime/NEON/functions/NEAccumulate.h index f403a7772b..6dcef09f10 100644 --- a/arm_compute/runtime/NEON/functions/NEAccumulate.h +++ b/arm_compute/runtime/NEON/functions/NEAccumulate.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -36,6 +36,18 @@ class ITensor; class NEAccumulate : public INESimpleFunctionNoBorder { public: + /** Default constructor */ + NEAccumulate() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEAccumulate(const NEAccumulate &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEAccumulate &operator=(const NEAccumulate &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEAccumulate(NEAccumulate &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEAccumulate &operator=(NEAccumulate &&) = delete; + /** Default destructor */ + ~NEAccumulate(); /** Set the input and accumulation tensors * * @param[in] input Source tensor. Data type supported: U8. 
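The changes above (and throughout this patch) follow one pattern: the public function headers forward-declare ICPPKernel, alias it to INEKernel, hold kernels through std::unique_ptr, and declare destructors that are defined out of line where the kernel type is complete. The single-file sketch below uses hypothetical names (sketch::Kernel, sketch::SimpleFunction) to show why the out-of-line destructor is required when std::unique_ptr wraps an incomplete type; it is not ACL code.

    #include <cstdio>
    #include <memory>

    namespace sketch
    {
    class Kernel; // forward declaration only, mirroring "class ICPPKernel;" in the public headers

    class SimpleFunction
    {
    public:
        SimpleFunction();
        ~SimpleFunction(); // declared here, defined below where Kernel is a complete type
        void run();

    private:
        std::unique_ptr<Kernel> _kernel; // fine to declare with an incomplete type
    };

    // "Implementation file" part: Kernel is now complete, so unique_ptr<Kernel> can be destroyed.
    class Kernel
    {
    public:
        void run_window()
        {
            std::printf("kernel executed\n");
        }
    };

    SimpleFunction::SimpleFunction() : _kernel(std::make_unique<Kernel>())
    {
    }
    SimpleFunction::~SimpleFunction() = default;
    void SimpleFunction::run()
    {
        _kernel->run_window();
    }
    } // namespace sketch

    int main()
    {
        sketch::SimpleFunction f;
        f.run();
        return 0;
    }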
@@ -48,6 +60,18 @@ public: class NEAccumulateWeighted : public INESimpleFunctionNoBorder { public: + /** Default constructor */ + NEAccumulateWeighted() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEAccumulateWeighted(const NEAccumulateWeighted &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEAccumulateWeighted &operator=(const NEAccumulateWeighted &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEAccumulateWeighted(NEAccumulateWeighted &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEAccumulateWeighted &operator=(NEAccumulateWeighted &&) = delete; + /** Default destructor */ + ~NEAccumulateWeighted(); /** Set the input and accumulation tensors, and the scale value * * @param[in] input Source tensor. Data type supported: U8. @@ -62,6 +86,18 @@ public: class NEAccumulateSquared : public INESimpleFunctionNoBorder { public: + /** Default constructor */ + NEAccumulateSquared() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEAccumulateSquared(const NEAccumulateSquared &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEAccumulateSquared &operator=(const NEAccumulateSquared &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEAccumulateSquared(NEAccumulateSquared &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEAccumulateSquared &operator=(NEAccumulateSquared &&) = delete; + /** Default destructor */ + ~NEAccumulateSquared(); /** Set the input and accumulation tensors and the shift value. * * @param[in] input Source tensor. Data type supported: U8. diff --git a/arm_compute/runtime/NEON/functions/NEActivationLayer.h b/arm_compute/runtime/NEON/functions/NEActivationLayer.h index cfece5c392..3f410fcd8c 100644 --- a/arm_compute/runtime/NEON/functions/NEActivationLayer.h +++ b/arm_compute/runtime/NEON/functions/NEActivationLayer.h @@ -46,8 +46,6 @@ public: * @param[in] ctx Runtime context to be used by the function */ NEActivationLayer(IRuntimeContext *ctx = nullptr); - /** Destructor */ - ~NEActivationLayer(); /** Prevent instances of this class from being copied (As this class contains pointers) */ NEActivationLayer(const NEActivationLayer &) = delete; /** Default move constructor */ @@ -56,6 +54,8 @@ public: NEActivationLayer &operator=(const NEActivationLayer &) = delete; /** Default move assignment operator */ NEActivationLayer &operator=(NEActivationLayer &&); + /** Destructor */ + ~NEActivationLayer(); /** [NEActivationLayer snippet] **/ /** Set the input and output tensor. 
* @@ -93,6 +93,19 @@ namespace experimental class NEActivationLayer : public INEOperator { public: + /** Constructor */ + NEActivationLayer() = default; + /** Prevent instances of this class from being copied */ + NEActivationLayer(const NEActivationLayer &) = delete; + /** Default move constructor */ + NEActivationLayer(NEActivationLayer &&) = default; + /** Prevent instances of this class from being copied */ + NEActivationLayer &operator=(const NEActivationLayer &) = delete; + /** Default move assignment operator */ + NEActivationLayer &operator=(NEActivationLayer &&) = default; + /** Destructor */ + ~NEActivationLayer(); + /** Set the input and output tensor. * * @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM16/F16/F32. diff --git a/arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h b/arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h index 61762f37e1..4b13d1f44e 100644 --- a/arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h +++ b/arm_compute/runtime/NEON/functions/NEArgMinMaxLayer.h @@ -52,6 +52,16 @@ class NEArgMinMaxLayer : public IFunction public: /** Constructor */ NEArgMinMaxLayer(std::shared_ptr memory_manager = nullptr); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEArgMinMaxLayer(const NEArgMinMaxLayer &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEArgMinMaxLayer &operator=(const NEArgMinMaxLayer &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEArgMinMaxLayer(NEArgMinMaxLayer &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEArgMinMaxLayer &operator=(NEArgMinMaxLayer &&) = delete; + /** Default destructor */ + ~NEArgMinMaxLayer(); /** Set the input and output tensors. * * @param[in] input Input source tensor. Data types supported: QASYMM8_SIGNED/QASYMM8/S32/F16/F32. diff --git a/arm_compute/runtime/NEON/functions/NEArithmeticAddition.h b/arm_compute/runtime/NEON/functions/NEArithmeticAddition.h index e10771ef4b..6aaa5ff4f7 100644 --- a/arm_compute/runtime/NEON/functions/NEArithmeticAddition.h +++ b/arm_compute/runtime/NEON/functions/NEArithmeticAddition.h @@ -38,6 +38,18 @@ namespace experimental class NEArithmeticAddition : public INEOperator { public: + /** Constructor */ + NEArithmeticAddition() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEArithmeticAddition(const NEArithmeticAddition &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEArithmeticAddition &operator=(const NEArithmeticAddition &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEArithmeticAddition(NEArithmeticAddition &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEArithmeticAddition &operator=(NEArithmeticAddition &&) = delete; + /** Default destructor */ + ~NEArithmeticAddition(); /** Initialise the kernel's inputs, output and conversion policy. 
* * Valid configurations (Input1,Input2) -> Output : diff --git a/arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h b/arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h index 1f77164a43..6d56a267a7 100644 --- a/arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h +++ b/arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2017-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,14 +24,16 @@ #ifndef ARM_COMPUTE_NEBATCHNORMALIZATIONLAYER_H #define ARM_COMPUTE_NEBATCHNORMALIZATIONLAYER_H -#include "arm_compute/core/NEON/kernels/NEBatchNormalizationLayerKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/NEON/functions/NEActivationLayer.h" +#include + namespace arm_compute { class ITensor; +class NEBatchNormalizationLayerKernel; /** Basic function to run @ref NENormalizationLayerKernel and simulate a batch normalization layer. * @@ -42,8 +44,18 @@ class ITensor; class NEBatchNormalizationLayer : public IFunction { public: - /** Default constructor */ + /** Constructor */ NEBatchNormalizationLayer(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEBatchNormalizationLayer(const NEBatchNormalizationLayer &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEBatchNormalizationLayer &operator=(const NEBatchNormalizationLayer &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEBatchNormalizationLayer(NEBatchNormalizationLayer &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEBatchNormalizationLayer &operator=(NEBatchNormalizationLayer &&) = delete; + /** Default destructor */ + ~NEBatchNormalizationLayer(); /** Set the input and output tensors. * * @note If the output tensor is a nullptr or is equal to the input, the batch normalization function will be performed in-place @@ -85,7 +97,7 @@ public: void run() override; private: - NEBatchNormalizationLayerKernel _norm_kernel; /**< Batch normalization layer kernel */ + std::unique_ptr _norm_kernel; /**< Batch normalization layer kernel */ }; } #endif /* ARM_COMPUTE_NEBATCHNORMALIZATIONLAYER_H */ diff --git a/arm_compute/runtime/NEON/functions/NEBatchToSpaceLayer.h b/arm_compute/runtime/NEON/functions/NEBatchToSpaceLayer.h index 1a6ffa9506..c2fd26d34c 100644 --- a/arm_compute/runtime/NEON/functions/NEBatchToSpaceLayer.h +++ b/arm_compute/runtime/NEON/functions/NEBatchToSpaceLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 Arm Limited. + * Copyright (c) 2019-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -26,18 +26,30 @@ #include "arm_compute/runtime/IFunction.h" -#include "arm_compute/core/NEON/kernels/NEBatchToSpaceLayerKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" namespace arm_compute { class ITensor; +class ITensorInfo; /** Basic function to run @ref NEBatchToSpaceLayerKernel. 
*/ class NEBatchToSpaceLayer : public INESimpleFunctionNoBorder { public: + /** Constructor */ + NEBatchToSpaceLayer() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEBatchToSpaceLayer(const NEBatchToSpaceLayer &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEBatchToSpaceLayer &operator=(const NEBatchToSpaceLayer &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEBatchToSpaceLayer(NEBatchToSpaceLayer &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEBatchToSpaceLayer &operator=(NEBatchToSpaceLayer &&) = delete; + /** Default destructor */ + ~NEBatchToSpaceLayer() = default; /** Set the input and output tensors. * * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All. diff --git a/arm_compute/runtime/NEON/functions/NEBitwiseAnd.h b/arm_compute/runtime/NEON/functions/NEBitwiseAnd.h index c612a146ac..3203d2b9a7 100644 --- a/arm_compute/runtime/NEON/functions/NEBitwiseAnd.h +++ b/arm_compute/runtime/NEON/functions/NEBitwiseAnd.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -34,6 +34,18 @@ class ITensor; class NEBitwiseAnd : public INESimpleFunctionNoBorder { public: + /** Constructor */ + NEBitwiseAnd() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEBitwiseAnd(const NEBitwiseAnd &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEBitwiseAnd &operator=(const NEBitwiseAnd &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEBitwiseAnd(NEBitwiseAnd &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEBitwiseAnd &operator=(NEBitwiseAnd &&) = delete; + /** Default destructor */ + ~NEBitwiseAnd() = default; /** Initialise the kernel's inputs and output * * @param[in] input1 First tensor input. Data type supported: U8. diff --git a/arm_compute/runtime/NEON/functions/NEBitwiseNot.h b/arm_compute/runtime/NEON/functions/NEBitwiseNot.h index f6ef975dc7..9fa0d38caf 100644 --- a/arm_compute/runtime/NEON/functions/NEBitwiseNot.h +++ b/arm_compute/runtime/NEON/functions/NEBitwiseNot.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2017-2020 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/arm_compute/runtime/NEON/functions/NEBitwiseOr.h b/arm_compute/runtime/NEON/functions/NEBitwiseOr.h index 8fc4b0d362..fba6b784de 100644 --- a/arm_compute/runtime/NEON/functions/NEBitwiseOr.h +++ b/arm_compute/runtime/NEON/functions/NEBitwiseOr.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2017-2020 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/arm_compute/runtime/NEON/functions/NEBitwiseXor.h b/arm_compute/runtime/NEON/functions/NEBitwiseXor.h index 20e23af234..c6cb584284 100644 --- a/arm_compute/runtime/NEON/functions/NEBitwiseXor.h +++ b/arm_compute/runtime/NEON/functions/NEBitwiseXor.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2017-2020 Arm Limited. 
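The bitwise functions above only gain rule-of-five boilerplate; how an INESimpleFunctionNoBorder is driven does not change. A minimal usage sketch for NEBitwiseAnd follows (the shape and the U8 data type are arbitrary choices, and it assumes the library is built and linked as usual):

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/TensorShape.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/NEON/NEFunctions.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        Tensor in1, in2, out;
        const TensorShape shape(64U, 64U);
        in1.allocator()->init(TensorInfo(shape, 1, DataType::U8));
        in2.allocator()->init(TensorInfo(shape, 1, DataType::U8));
        out.allocator()->init(TensorInfo(shape, 1, DataType::U8));

        NEBitwiseAnd band;
        band.configure(&in1, &in2, &out); // no border handling is required for this function

        in1.allocator()->allocate();
        in2.allocator()->allocate();
        out.allocator()->allocate();

        band.run(); // dispatches the underlying kernel through the NEON scheduler
        return 0;
    }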
* * SPDX-License-Identifier: MIT * diff --git a/arm_compute/runtime/NEON/functions/NEBoundingBoxTransform.h b/arm_compute/runtime/NEON/functions/NEBoundingBoxTransform.h index 14d5de4ca4..de8dfef4ed 100644 --- a/arm_compute/runtime/NEON/functions/NEBoundingBoxTransform.h +++ b/arm_compute/runtime/NEON/functions/NEBoundingBoxTransform.h @@ -24,19 +24,20 @@ #ifndef ARM_COMPUTE_NEBOUNDINGBOXTRANSOFORM_H #define ARM_COMPUTE_NEBOUNDINGBOXTRANSOFORM_H -#include "arm_compute/core/NEON/kernels/NEBoundingBoxTransformKernel.h" -#include "arm_compute/runtime/NEON/INESimpleFunction.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" namespace arm_compute { class ITensor; +class ITensorInfo; /** Basic function to run @ref NEBoundingBoxTransformKernel. * * This function calls the following Neon kernels: * -# @ref NEBoundingBoxTransformKernel */ -class NEBoundingBoxTransform : public INESimpleFunction +class NEBoundingBoxTransform : public INESimpleFunctionNoBorder { public: /** Set the input and output tensors. diff --git a/arm_compute/runtime/NEON/functions/NEBox3x3.h b/arm_compute/runtime/NEON/functions/NEBox3x3.h index 80cd5084ab..4d8b12684b 100644 --- a/arm_compute/runtime/NEON/functions/NEBox3x3.h +++ b/arm_compute/runtime/NEON/functions/NEBox3x3.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2017-2020 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/arm_compute/runtime/NEON/functions/NECannyEdge.h b/arm_compute/runtime/NEON/functions/NECannyEdge.h index f171c3bed0..b08646de0d 100644 --- a/arm_compute/runtime/NEON/functions/NECannyEdge.h +++ b/arm_compute/runtime/NEON/functions/NECannyEdge.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2017-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,8 +24,6 @@ #ifndef ARM_COMPUTE_NECANNYEDGE_H #define ARM_COMPUTE_NECANNYEDGE_H -#include "arm_compute/core/NEON/kernels/NECannyEdgeKernel.h" -#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/IMemoryManager.h" @@ -38,6 +36,10 @@ namespace arm_compute { class ITensor; +class NEGradientKernel; +class NEFillBorderKernel; +class NEEdgeNonMaxSuppressionKernel; +class NEEdgeTraceKernel; /** Basic function to execute canny edge on NEON. This function calls the following NEON kernels and functions: * @@ -64,6 +66,8 @@ public: NECannyEdge(const NECannyEdge &) = delete; /** Prevent instances of this class from being copied (As this class contains pointers) */ NECannyEdge &operator=(const NECannyEdge &) = delete; + /** Default destructor */ + ~NECannyEdge(); /** Initialise the function's source, destination, thresholds, gradient size, normalization type and border mode. * * @param[in, out] input Source tensor. Data type supported: U8. 
(Written to only for @p border_mode != UNDEFINED) @@ -81,19 +85,19 @@ public: void run() override; private: - MemoryGroup _memory_group; /**< Function's memory group */ - std::unique_ptr _sobel; /**< Pointer to Sobel kernel */ - std::unique_ptr _gradient; /**< Gradient kernel */ - NEEdgeNonMaxSuppressionKernel _non_max_suppr; /**< Non-Maxima suppression kernel */ - NEEdgeTraceKernel _edge_trace; /**< Edge tracing kernel */ - NEFillBorderKernel _border_mag_gradient; /**< Fill border on magnitude tensor kernel */ - NEFillBorderKernel _border_edge_trace; /**< Fill border before edge trace */ - Tensor _gx; /**< Source tensor - Gx component */ - Tensor _gy; /**< Source tensor - Gy component */ - Tensor _magnitude; /**< Source tensor - Magnitude */ - Tensor _phase; /**< Source tensor - Phase */ - Tensor _nonmax; /**< Source tensor - Non-Maxima suppressed */ - ITensor *_output; /**< Output tensor provided by the user. */ + MemoryGroup _memory_group; /**< Function's memory group */ + std::unique_ptr _sobel; /**< Pointer to Sobel kernel */ + std::unique_ptr _gradient; /**< Gradient kernel */ + std::unique_ptr _non_max_suppr; /**< Non-Maxima suppression kernel */ + std::unique_ptr _edge_trace; /**< Edge tracing kernel */ + std::unique_ptr _border_mag_gradient; /**< Fill border on magnitude tensor kernel */ + std::unique_ptr _border_edge_trace; /**< Fill border before edge trace */ + Tensor _gx; /**< Source tensor - Gx component */ + Tensor _gy; /**< Source tensor - Gy component */ + Tensor _magnitude; /**< Source tensor - Magnitude */ + Tensor _phase; /**< Source tensor - Phase */ + Tensor _nonmax; /**< Source tensor - Non-Maxima suppressed */ + ITensor *_output; /**< Output tensor provided by the user. */ }; } #endif /* ARM_COMPUTE_NECANNYEDGE_H */ diff --git a/arm_compute/runtime/NEON/functions/NECast.h b/arm_compute/runtime/NEON/functions/NECast.h index ca818bea27..e536317660 100644 --- a/arm_compute/runtime/NEON/functions/NECast.h +++ b/arm_compute/runtime/NEON/functions/NECast.h @@ -25,16 +25,17 @@ #define ARM_COMPUTE_NECAST_H #include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/INESimpleFunction.h" +#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" namespace arm_compute { class ITensor; +class ITensorInfo; /** Basic function to run @ref NEDepthConvertLayerKernel. * This function ignores the scale and zeroPoint of quanized tensors,so QASYMM8 input is treated as uint8 values. */ -class NECast : public INESimpleFunction +class NECast : public INESimpleFunctionNoBorder { public: /** Initialize the function's source, destination diff --git a/arm_compute/runtime/NEON/functions/NEChannelCombine.h b/arm_compute/runtime/NEON/functions/NEChannelCombine.h index c4ced62e72..44a0504824 100644 --- a/arm_compute/runtime/NEON/functions/NEChannelCombine.h +++ b/arm_compute/runtime/NEON/functions/NEChannelCombine.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2017-2020 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/arm_compute/runtime/NEON/functions/NEChannelExtract.h b/arm_compute/runtime/NEON/functions/NEChannelExtract.h index 54059e91e1..4b6383d6b1 100644 --- a/arm_compute/runtime/NEON/functions/NEChannelExtract.h +++ b/arm_compute/runtime/NEON/functions/NEChannelExtract.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. 
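NECast above is rebased from INESimpleFunction onto INESimpleFunctionNoBorder, but its configure() contract stays the same. A possible usage sketch, assuming a U8-to-F32 conversion (any documented type combination would work the same way):

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/TensorShape.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/NEON/NEFunctions.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        Tensor src, dst;
        const TensorShape shape(32U, 32U);
        src.allocator()->init(TensorInfo(shape, 1, DataType::U8));
        dst.allocator()->init(TensorInfo(shape, 1, DataType::F32));

        NECast cast;
        cast.configure(&src, &dst, ConvertPolicy::SATURATE); // scale/zero-point of quantized inputs are ignored

        src.allocator()->allocate();
        dst.allocator()->allocate();

        cast.run();
        return 0;
    }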
* * SPDX-License-Identifier: MIT * diff --git a/arm_compute/runtime/NEON/functions/NEChannelShuffleLayer.h b/arm_compute/runtime/NEON/functions/NEChannelShuffleLayer.h index f31518e85b..aa11396c20 100644 --- a/arm_compute/runtime/NEON/functions/NEChannelShuffleLayer.h +++ b/arm_compute/runtime/NEON/functions/NEChannelShuffleLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,12 +24,14 @@ #ifndef ARM_COMPUTE_NECHANNELSHUFFLELAYER_H #define ARM_COMPUTE_NECHANNELSHUFFLELAYER_H +#include "arm_compute/core/Types.h" #include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" namespace arm_compute { // Forward declarations class ITensor; +class ITensorInfo; /** Basic function to run @ref NEChannelShuffleLayerKernel * diff --git a/arm_compute/runtime/NEON/functions/NECol2Im.h b/arm_compute/runtime/NEON/functions/NECol2Im.h index e03ec42c4f..69459a83c1 100644 --- a/arm_compute/runtime/NEON/functions/NECol2Im.h +++ b/arm_compute/runtime/NEON/functions/NECol2Im.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2017-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -26,12 +26,13 @@ #include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" +#include "arm_compute/core/Error.h" #include "arm_compute/core/Size2D.h" -#include "arm_compute/core/Types.h" namespace arm_compute { class ITensor; +class ITensorInfo; /** Basic function to run @ref NECol2Im */ class NECol2Im : public INESimpleFunctionNoBorder diff --git a/arm_compute/runtime/NEON/functions/NEColorConvert.h b/arm_compute/runtime/NEON/functions/NEColorConvert.h index b4c4158804..545550c04a 100644 --- a/arm_compute/runtime/NEON/functions/NEColorConvert.h +++ b/arm_compute/runtime/NEON/functions/NEColorConvert.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/arm_compute/runtime/NEON/functions/NEComputeAllAnchors.h b/arm_compute/runtime/NEON/functions/NEComputeAllAnchors.h index 44f3f860cf..b63243fec6 100644 --- a/arm_compute/runtime/NEON/functions/NEComputeAllAnchors.h +++ b/arm_compute/runtime/NEON/functions/NEComputeAllAnchors.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 Arm Limited. + * Copyright (c) 2019-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,19 +24,20 @@ #ifndef ARM_COMPUTE_NECOMPUTEALLANCHORS_H #define ARM_COMPUTE_NECOMPUTEALLANCHORS_H -#include "arm_compute/core/NEON/kernels/NEGenerateProposalsLayerKernel.h" -#include "arm_compute/runtime/NEON/INESimpleFunction.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" namespace arm_compute { class ITensor; +class ITensorInfo; /** Basic function to run @ref NEComputeAllAnchorsKernel. * * This function calls the following NEON kernels: * -# @ref NEComputeAllAnchorsKernel */ -class NEComputeAllAnchors : public INESimpleFunction +class NEComputeAllAnchors : public INESimpleFunctionNoBorder { public: /** Set the input and output tensors. 
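Several of the classes touched in this patch (for example experimental::NEActivationLayer and experimental::NEArithmeticAddition shown earlier, and experimental::NEConcatenation just below) are stateless operators: they are configured on ITensorInfo objects and executed with an ITensorPack, which is why IOperator.h now includes ITensorPack.h and experimental/Types.h. The sketch below shows that calling convention for the activation operator; the tensor shape and the activation function are arbitrary assumptions.

    #include "arm_compute/core/ITensorPack.h"
    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/TensorShape.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/core/experimental/Types.h"
    #include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        const TensorInfo src_info(TensorShape(128U), 1, DataType::F32);
        const TensorInfo dst_info(TensorShape(128U), 1, DataType::F32);

        Tensor src, dst;
        src.allocator()->init(src_info);
        dst.allocator()->init(dst_info);
        src.allocator()->allocate();
        dst.allocator()->allocate();

        // The operator is configured on tensor infos only; tensors are bound at run time.
        experimental::NEActivationLayer act_op;
        act_op.configure(&src_info, &dst_info, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));

        ITensorPack pack;
        pack.add_tensor(TensorType::ACL_SRC, &src);
        pack.add_tensor(TensorType::ACL_DST, &dst);
        act_op.run(pack);
        return 0;
    }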
diff --git a/arm_compute/runtime/NEON/functions/NEConcatenateLayer.h b/arm_compute/runtime/NEON/functions/NEConcatenateLayer.h index 82b4517dd3..fd35d0bc46 100644 --- a/arm_compute/runtime/NEON/functions/NEConcatenateLayer.h +++ b/arm_compute/runtime/NEON/functions/NEConcatenateLayer.h @@ -26,7 +26,6 @@ #include "arm_compute/runtime/IFunction.h" -#include "arm_compute/core/NEON/INEKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/NEON/INEOperator.h" #include "support/Requires.h" @@ -106,8 +105,18 @@ namespace experimental class NEConcatenation : public INEOperator { public: - /** Default constructor */ + /** Constructor */ NEConcatenation(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEConcatenation(const NEConcatenation &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEConcatenation &operator=(const NEConcatenation &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEConcatenation(NEConcatenation &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEConcatenation &operator=(NEConcatenation &&) = delete; + /** Default destructor */ + ~NEConcatenation() = default; /** Initialise the kernel's inputs vector and output. * * @note Input and output tensor dimensions preconditions defer depending on the concatenation axis. @@ -135,9 +144,9 @@ public: void run(ITensorPack &tensors) override; private: - std::vector> _concat_kernels; - unsigned int _num_inputs; - unsigned int _axis; + std::vector> _concat_kernels; + unsigned int _num_inputs; + unsigned int _axis; }; } // namespace experimental } // namespace arm_compute diff --git a/arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h b/arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h index 42a62dc0ab..984e8d68c0 100644 --- a/arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h +++ b/arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,16 +24,17 @@ #ifndef ARM_COMPUTE_NECONVERTFULLYCONNECTEDWEIGHTS_H #define ARM_COMPUTE_NECONVERTFULLYCONNECTEDWEIGHTS_H -#include "arm_compute/core/NEON/kernels/NEConvertFullyConnectedWeightsKernel.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/ITransformWeights.h" #include "arm_compute/runtime/NEON/NEScheduler.h" #include "arm_compute/runtime/Tensor.h" +#include namespace arm_compute { // Forward declarations class ITensor; +class NEConvertFullyConnectedWeightsKernel; /** Basic function to run @ref NEConvertFullyConnectedWeightsKernel. 
*/ class NEConvertFullyConnectedWeights : public IFunction @@ -41,6 +42,16 @@ class NEConvertFullyConnectedWeights : public IFunction public: /** Default constructor */ NEConvertFullyConnectedWeights(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEConvertFullyConnectedWeights(const NEConvertFullyConnectedWeights &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEConvertFullyConnectedWeights &operator=(const NEConvertFullyConnectedWeights &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEConvertFullyConnectedWeights(NEConvertFullyConnectedWeights &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEConvertFullyConnectedWeights &operator=(NEConvertFullyConnectedWeights &&) = delete; + /** Default destructor */ + ~NEConvertFullyConnectedWeights(); /** Initialize the function. * * @param[in] input Source weights tensor to convert. Must be 2 dimensional. Data types supported: All. @@ -64,7 +75,7 @@ public: void run() override; private: - NEConvertFullyConnectedWeightsKernel _kernel; + std::unique_ptr _kernel; }; namespace weights_transformations diff --git a/arm_compute/runtime/NEON/functions/NEConvolution.h b/arm_compute/runtime/NEON/functions/NEConvolution.h index eb16a4582e..9415cf0835 100644 --- a/arm_compute/runtime/NEON/functions/NEConvolution.h +++ b/arm_compute/runtime/NEON/functions/NEConvolution.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,8 +24,6 @@ #ifndef ARM_COMPUTE_NECONVOLUTION_H #define ARM_COMPUTE_NECONVOLUTION_H -#include "arm_compute/core/NEON/kernels/NEConvolutionKernel.h" -#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/IMemoryManager.h" @@ -39,6 +37,13 @@ namespace arm_compute { class ITensor; +class NEFillBorderKernel; +template +class NEConvolutionKernel; +template +class NESeparableConvolutionHorKernel; +template +class NESeparableConvolutionVertKernel; /** Basic function to execute convolution of size 3x3. This function calls the following NEON kernels: * @@ -49,6 +54,18 @@ class ITensor; class NEConvolution3x3 : public INESimpleFunction { public: + /** Constructor */ + NEConvolution3x3() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEConvolution3x3(const NEConvolution3x3 &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEConvolution3x3 &operator=(const NEConvolution3x3 &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEConvolution3x3(NEConvolution3x3 &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEConvolution3x3 &operator=(NEConvolution3x3 &&) = delete; + /** Default destructor */ + ~NEConvolution3x3(); /** Initialize the function's source, destination, conv and border_mode. * * @param[in,out] input Source tensor. Data type supported: U8. 
(Written to only for @p border_mode != UNDEFINED) @@ -74,6 +91,16 @@ class NEConvolutionSquare : public IFunction public: /** Default constructor */ NEConvolutionSquare(std::shared_ptr memory_manager = nullptr); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEConvolutionSquare(const NEConvolutionSquare &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEConvolutionSquare &operator=(const NEConvolutionSquare &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEConvolutionSquare(NEConvolutionSquare &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEConvolutionSquare &operator=(NEConvolutionSquare &&) = delete; + /** Default destructor */ + ~NEConvolutionSquare(); /** Initialize the function's source, destination, conv and border_mode. * * @param[in,out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) @@ -89,13 +116,13 @@ public: void run() override; private: - MemoryGroup _memory_group; /**< Function memory group */ - Tensor _tmp; /**< temporary buffer for output of horizontal pass */ - bool _is_separable; /**< true if the convolution can be separated */ - NESeparableConvolutionHorKernel _kernel_hor; /**< kernel for horizontal pass of separated convolution */ - NESeparableConvolutionVertKernel _kernel_vert; /**< kernel for vertical pass of separated convolution */ - NEConvolutionKernel _kernel; /**< kernel for non-separated convolution **/ - NEFillBorderKernel _border_handler; /**< kernel for border handling */ + MemoryGroup _memory_group; /**< Function memory group */ + Tensor _tmp; /**< temporary buffer for output of horizontal pass */ + bool _is_separable; /**< true if the convolution can be separated */ + std::unique_ptr> _kernel_hor; /**< kernel for horizontal pass of separated convolution */ + std::unique_ptr> _kernel_vert; /**< kernel for vertical pass of separated convolution */ + std::unique_ptr> _kernel; /**< kernel for non-separated convolution **/ + std::unique_ptr _border_handler; /**< kernel for border handling */ }; /** Basic function to run 5x5 convolution. */ @@ -115,6 +142,18 @@ using NEConvolution9x9 = NEConvolutionSquare<9>; class NEConvolutionRectangle : public INESimpleFunction { public: + /** Constructor */ + NEConvolutionRectangle() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEConvolutionRectangle(const NEConvolutionRectangle &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEConvolutionRectangle &operator=(const NEConvolutionRectangle &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEConvolutionRectangle(NEConvolutionRectangle &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEConvolutionRectangle &operator=(NEConvolutionRectangle &&) = delete; + /** Default destructor */ + ~NEConvolutionRectangle(); /** Initialize the function's source, destination, conv and border_mode. * * @param[in,out] input Source tensor. Data type supported: U8. 
(Written to only for @p border_mode != UNDEFINED) diff --git a/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h index e8b425b459..54dae57752 100644 --- a/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h +++ b/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -75,7 +75,16 @@ class NEConvolutionLayer : public IFunction public: /** Constructor */ NEConvolutionLayer(std::shared_ptr memory_manager = nullptr); - + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEConvolutionLayer(const NEConvolutionLayer &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEConvolutionLayer &operator=(const NEConvolutionLayer &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEConvolutionLayer(NEConvolutionLayer &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEConvolutionLayer &operator=(NEConvolutionLayer &&) = delete; + /** Default destructor */ + ~NEConvolutionLayer() = default; /** Set the input and output tensors. * * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM], diff --git a/arm_compute/runtime/NEON/functions/NECopy.h b/arm_compute/runtime/NEON/functions/NECopy.h index df1a49863a..a58ac9e620 100644 --- a/arm_compute/runtime/NEON/functions/NECopy.h +++ b/arm_compute/runtime/NEON/functions/NECopy.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -30,11 +30,24 @@ namespace arm_compute { class ITensor; +class ITensorInfo; /** Basic function to run @ref NECopyKernel */ class NECopy : public INESimpleFunctionNoBorder { public: + /** Constructor */ + NECopy() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NECopy(const NECopy &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NECopy &operator=(const NECopy &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NECopy(NECopy &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NECopy &operator=(NECopy &&) = delete; + /** Default destructor */ + ~NECopy(); /** Initialise the function's source and destination. * * @param[in] input Source tensor. 
Data types supported: All diff --git a/arm_compute/runtime/NEON/functions/NECropResize.h b/arm_compute/runtime/NEON/functions/NECropResize.h index 361c236293..5c3733f8ee 100644 --- a/arm_compute/runtime/NEON/functions/NECropResize.h +++ b/arm_compute/runtime/NEON/functions/NECropResize.h @@ -24,7 +24,6 @@ #ifndef ARM_COMPUTE_NEON_CROP_RESIZE_H #define ARM_COMPUTE_NEON_CROP_RESIZE_H -#include "arm_compute/core/NEON/kernels/NECropKernel.h" #include "arm_compute/runtime/NEON/functions/NEScale.h" #include @@ -33,6 +32,7 @@ namespace arm_compute { // Forward Declarations class ITensor; +class NECropKernel; /** Function to perform cropping and resizing */ class NECropResize : public IFunction @@ -49,7 +49,7 @@ public: /** Allow instances of this class to be moved */ NECropResize &operator=(NECropResize &&) = default; /** Default destructor */ - virtual ~NECropResize() = default; + ~NECropResize(); /** Configure kernel * diff --git a/arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h b/arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h index 89f3958417..c9817a63c1 100644 --- a/arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h +++ b/arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h @@ -32,6 +32,7 @@ namespace arm_compute { class ITensor; +class ITensorInfo; /**Basic function to run @ref NEDepthConvertLayerKernel */ class NEDepthConvertLayer : public INESimpleFunctionNoBorder @@ -43,6 +44,8 @@ public: NEDepthConvertLayer(const NEDepthConvertLayer &) = delete; /** Prevent instances of this class from being copied (As this class contains pointers)*/ const NEDepthConvertLayer &operator=(const NEDepthConvertLayer &) = delete; + /** Default destructor */ + ~NEDepthConvertLayer() = default; /** Initialize the function's source, destination * * Valid conversions Input -> Output : diff --git a/arm_compute/runtime/NEON/functions/NEDepthToSpaceLayer.h b/arm_compute/runtime/NEON/functions/NEDepthToSpaceLayer.h index 22bbd6e716..51f7ff7770 100644 --- a/arm_compute/runtime/NEON/functions/NEDepthToSpaceLayer.h +++ b/arm_compute/runtime/NEON/functions/NEDepthToSpaceLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 Arm Limited. + * Copyright (c) 2019-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -26,7 +26,6 @@ #include "arm_compute/runtime/IFunction.h" -#include "arm_compute/core/NEON/kernels/NEDepthToSpaceLayerKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" @@ -34,11 +33,24 @@ namespace arm_compute { // Forward declarations class ITensor; +class ITensorInfo; /** Basic function to run @ref NEDepthToSpaceLayerKernel. */ class NEDepthToSpaceLayer : public INESimpleFunctionNoBorder { public: + /** Constructor */ + NEDepthToSpaceLayer() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEDepthToSpaceLayer(const NEDepthToSpaceLayer &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEDepthToSpaceLayer &operator=(const NEDepthToSpaceLayer &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEDepthToSpaceLayer(NEDepthToSpaceLayer &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEDepthToSpaceLayer &operator=(NEDepthToSpaceLayer &&) = delete; + /** Default destructor */ + ~NEDepthToSpaceLayer() = default; /** Set the input and output tensors. * * @param[in] input Tensor input. 
Supported tensor rank: 4. Data types supported: All diff --git a/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h index c6b98ed435..dc70aec7ff 100644 --- a/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h +++ b/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h @@ -24,17 +24,16 @@ #ifndef ARM_COMPUTE_NEDEPTHWISECONVOLUTION_H #define ARM_COMPUTE_NEDEPTHWISECONVOLUTION_H -#include "arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayerNativeKernel.h" -#include "arm_compute/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h" -#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" #include "arm_compute/runtime/NEON/functions/NEActivationLayer.h" #include "arm_compute/runtime/NEON/functions/NEPermute.h" #include "arm_compute/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.h" +#include namespace arm_compute { // Forward declarations class ITensor; +class NEDepthwiseConvolutionLayerNativeKernel; /** Function to execute a depthwise convolution. */ @@ -51,6 +50,8 @@ public: NEDepthwiseConvolutionLayer &operator=(const NEDepthwiseConvolutionLayer &) = delete; /** Default move assignment operator */ NEDepthwiseConvolutionLayer &operator=(NEDepthwiseConvolutionLayer &&) = default; + /** Default destructor */ + ~NEDepthwiseConvolutionLayer(); /** Initialize the function's source, destination, weights and convolution information. * * @param[in, out] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32 @@ -133,6 +134,8 @@ private: NEDepthwiseConvolutionLayerOptimizedInternal &operator=(const NEDepthwiseConvolutionLayerOptimizedInternal &) = delete; /** Default move assignment operator */ NEDepthwiseConvolutionLayerOptimizedInternal &operator=(NEDepthwiseConvolutionLayerOptimizedInternal &&) = default; + /** Default destructor */ + ~NEDepthwiseConvolutionLayerOptimizedInternal() = default; /** Initialize the function's source, destination, kernels and border_size. * * @param[in, out] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32. (Written to only for border filling). @@ -170,25 +173,23 @@ private: void prepare() override; private: - MemoryGroup _memory_group; - NEDepthwiseConvolutionAssemblyDispatch _dwc_optimized_func; - NEDirectConvolutionLayerOutputStageKernel _output_stage_kernel; - NEFillBorderKernel _border_handler; - NEPermute _permute_input; - NEPermute _permute_weights; - NEPermute _permute_output; - NEActivationLayer _activationlayer_function; - Tensor _accumulator; - Tensor _permuted_input; - Tensor _permuted_weights; - Tensor _permuted_output; - const ITensor *_original_weights; - bool _has_bias; - bool _is_quantized; - bool _is_nchw; - bool _permute; - bool _is_activationlayer_enabled; - bool _is_prepared; + MemoryGroup _memory_group; + NEDepthwiseConvolutionAssemblyDispatch _dwc_optimized_func; + NEPermute _permute_input; + NEPermute _permute_weights; + NEPermute _permute_output; + NEActivationLayer _activationlayer_function; + Tensor _accumulator; + Tensor _permuted_input; + Tensor _permuted_weights; + Tensor _permuted_output; + const ITensor *_original_weights; + bool _has_bias; + bool _is_quantized; + bool _is_nchw; + bool _permute; + bool _is_activationlayer_enabled; + bool _is_prepared; }; /** Basic function to execute a generic depthwise convolution. 
This function calls the following NEON kernel: @@ -209,6 +210,8 @@ private: NEDepthwiseConvolutionLayerGeneric &operator=(const NEDepthwiseConvolutionLayerGeneric &) = delete; /** Default move assignment operator */ NEDepthwiseConvolutionLayerGeneric &operator=(NEDepthwiseConvolutionLayerGeneric &&) = default; + /** Default destructor */ + ~NEDepthwiseConvolutionLayerGeneric() = default; /** Initialize the function's source, destination, weights and convolution information. * * @param[in, out] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32. (Written to only for border filling). @@ -248,18 +251,18 @@ private: void prepare() override; private: - NEDepthwiseConvolutionLayerNativeKernel _depthwise_conv_kernel; - NEPermute _permute_input; - NEPermute _permute_weights; - NEPermute _permute_output; - NEActivationLayer _activationlayer_function; - Tensor _permuted_input; - Tensor _permuted_weights; - Tensor _permuted_output; - bool _is_prepared; - bool _is_nchw; - bool _is_activationlayer_enabled; - const ITensor *_original_weights; + std::unique_ptr _depthwise_conv_kernel; + NEPermute _permute_input; + NEPermute _permute_weights; + NEPermute _permute_output; + NEActivationLayer _activationlayer_function; + Tensor _permuted_input; + Tensor _permuted_weights; + Tensor _permuted_output; + bool _is_prepared; + bool _is_nchw; + bool _is_activationlayer_enabled; + const ITensor *_original_weights; }; DepthwiseConvolutionFunction _depth_conv_func; diff --git a/arm_compute/runtime/NEON/functions/NEDequantizationLayer.h b/arm_compute/runtime/NEON/functions/NEDequantizationLayer.h index 77295bc089..f52d709c74 100644 --- a/arm_compute/runtime/NEON/functions/NEDequantizationLayer.h +++ b/arm_compute/runtime/NEON/functions/NEDequantizationLayer.h @@ -32,6 +32,7 @@ namespace arm_compute { // Forward declarations class ITensor; +class ITensorInfo; /** Basic function to run @ref NEDequantizationLayerKernel that dequantizes an input tensor */ class NEDequantizationLayer : public INESimpleFunctionNoBorder diff --git a/arm_compute/runtime/NEON/functions/NEDerivative.h b/arm_compute/runtime/NEON/functions/NEDerivative.h index 8eb21425ac..7d852d0ffe 100644 --- a/arm_compute/runtime/NEON/functions/NEDerivative.h +++ b/arm_compute/runtime/NEON/functions/NEDerivative.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,16 +24,16 @@ #ifndef ARM_COMPUTE_NEDERIVATIVE_H #define ARM_COMPUTE_NEDERIVATIVE_H -#include "arm_compute/core/NEON/kernels/NEDerivativeKernel.h" -#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/IFunction.h" -#include +#include namespace arm_compute { class ITensor; +class NEDerivativeKernel; +class NEFillBorderKernel; /** Basic function to execute first order derivative operator. 
This function calls the following NEON kernels: * @@ -46,6 +46,16 @@ class NEDerivative : public IFunction public: /** Default constructor */ NEDerivative(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEDerivative(const NEDerivative &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEDerivative &operator=(const NEDerivative &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEDerivative(NEDerivative &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEDerivative &operator=(NEDerivative &&) = delete; + /** Default destructor */ + ~NEDerivative(); /** Initialise the function's source, destinations and border mode. * * @note At least one of output_x or output_y must be not NULL. @@ -63,8 +73,8 @@ public: void run() override; private: - NEDerivativeKernel _kernel; /**< Derivative kernel */ - NEFillBorderKernel _border_handler; /**< Kernel to handle tensor borders */ + std::unique_ptr _kernel; /**< Derivative kernel */ + std::unique_ptr _border_handler; /**< Kernel to handle tensor borders */ }; } #endif /* ARM_COMPUTE_NEDERIVATIVE_H */ diff --git a/arm_compute/runtime/NEON/functions/NEDetectionPostProcessLayer.h b/arm_compute/runtime/NEON/functions/NEDetectionPostProcessLayer.h index e0431b2b31..d5c1f0ab6f 100644 --- a/arm_compute/runtime/NEON/functions/NEDetectionPostProcessLayer.h +++ b/arm_compute/runtime/NEON/functions/NEDetectionPostProcessLayer.h @@ -53,6 +53,8 @@ public: NEDetectionPostProcessLayer(const NEDetectionPostProcessLayer &) = delete; /** Prevent instances of this class from being copied (As this class contains pointers) */ NEDetectionPostProcessLayer &operator=(const NEDetectionPostProcessLayer &) = delete; + /** Default destructor */ + ~NEDetectionPostProcessLayer() = default; /** Configure the detection output layer NE function * * @param[in] input_box_encoding The bounding box input tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F32. diff --git a/arm_compute/runtime/NEON/functions/NEDilate.h b/arm_compute/runtime/NEON/functions/NEDilate.h index 6dae2c7029..33be5c8fba 100644 --- a/arm_compute/runtime/NEON/functions/NEDilate.h +++ b/arm_compute/runtime/NEON/functions/NEDilate.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h index d1c811c363..5b6ed55be2 100644 --- a/arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h +++ b/arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h @@ -24,9 +24,6 @@ #ifndef ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYER_H #define ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYER_H -#include "arm_compute/core/NEON/kernels/NEDirectConvolutionLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEDirectConvolutionLayerOutputStageKernel.h" -#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/IMemoryManager.h" @@ -38,6 +35,10 @@ namespace arm_compute { +class NEDirectConvolutionLayerOutputStageKernel; +class NEDirectConvolutionLayerKernel; +class NEFillBorderKernel; + /** Function to run the direct convolution. 
* * This function calls the following NEON kernels: @@ -51,6 +52,16 @@ class NEDirectConvolutionLayer : public IFunction public: /** Constructor */ NEDirectConvolutionLayer(std::shared_ptr memory_manager = nullptr); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEDirectConvolutionLayer(const NEDirectConvolutionLayer &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEDirectConvolutionLayer &operator=(const NEDirectConvolutionLayer &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEDirectConvolutionLayer(NEDirectConvolutionLayer &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEDirectConvolutionLayer &operator=(NEDirectConvolutionLayer &&) = delete; + /** Default destructor */ + ~NEDirectConvolutionLayer(); /** Set the input, weights, biases and output tensors. * * @note: DirectConvolution only works in the following configurations: @@ -97,16 +108,16 @@ public: void run() override; private: - MemoryGroup _memory_group; - NEDirectConvolutionLayerOutputStageKernel _output_stage_kernel; - NEDirectConvolutionLayerKernel _conv_kernel; - NEFillBorderKernel _input_border_handler; - NEActivationLayer _activationlayer_function; - Tensor _accumulator; - bool _has_bias; - bool _is_activationlayer_enabled; - unsigned int _dim_split; - bool _is_padding_required; + MemoryGroup _memory_group; + std::unique_ptr _output_stage_kernel; + std::unique_ptr _conv_kernel; + std::unique_ptr _input_border_handler; + NEActivationLayer _activationlayer_function; + Tensor _accumulator; + bool _has_bias; + bool _is_activationlayer_enabled; + unsigned int _dim_split; + bool _is_padding_required; }; } #endif /* ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYER_H */ diff --git a/arm_compute/runtime/NEON/functions/NEElementwiseUnaryLayer.h b/arm_compute/runtime/NEON/functions/NEElementwiseUnaryLayer.h index 8b3301889a..46a7316705 100644 --- a/arm_compute/runtime/NEON/functions/NEElementwiseUnaryLayer.h +++ b/arm_compute/runtime/NEON/functions/NEElementwiseUnaryLayer.h @@ -24,11 +24,13 @@ #ifndef ARM_COMPUTE_NEELEMENTWISEUNARYLAYER_H #define ARM_COMPUTE_NEELEMENTWISEUNARYLAYER_H +#include "arm_compute/core/Error.h" #include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" namespace arm_compute { class ITensor; +class ITensorInfo; /** Basic function to perform inverse square root on an input tensor. */ class NERsqrtLayer : public INESimpleFunctionNoBorder diff --git a/arm_compute/runtime/NEON/functions/NEEqualizeHistogram.h b/arm_compute/runtime/NEON/functions/NEEqualizeHistogram.h index 5c0c323591..36c4902c04 100644 --- a/arm_compute/runtime/NEON/functions/NEEqualizeHistogram.h +++ b/arm_compute/runtime/NEON/functions/NEEqualizeHistogram.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. 
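NEDirectConvolutionLayer above now owns its output-stage, convolution and border kernels through std::unique_ptr, while its configuration stays as documented. A usage sketch for a simple 1x1 F32 convolution follows (shapes, strides and the absence of a memory manager are illustrative assumptions):

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/TensorShape.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/NEON/NEFunctions.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        Tensor src, weights, bias, dst;
        src.allocator()->init(TensorInfo(TensorShape(16U, 16U, 3U), 1, DataType::F32));       // W x H x IFM
        weights.allocator()->init(TensorInfo(TensorShape(1U, 1U, 3U, 8U), 1, DataType::F32)); // 1x1 kernels, 8 OFM
        bias.allocator()->init(TensorInfo(TensorShape(8U), 1, DataType::F32));
        dst.allocator()->init(TensorInfo(TensorShape(16U, 16U, 8U), 1, DataType::F32));

        NEDirectConvolutionLayer conv; // no memory manager passed: plain allocations are used
        conv.configure(&src, &weights, &bias, &dst, PadStrideInfo(1, 1, 0, 0));

        for(Tensor *t : { &src, &weights, &bias, &dst })
        {
            t->allocator()->allocate();
        }

        conv.run();
        return 0;
    }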
* * SPDX-License-Identifier: MIT * @@ -24,9 +24,6 @@ #ifndef ARM_COMPUTE_NEEQUALIZEHISTOGRAM_H #define ARM_COMPUTE_NEEQUALIZEHISTOGRAM_H -#include "arm_compute/core/NEON/kernels/NECumulativeDistributionKernel.h" -#include "arm_compute/core/NEON/kernels/NEHistogramKernel.h" -#include "arm_compute/core/NEON/kernels/NETableLookupKernel.h" #include "arm_compute/runtime/Distribution1D.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/Lut.h" @@ -36,6 +33,9 @@ namespace arm_compute { class ITensor; +class NEHistogramKernel; +class NECumulativeDistributionKernel; +class NETableLookupKernel; using IImage = ITensor; /** Basic function to execute histogram equalization. This function calls the following NEON kernels: @@ -50,6 +50,16 @@ class NEEqualizeHistogram : public IFunction public: /** Default Constructor. */ NEEqualizeHistogram(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEEqualizeHistogram(const NEEqualizeHistogram &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEEqualizeHistogram &operator=(const NEEqualizeHistogram &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEEqualizeHistogram(NEEqualizeHistogram &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEEqualizeHistogram &operator=(NEEqualizeHistogram &&) = delete; + /** Default destructor */ + ~NEEqualizeHistogram(); /** Initialise the kernel's inputs. * * @note Currently the width of the input image must be a multiple of 16. @@ -63,15 +73,15 @@ public: void run() override; private: - NEHistogramKernel _histogram_kernel; /**< Kernel that calculates the histogram of input. */ - NECumulativeDistributionKernel _cd_histogram_kernel; /**< Kernel that calculates the cumulative distribution + std::unique_ptr _histogram_kernel; /**< Kernel that calculates the histogram of input. */ + std::unique_ptr _cd_histogram_kernel; /**< Kernel that calculates the cumulative distribution and creates the relevant LookupTable. */ - NETableLookupKernel _map_histogram_kernel; /**< Kernel that maps the input to output using the lut. */ - Distribution1D _hist; /**< Distribution that holds the histogram of the input image. */ - Distribution1D _cum_dist; /**< Distribution that holds the cummulative distribution of the input histogram. */ - Lut _cd_lut; /**< Holds the equalization lookuptable. */ - static constexpr uint32_t nr_bins{ 256 }; /**< Histogram bins of the internal histograms. */ - static constexpr uint32_t max_range{ nr_bins - 1 }; /**< Histogram range of the internal histograms. */ + std::unique_ptr _map_histogram_kernel; /**< Kernel that maps the input to output using the lut. */ + Distribution1D _hist; /**< Distribution that holds the histogram of the input image. */ + Distribution1D _cum_dist; /**< Distribution that holds the cummulative distribution of the input histogram. */ + Lut _cd_lut; /**< Holds the equalization lookuptable. */ + static constexpr uint32_t nr_bins{ 256 }; /**< Histogram bins of the internal histograms. */ + static constexpr uint32_t max_range{ nr_bins - 1 }; /**< Histogram range of the internal histograms. 
*/ }; } #endif /*ARM_COMPUTE_NEEQUALIZEHISTOGRAM_H */ diff --git a/arm_compute/runtime/NEON/functions/NEErode.h b/arm_compute/runtime/NEON/functions/NEErode.h index 3e84c2b758..e2d76c1e1d 100644 --- a/arm_compute/runtime/NEON/functions/NEErode.h +++ b/arm_compute/runtime/NEON/functions/NEErode.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/arm_compute/runtime/NEON/functions/NEFFT1D.h b/arm_compute/runtime/NEON/functions/NEFFT1D.h index 312b46b10f..4b6cc3fd18 100644 --- a/arm_compute/runtime/NEON/functions/NEFFT1D.h +++ b/arm_compute/runtime/NEON/functions/NEFFT1D.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 Arm Limited. + * Copyright (c) 2019-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,19 +24,21 @@ #ifndef ARM_COMPUTE_NEFFT1D_H #define ARM_COMPUTE_NEFFT1D_H -#include "arm_compute/core/NEON/kernels/NEFFTDigitReverseKernel.h" -#include "arm_compute/core/NEON/kernels/NEFFTRadixStageKernel.h" -#include "arm_compute/core/NEON/kernels/NEFFTScaleKernel.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/FunctionDescriptors.h" #include "arm_compute/runtime/MemoryGroup.h" #include "arm_compute/runtime/Tensor.h" +#include + namespace arm_compute { // Forward declaration class ITensor; +class NEFFTDigitReverseKernel; +class NEFFTRadixStageKernel; +class NEFFTScaleKernel; /** Basic function to execute one dimensional FFT. This function calls the following NEON kernels: * @@ -49,6 +51,16 @@ class NEFFT1D : public IFunction public: /** Default Constructor */ NEFFT1D(std::shared_ptr memory_manager = nullptr); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEFFT1D(const NEFFT1D &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEFFT1D &operator=(const NEFFT1D &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEFFT1D(NEFFT1D &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEFFT1D &operator=(NEFFT1D &&) = delete; + /** Default destructor */ + ~NEFFT1D(); /** Initialise the function's source and destinations. * * @param[in] input Source tensor. Data types supported: F32. Number of channels supported: 1 (real tensor) or 2 (complex tensor). @@ -71,15 +83,15 @@ public: void run() override; protected: - MemoryGroup _memory_group; - NEFFTDigitReverseKernel _digit_reverse_kernel; - std::vector _fft_kernels; - NEFFTScaleKernel _scale_kernel; - Tensor _digit_reversed_input; - Tensor _digit_reverse_indices; - unsigned int _num_ffts; - unsigned int _axis; - bool _run_scale; + MemoryGroup _memory_group; + std::unique_ptr _digit_reverse_kernel; + std::vector> _fft_kernels; + std::unique_ptr _scale_kernel; + Tensor _digit_reversed_input; + Tensor _digit_reverse_indices; + unsigned int _num_ffts; + unsigned int _axis; + bool _run_scale; }; } // namespace arm_compute #endif /*ARM_COMPUTE_NEFFT1D_H */ diff --git a/arm_compute/runtime/NEON/functions/NEFFT2D.h b/arm_compute/runtime/NEON/functions/NEFFT2D.h index efcce2e9a4..18e72c1a2f 100644 --- a/arm_compute/runtime/NEON/functions/NEFFT2D.h +++ b/arm_compute/runtime/NEON/functions/NEFFT2D.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 Arm Limited. + * Copyright (c) 2019-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -46,6 +46,16 @@ class NEFFT2D : public IFunction public: /** Default Constructor */ NEFFT2D(std::shared_ptr memory_manager = nullptr); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEFFT2D(const NEFFT2D &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEFFT2D &operator=(const NEFFT2D &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEFFT2D(NEFFT2D &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEFFT2D &operator=(NEFFT2D &&) = delete; + /** Default destructor */ + ~NEFFT2D(); /** Initialise the function's source and destinations * * @param[in] input Source tensor. Data types supported: F32. diff --git a/arm_compute/runtime/NEON/functions/NEFFTConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEFFTConvolutionLayer.h index dd57900f2a..b3e98fc2d6 100644 --- a/arm_compute/runtime/NEON/functions/NEFFTConvolutionLayer.h +++ b/arm_compute/runtime/NEON/functions/NEFFTConvolutionLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 Arm Limited. + * Copyright (c) 2019-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -69,6 +69,8 @@ public: NEFFTConvolutionLayer &operator=(const NEFFTConvolutionLayer &) = delete; /** Default move assignment operator */ NEFFTConvolutionLayer &operator=(NEFFTConvolutionLayer &&) = default; + /** Default destructor */ + ~NEFFTConvolutionLayer(); /** Set the input and output tensors. * * @note: This function only works with any square kernel size and unit strides for both NCHW and NHWC data layout diff --git a/arm_compute/runtime/NEON/functions/NEFastCorners.h b/arm_compute/runtime/NEON/functions/NEFastCorners.h index cc69e77ebb..025038bb28 100644 --- a/arm_compute/runtime/NEON/functions/NEFastCorners.h +++ b/arm_compute/runtime/NEON/functions/NEFastCorners.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,10 +24,6 @@ #ifndef ARM_COMPUTE_NEFASTCORNERS_H #define ARM_COMPUTE_NEFASTCORNERS_H -#include "arm_compute/core/NEON/kernels/NEFastCornersKernel.h" -#include "arm_compute/core/NEON/kernels/NEFillArrayKernel.h" -#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" -#include "arm_compute/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/Array.h" #include "arm_compute/runtime/IFunction.h" @@ -41,6 +37,10 @@ namespace arm_compute { class ITensor; +class NENonMaximaSuppression3x3Kernel; +class NEFastCornersKernel; +class NEFillBorderKernel; +class NEFillArrayKernel; using IImage = ITensor; /** Basic function to execute fast corners. 
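The hunks above keep repeating one refactoring pattern: each function's kernel members become std::unique_ptr to a kernel type that is only forward-declared in the header, copy and move are explicitly deleted, and the destructor is declared in the header but defined in the source file where the kernel type is complete. A minimal sketch of that pattern, using placeholder names (SomeFunction and SomeKernel are illustrative, not Compute Library classes):

    // SomeFunction.h - the public header only needs a forward declaration of the kernel
    #include <memory>

    namespace example
    {
    class SomeKernel; // full definition lives with the implementation

    class SomeFunction
    {
    public:
        SomeFunction();
        SomeFunction(const SomeFunction &) = delete;            // copying would duplicate ownership of the kernel
        SomeFunction &operator=(const SomeFunction &) = delete;
        SomeFunction(SomeFunction &&) = delete;                 // kept non-movable, matching the convention in this patch
        SomeFunction &operator=(SomeFunction &&) = delete;
        ~SomeFunction();                                        // declared here, defined where SomeKernel is complete

    private:
        std::unique_ptr<SomeKernel> _kernel;
    };
    } // namespace example

    // SomeFunction.cpp - SomeKernel is a complete type here, so unique_ptr's deleter can be instantiated
    namespace example
    {
    class SomeKernel
    {
    };

    SomeFunction::SomeFunction() : _kernel(std::make_unique<SomeKernel>())
    {
    }

    SomeFunction::~SomeFunction() = default;
    } // namespace example

Defining the destructor out of line is what allows std::unique_ptr<SomeKernel> to sit in the header while SomeKernel is still incomplete; a destructor defined (or defaulted) in the header would try to instantiate the deleter too early and fail to compile.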
This function call the following NEON kernels: @@ -55,6 +55,16 @@ class NEFastCorners : public IFunction public: /** Constructor */ NEFastCorners(std::shared_ptr memory_manager = nullptr); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEFastCorners(const NEFastCorners &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEFastCorners &operator=(const NEFastCorners &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEFastCorners(NEFastCorners &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEFastCorners &operator=(NEFastCorners &&) = delete; + /** Default destructor */ + ~NEFastCorners(); /** Initialize the function's source, destination, conv and border_mode. * * @param[in, out] input Source image. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) @@ -71,14 +81,14 @@ public: void run() override; private: - MemoryGroup _memory_group; - NEFastCornersKernel _fast_corners_kernel; - NEFillBorderKernel _border_handler; - NENonMaximaSuppression3x3Kernel _nonmax_kernel; - NEFillArrayKernel _fill_kernel; - Image _output; - Image _suppressed; - bool _non_max; + MemoryGroup _memory_group; + std::unique_ptr _fast_corners_kernel; + std::unique_ptr _border_handler; + std::unique_ptr _nonmax_kernel; + std::unique_ptr _fill_kernel; + Image _output; + Image _suppressed; + bool _non_max; }; } #endif /*ARM_COMPUTE_NEFASTCORNERS_H */ diff --git a/arm_compute/runtime/NEON/functions/NEFill.h b/arm_compute/runtime/NEON/functions/NEFill.h index 1c3c546c68..14d690f419 100644 --- a/arm_compute/runtime/NEON/functions/NEFill.h +++ b/arm_compute/runtime/NEON/functions/NEFill.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 Arm Limited. + * Copyright (c) 2019-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,7 +24,6 @@ #ifndef ARM_COMPUTE_NEFILL_H #define ARM_COMPUTE_NEFILL_H -#include "arm_compute/core/NEON/kernels/NEMemsetKernel.h" #include "arm_compute/core/PixelValue.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" diff --git a/arm_compute/runtime/NEON/functions/NEFillBorder.h b/arm_compute/runtime/NEON/functions/NEFillBorder.h index 3ac23be731..e9a08ef7ec 100644 --- a/arm_compute/runtime/NEON/functions/NEFillBorder.h +++ b/arm_compute/runtime/NEON/functions/NEFillBorder.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -24,15 +24,16 @@ #ifndef ARM_COMPUTE_NEFILLBORDER_H #define ARM_COMPUTE_NEFILLBORDER_H -#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" #include "arm_compute/core/PixelValue.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/IFunction.h" +#include namespace arm_compute { // Forward declaration class ITensor; +class NEFillBorderKernel; /** Basic function to run @ref NEFillBorderKernel */ class NEFillBorder : public IFunction @@ -53,7 +54,7 @@ public: void run() override; private: - NEFillBorderKernel _border_handler; /**< Kernel to handle image borders */ + std::unique_ptr _border_handler; /**< Kernel to handle image borders */ }; } // namespace arm_compute #endif /*ARM_COMPUTE_NEFILLBORDER_H */ diff --git a/arm_compute/runtime/NEON/functions/NEFlattenLayer.h b/arm_compute/runtime/NEON/functions/NEFlattenLayer.h index 73da254ef5..9f0d5226de 100644 --- a/arm_compute/runtime/NEON/functions/NEFlattenLayer.h +++ b/arm_compute/runtime/NEON/functions/NEFlattenLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2017-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -30,6 +30,7 @@ namespace arm_compute { class ITensor; +class ITensorInfo; /** Basic function to execute flatten layer kernel. */ class NEFlattenLayer : public INESimpleFunctionNoBorder diff --git a/arm_compute/runtime/NEON/functions/NEFloor.h b/arm_compute/runtime/NEON/functions/NEFloor.h index 12f0ee20ba..7f4248eadb 100644 --- a/arm_compute/runtime/NEON/functions/NEFloor.h +++ b/arm_compute/runtime/NEON/functions/NEFloor.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2017-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -31,6 +31,7 @@ namespace arm_compute { class ITensor; +class ITensorInfo; /** Basic function to run @ref NEFloorKernel */ class NEFloor : public INESimpleFunctionNoBorder diff --git a/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h b/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h index 21df3c4aef..3ab3d81262 100644 --- a/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h +++ b/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h @@ -26,25 +26,36 @@ #include "arm_compute/runtime/IFunction.h" -#include "arm_compute/core/NEON/kernels/NEFlattenLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NETransposeKernel.h" #include "arm_compute/runtime/MemoryGroup.h" #include "arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h" +#include "arm_compute/runtime/NEON/functions/NEFlattenLayer.h" #include "arm_compute/runtime/NEON/functions/NEGEMM.h" #include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h" #include "arm_compute/runtime/Tensor.h" namespace arm_compute { +class NEFlattenLayerKernel; + /** Basic function to reshape the weights of Fully Connected layer with NEON. This function calls the following kernels: - * - * -# @ref NETransposeKernel * * @note The fully connected layer accepts "weights" tensors only with 2 dimensions. 
*/ class NEFullyConnectedLayerReshapeWeights : public INESimpleFunctionNoBorder { public: + /** Constructor */ + NEFullyConnectedLayerReshapeWeights() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEFullyConnectedLayerReshapeWeights(const NEFullyConnectedLayerReshapeWeights &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEFullyConnectedLayerReshapeWeights &operator=(const NEFullyConnectedLayerReshapeWeights &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEFullyConnectedLayerReshapeWeights(NEFullyConnectedLayerReshapeWeights &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEFullyConnectedLayerReshapeWeights &operator=(NEFullyConnectedLayerReshapeWeights &&) = delete; + /** Default destructor */ + ~NEFullyConnectedLayerReshapeWeights() = default; /** Set the input and output tensors. * * @param[in] input Weights tensor. The weights must be 2 dimensional. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. @@ -122,6 +133,8 @@ public: NEFullyConnectedLayer &operator=(const NEFullyConnectedLayer &) = delete; /** Default move assignment operator */ NEFullyConnectedLayer &operator=(NEFullyConnectedLayer &&) = default; + /** Default destructor */ + ~NEFullyConnectedLayer(); /** Set the input and output tensors. * * @param[in] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32. @@ -168,7 +181,7 @@ private: MemoryGroup _memory_group; IWeightsManager *_weights_manager; - NEFlattenLayerKernel _flatten_kernel; + std::unique_ptr _flatten_kernel; NEConvertFullyConnectedWeights _convert_weights; weights_transformations::NEConvertFullyConnectedWeightsManaged _convert_weights_managed; NEFullyConnectedLayerReshapeWeights _reshape_weights_function; diff --git a/arm_compute/runtime/NEON/functions/NEFuseBatchNormalization.h b/arm_compute/runtime/NEON/functions/NEFuseBatchNormalization.h index 6b561352a6..5dc804e240 100644 --- a/arm_compute/runtime/NEON/functions/NEFuseBatchNormalization.h +++ b/arm_compute/runtime/NEON/functions/NEFuseBatchNormalization.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -25,7 +25,6 @@ #define ARM_COMPUTE_NEFUSEBATCHNORMALIZATION_H #include "arm_compute/core/ITensor.h" -#include "arm_compute/core/NEON/kernels/NEFuseBatchNormalizationKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/IFunction.h" @@ -33,6 +32,7 @@ namespace arm_compute { // Forward declarations class ITensor; +class NEFuseBatchNormalizationKernel; /** Basic function to fuse the batch normalization node to a preceding convolution node */ class NEFuseBatchNormalization : public IFunction @@ -49,7 +49,7 @@ public: /** Allow instances of this class to be moved */ NEFuseBatchNormalization &operator=(NEFuseBatchNormalization &&) = default; /** Default destructor */ - ~NEFuseBatchNormalization() = default; + ~NEFuseBatchNormalization(); /** Set the input and output tensors. * * @param[in] input_weights Input weights tensor for convolution or depthwise convolution layer. Data type supported: F16/F32. 
Data layout supported: NCHW, NHWC @@ -94,7 +94,7 @@ public: void run() override; private: - NEFuseBatchNormalizationKernel _fuse_bn_kernel; + std::unique_ptr _fuse_bn_kernel; }; } // namespace arm_compute #endif /*ARM_COMPUTE_NEFUSEBATCHNORMALIZATION_H */ diff --git a/arm_compute/runtime/NEON/functions/NEGEMM.h b/arm_compute/runtime/NEON/functions/NEGEMM.h index 8d65fb5303..645ab56417 100644 --- a/arm_compute/runtime/NEON/functions/NEGEMM.h +++ b/arm_compute/runtime/NEON/functions/NEGEMM.h @@ -24,11 +24,6 @@ #ifndef ARM_COMPUTE_NEGEMM_H #define ARM_COMPUTE_NEGEMM_H -#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/IMemoryManager.h" #include "arm_compute/runtime/IWeightsManager.h" @@ -38,8 +33,14 @@ #include "arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h" #include "arm_compute/runtime/Tensor.h" +#include + namespace arm_compute { +class NEGEMMInterleave4x4Kernel; +class NEGEMMMatrixAdditionKernel; +class NEGEMMMatrixMultiplyKernel; +class NEGEMMTranspose1xWKernel; /** Basic function to execute GEMM on NEON. This function calls the following NEON kernels: * * If optimized assembly is available: @@ -69,6 +70,8 @@ public: NEGEMM &operator=(const NEGEMM &) = delete; /** Default move assignment operator */ NEGEMM &operator=(NEGEMM &&) = default; + /** Default destructor */ + ~NEGEMM(); /** Initialise the kernel's inputs, output * * @note GEMM: General Matrix Multiply - [alpha * A * B + beta * C]. @@ -104,16 +107,16 @@ public: void prepare() override; private: - MemoryGroup _memory_group; - IWeightsManager *_weights_manager; - NEGEMMInterleave4x4Kernel _interleave_kernel; - NEGEMMTranspose1xWKernel _transpose_kernel; - NEGEMMMatrixMultiplyKernel _mm_kernel; - NEGEMMAssemblyDispatch _asm_glue; - NEGEMMMatrixAdditionKernel _ma_kernel; - NEActivationLayer _alpha_scale_func; - NEArithmeticAddition _add_bias; - NEActivationLayer _activation_func; + MemoryGroup _memory_group; + IWeightsManager *_weights_manager; + std::unique_ptr _interleave_kernel; + std::unique_ptr _transpose_kernel; + std::unique_ptr _mm_kernel; + NEGEMMAssemblyDispatch _asm_glue; + std::unique_ptr _ma_kernel; + NEActivationLayer _alpha_scale_func; + NEArithmeticAddition _add_bias; + NEActivationLayer _activation_func; Tensor _tmp_a; Tensor _tmp_b; diff --git a/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h index b3f5c51010..6bcf56fb0b 100644 --- a/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h +++ b/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h @@ -26,10 +26,6 @@ #include "arm_compute/runtime/IFunction.h" -#include "arm_compute/core/NEON/kernels/NECol2ImKernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h" -#include "arm_compute/core/NEON/kernels/NEIm2ColKernel.h" -#include "arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/IWeightsManager.h" #include "arm_compute/runtime/MemoryGroup.h" @@ -44,6 +40,9 @@ namespace arm_compute { class ITensor; +class NECol2ImKernel; +class NEIm2ColKernel; +class NEWeightsReshapeKernel; /** Function to reshape the weights. 
This function calls the following kernel: * -# @ref NEWeightsReshapeKernel @@ -61,6 +60,8 @@ public: NEConvolutionLayerReshapeWeights &operator=(const NEConvolutionLayerReshapeWeights &) = delete; /** Default move assignment operator */ NEConvolutionLayerReshapeWeights &operator=(NEConvolutionLayerReshapeWeights &&) = default; + /** Default destructor */ + ~NEConvolutionLayerReshapeWeights(); /** Set the input and output tensors. * * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. @@ -88,7 +89,7 @@ public: void run() override; private: - NEWeightsReshapeKernel _weights_reshape_kernel; + std::unique_ptr _weights_reshape_kernel; }; namespace weights_transformations @@ -97,6 +98,18 @@ namespace weights_transformations class NEConvolutionLayerReshapeWeightsTransform : public ITransformWeights { public: + /** Constructor */ + NEConvolutionLayerReshapeWeightsTransform() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEConvolutionLayerReshapeWeightsTransform(const NEConvolutionLayerReshapeWeightsTransform &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEConvolutionLayerReshapeWeightsTransform &operator=(const NEConvolutionLayerReshapeWeightsTransform &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEConvolutionLayerReshapeWeightsTransform(NEConvolutionLayerReshapeWeightsTransform &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEConvolutionLayerReshapeWeightsTransform &operator=(NEConvolutionLayerReshapeWeightsTransform &&) = delete; + /** Default destructor */ + ~NEConvolutionLayerReshapeWeightsTransform() = default; void configure(const ITensor *input, const ITensor *biases) { _bias_bit = (biases != nullptr) ? 1 : 0; @@ -160,6 +173,8 @@ public: NEGEMMConvolutionLayer &operator=(const NEGEMMConvolutionLayer &) = delete; /** Default move assignment operator */ NEGEMMConvolutionLayer &operator=(NEGEMMConvolutionLayer &&) = default; + /** Default destructor */ + ~NEGEMMConvolutionLayer(); /** Set the input and output tensors. * * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM], @@ -253,10 +268,10 @@ private: IWeightsManager *_weights_manager; NEConvolutionLayerReshapeWeights _reshape_weights; weights_transformations::NEConvolutionLayerReshapeWeightsTransform _reshape_weights_managed; - NEIm2ColKernel _im2col_kernel; + std::unique_ptr _im2col_kernel; NEGEMM _mm_gemm; NEGEMMLowpMatrixMultiplyCore _mm_gemmlowp; - NECol2ImKernel _col2im_kernel; + std::unique_ptr _col2im_kernel; NEReshapeLayer _reshape_layer; const ITensor *_original_weights; diff --git a/arm_compute/runtime/NEON/functions/NEGEMMInterleave4x4.h b/arm_compute/runtime/NEON/functions/NEGEMMInterleave4x4.h index 58cb383c67..7195c71063 100644 --- a/arm_compute/runtime/NEON/functions/NEGEMMInterleave4x4.h +++ b/arm_compute/runtime/NEON/functions/NEGEMMInterleave4x4.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2017-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/arm_compute/runtime/NEON/functions/NEGEMMLowpAssemblyMatrixMultiplyCore.h b/arm_compute/runtime/NEON/functions/NEGEMMLowpAssemblyMatrixMultiplyCore.h index 9813b34661..961b1901e7 100644 --- a/arm_compute/runtime/NEON/functions/NEGEMMLowpAssemblyMatrixMultiplyCore.h +++ b/arm_compute/runtime/NEON/functions/NEGEMMLowpAssemblyMatrixMultiplyCore.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2017-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,7 +24,6 @@ #ifndef ARM_COMPUTE_NEGEMMLOWPASSEMBLYMATRIXMULTIPLYCORE_H #define ARM_COMPUTE_NEGEMMLOWPASSEMBLYMATRIXMULTIPLYCORE_H -#include "arm_compute/core/NEON/INEKernel.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/IMemoryManager.h" #include "arm_compute/runtime/MemoryGroup.h" @@ -37,6 +36,9 @@ namespace arm_compute { // Forward declarations class ITensor; +class NEGEMMInterleave4x4Kernel; +class NEGEMMTranspose1xWKernel; +class NEGEMMLowpMatrixMultiplyKernel; /** Basic function to execute matrix multiply assembly kernels. */ class NEGEMMLowpAssemblyMatrixMultiplyCore : public IFunction @@ -44,6 +46,9 @@ class NEGEMMLowpAssemblyMatrixMultiplyCore : public IFunction public: /** Constructor */ NEGEMMLowpAssemblyMatrixMultiplyCore(std::shared_ptr memory_manager = nullptr); + /** Destructor */ + ~NEGEMMLowpAssemblyMatrixMultiplyCore(); + /** Initialise the kernel's inputs, output * * @param[in] a First input tensor (Matrix A). Data type supported: U8, S8. @@ -57,13 +62,13 @@ public: void run() override; private: - MemoryGroup _memory_group; - NEGEMMAssemblyDispatch _asm_glue; - std::unique_ptr _mm_kernel; - std::unique_ptr _mtx_a_reshape_kernel; - std::unique_ptr _mtx_b_reshape_kernel; - Tensor _tmp_a; - Tensor _tmp_b; + MemoryGroup _memory_group; + NEGEMMAssemblyDispatch _asm_glue; + std::unique_ptr _mm_kernel; + std::unique_ptr _mtx_a_reshape_kernel; + std::unique_ptr _mtx_b_reshape_kernel; + Tensor _tmp_a; + Tensor _tmp_b; }; } // namespace arm_compute #endif /*ARM_COMPUTE_NEGEMMLOWPASSEMBLYMATRIXMULTIPLYCORE_H */ diff --git a/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h b/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h index 01720f05fa..cb1d6bd782 100644 --- a/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h +++ b/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h @@ -25,15 +25,6 @@ #define ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYCORE_H #include "NEActivationLayer.h" -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h" -#include "arm_compute/core/NEON/kernels/NEConvertQuantizedSignednessKernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMLowpMatrixMultiplyKernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMLowpReductionKernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/IMemoryManager.h" #include "arm_compute/runtime/MemoryGroup.h" @@ -45,6 +36,15 @@ namespace arm_compute { class ITensor; +class NEConvertQuantizedSignednessKernel; +class NEConvertQuantizedSignednessKernel; +class NEGEMMInterleave4x4Kernel; +class NEGEMMLowpMatrixMultiplyKernel; +class 
NEGEMMLowpOffsetContributionKernel; +class NEGEMMLowpOffsetContributionOutputStageKernel; +class NEGEMMLowpMatrixAReductionKernel; +class NEGEMMLowpMatrixBReductionKernel; +class NEGEMMTranspose1xWKernel; /** Basic function to execute GEMMLowpMatrixMultiplyCore on NEON. This function calls the following NEON kernels if the DOT product instruction is not available: * @@ -72,6 +72,8 @@ public: NEGEMMLowpMatrixMultiplyCore &operator=(const NEGEMMLowpMatrixMultiplyCore &) = delete; /** Default move assignment operator */ NEGEMMLowpMatrixMultiplyCore &operator=(NEGEMMLowpMatrixMultiplyCore &&) = default; + /** Default destructor */ + ~NEGEMMLowpMatrixMultiplyCore(); /** Initialise the kernel's inputs, output * * @note GEMM_LOWP: low precision GEMM kernel @@ -111,19 +113,19 @@ public: void prepare() override; private: - MemoryGroup _memory_group; - IWeightsManager *_weights_manager; - NEGEMMAssemblyDispatch _asm_glue; - NEGEMMLowpMatrixMultiplyKernel _mm_kernel; - NEGEMMInterleave4x4Kernel _mtx_a_reshape_kernel; - NEGEMMTranspose1xWKernel _mtx_b_reshape_kernel; - NEGEMMLowpMatrixAReductionKernel _mtx_a_reduction_kernel; - NEGEMMLowpMatrixBReductionKernel _mtx_b_reduction_kernel; - NEGEMMLowpOffsetContributionKernel _offset_contribution_kernel; - NEGEMMLowpOffsetContributionOutputStageKernel _offset_contribution_output_stage_kernel; - NEActivationLayer _activation_func; - NEConvertQuantizedSignednessKernel _convert_to_signed_asymm; - NEConvertQuantizedSignednessKernel _convert_from_signed_asymm; + MemoryGroup _memory_group; + IWeightsManager *_weights_manager; + NEGEMMAssemblyDispatch _asm_glue; + std::unique_ptr _mm_kernel; + std::unique_ptr _mtx_a_reshape_kernel; + std::unique_ptr _mtx_b_reshape_kernel; + std::unique_ptr _mtx_a_reduction_kernel; + std::unique_ptr _mtx_b_reduction_kernel; + std::unique_ptr _offset_contribution_kernel; + std::unique_ptr _offset_contribution_output_stage_kernel; + NEActivationLayer _activation_func; + std::unique_ptr _convert_to_signed_asymm; + std::unique_ptr _convert_from_signed_asymm; Tensor _vector_sum_col; Tensor _vector_sum_row; diff --git a/arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h b/arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h index f29d5d464b..6977d27cb6 100644 --- a/arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h +++ b/arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h @@ -24,6 +24,7 @@ #ifndef ARM_COMPUTE_NEGEMMLOWPOUTPUTSTAGE_H #define ARM_COMPUTE_NEGEMMLOWPOUTPUTSTAGE_H +#include "arm_compute/core/Types.h" #include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" /** This file contains all available output stages for GEMMLowp on NEON. @@ -37,6 +38,7 @@ namespace arm_compute { class ITensor; +class ITensorInfo; /** Basic function to execute NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint on NEON. 
* @@ -69,6 +71,18 @@ class ITensor; class NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint : public INESimpleFunctionNoBorder { public: + /** Constructor */ + NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint(const NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint &operator=(const NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint(NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint &operator=(NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint &&) = delete; + /** Default destructor */ + ~NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint(); /** Initialise the kernel's inputs, output * * @param[in] input Input tensor. Data type supported: S32 @@ -129,6 +143,18 @@ public: class NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint : public INESimpleFunctionNoBorder { public: + /** Constructor */ + NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint(const NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint &operator=(const NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint(NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint &operator=(NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint &&) = delete; + /** Default destructor */ + ~NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint(); /** Initialise the kernel's inputs, output * * @param[in] input Input tensor. 
Data type supported: S32 @@ -189,6 +215,18 @@ public: class NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint : public INESimpleFunctionNoBorder { public: + /** Constructor */ + NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint(const NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint &operator=(const NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint(NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint &operator=(NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint &&) = delete; + /** Default destructor */ + ~NEGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint(); /** Initialise the kernel's inputs, output * * @param[in] input Input tensor. Data type supported: S32 @@ -230,6 +268,18 @@ public: class NEGEMMLowpOutputStage : public INESimpleFunctionNoBorder { public: + /** Constructor */ + NEGEMMLowpOutputStage() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGEMMLowpOutputStage(const NEGEMMLowpOutputStage &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGEMMLowpOutputStage &operator=(const NEGEMMLowpOutputStage &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEGEMMLowpOutputStage(NEGEMMLowpOutputStage &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEGEMMLowpOutputStage &operator=(NEGEMMLowpOutputStage &&) = delete; + /** Default destructor */ + ~NEGEMMLowpOutputStage(); /** Initialise the kernel's inputs, output * * @param[in] input Input tensor. Data type supported: S32 diff --git a/arm_compute/runtime/NEON/functions/NEGEMMTranspose1xW.h b/arm_compute/runtime/NEON/functions/NEGEMMTranspose1xW.h index 983c95d732..723a638d76 100644 --- a/arm_compute/runtime/NEON/functions/NEGEMMTranspose1xW.h +++ b/arm_compute/runtime/NEON/functions/NEGEMMTranspose1xW.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2017-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,12 +24,14 @@ #ifndef ARM_COMPUTE_NEGEMMTRANSPOSE1XW_H #define ARM_COMPUTE_NEGEMMTRANSPOSE1XW_H +#include "arm_compute/core/Error.h" #include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" namespace arm_compute { // Forward declarations class ITensor; +class ITensorInfo; /** Basic function to execute NEGEMMTranspose1xWKernel. 
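Even the thin wrappers in this area that add no new members (the NEGEMMLowpQuantizeDown* functions, NEGEMMLowpOutputStage, NEGEMMTranspose1xW) now spell out deleted copy and move operations. The effect can be verified at compile time; a small standalone check along these lines (Widget is a placeholder, not a library class):

    #include <type_traits>

    struct Widget
    {
        Widget() = default;
        Widget(const Widget &) = delete;
        Widget &operator=(const Widget &) = delete;
        Widget(Widget &&) = delete;
        Widget &operator=(Widget &&) = delete;
        ~Widget() = default;
    };

    // Deleted special members are enforced purely at compile time; there is no run-time cost.
    static_assert(!std::is_copy_constructible<Widget>::value, "copying must be disabled");
    static_assert(!std::is_move_constructible<Widget>::value, "moving must be disabled");

    int main()
    {
        Widget w; // default construction still works
        static_cast<void>(w);
        return 0;
    }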
This function calls the following NEON kernels: * @@ -39,6 +41,18 @@ class ITensor; class NEGEMMTranspose1xW : public INESimpleFunctionNoBorder { public: + /** Constructor */ + NEGEMMTranspose1xW() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGEMMTranspose1xW(const NEGEMMTranspose1xW &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGEMMTranspose1xW &operator=(const NEGEMMTranspose1xW &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEGEMMTranspose1xW(NEGEMMTranspose1xW &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEGEMMTranspose1xW &operator=(NEGEMMTranspose1xW &&) = delete; + /** Default destructor */ + ~NEGEMMTranspose1xW() = default; /** Initialise the kernel's inputs, output * * @param[in] input First input tensor. Data type supported: All diff --git a/arm_compute/runtime/NEON/functions/NEGather.h b/arm_compute/runtime/NEON/functions/NEGather.h index b872c44443..a5e0461227 100644 --- a/arm_compute/runtime/NEON/functions/NEGather.h +++ b/arm_compute/runtime/NEON/functions/NEGather.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 Arm Limited. + * Copyright (c) 2019-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -32,6 +32,7 @@ namespace arm_compute { // Forward declarations class ITensor; +class ITensorInfo; /** Basic function to run @ref NEGatherKernel */ class NEGather : public INESimpleFunctionNoBorder diff --git a/arm_compute/runtime/NEON/functions/NEGaussian3x3.h b/arm_compute/runtime/NEON/functions/NEGaussian3x3.h index 54fe91b975..db533858ee 100644 --- a/arm_compute/runtime/NEON/functions/NEGaussian3x3.h +++ b/arm_compute/runtime/NEON/functions/NEGaussian3x3.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/arm_compute/runtime/NEON/functions/NEGaussian5x5.h b/arm_compute/runtime/NEON/functions/NEGaussian5x5.h index 2e042e2307..3d933bbd3d 100644 --- a/arm_compute/runtime/NEON/functions/NEGaussian5x5.h +++ b/arm_compute/runtime/NEON/functions/NEGaussian5x5.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,8 +24,6 @@ #ifndef ARM_COMPUTE_NEGAUSSIAN5x5_H #define ARM_COMPUTE_NEGAUSSIAN5x5_H -#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" -#include "arm_compute/core/NEON/kernels/NEGaussian5x5Kernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/IMemoryManager.h" @@ -38,6 +36,9 @@ namespace arm_compute { class ITensor; +class NEGaussian5x5HorKernel; +class NEGaussian5x5VertKernel; +class NEFillBorderKernel; /** Basic function to execute gaussian filter 5x5. 
This function calls the following NEON kernels: * @@ -52,6 +53,16 @@ public: /** Default constructor */ NEGaussian5x5(std::shared_ptr memory_manager = nullptr); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGaussian5x5(const NEGaussian5x5 &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGaussian5x5 &operator=(const NEGaussian5x5 &) = delete; + /** Allow instances of this class to be moved */ + NEGaussian5x5(NEGaussian5x5 &&) = default; + /** Allow instances of this class to be moved */ + NEGaussian5x5 &operator=(NEGaussian5x5 &&) = default; + /** Default destructor */ + ~NEGaussian5x5(); /** Initialise the function's input, output and border mode. * * @param[in, out] input Source tensor. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) @@ -65,11 +76,11 @@ public: void run() override; protected: - MemoryGroup _memory_group; /**< Function memory group */ - NEGaussian5x5HorKernel _kernel_hor; /**< kernel for horizontal pass */ - NEGaussian5x5VertKernel _kernel_vert; /**< kernel for vertical pass */ - Tensor _tmp; /**< temporary buffer for output of horizontal pass */ - NEFillBorderKernel _border_handler; /**< kernel to handle tensor borders */ + MemoryGroup _memory_group; /**< Function memory group */ + std::unique_ptr _kernel_hor; /**< kernel for horizontal pass */ + std::unique_ptr _kernel_vert; /**< kernel for vertical pass */ + Tensor _tmp; /**< temporary buffer for output of horizontal pass */ + std::unique_ptr _border_handler; /**< kernel to handle tensor borders */ }; } #endif /*ARM_COMPUTE_NEGAUSSIAN5x5_H */ diff --git a/arm_compute/runtime/NEON/functions/NEGaussianPyramid.h b/arm_compute/runtime/NEON/functions/NEGaussianPyramid.h index d82f763f95..c82de0f4c2 100644 --- a/arm_compute/runtime/NEON/functions/NEGaussianPyramid.h +++ b/arm_compute/runtime/NEON/functions/NEGaussianPyramid.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. 
 *
 * SPDX-License-Identifier: MIT
 *
@@ -25,7 +25,6 @@
 #define ARM_COMPUTE_NEGAUSSIANPYRAMID_H
 
 #include "arm_compute/core/IPyramid.h"
-#include "arm_compute/core/NEON/kernels/NEGaussianPyramidKernel.h"
 #include "arm_compute/core/Types.h"
 #include "arm_compute/runtime/IFunction.h"
 #include "arm_compute/runtime/NEON/functions/NEGaussian5x5.h"
@@ -39,6 +38,9 @@
 namespace arm_compute
 {
 class ITensor;
+class NEGaussianPyramidHorKernel;
+class NEGaussianPyramidVertKernel;
+class NEFillBorderKernel;
 
 /** Common interface for all Gaussian pyramid functions */
 class NEGaussianPyramid : public IFunction
@@ -85,16 +87,26 @@ class NEGaussianPyramidHalf : public NEGaussianPyramid
 public:
     /** Constructor */
     NEGaussianPyramidHalf();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEGaussianPyramidHalf(const NEGaussianPyramidHalf &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEGaussianPyramidHalf &operator=(const NEGaussianPyramidHalf &) = delete;
+    /** Allow instances of this class to be moved */
+    NEGaussianPyramidHalf(NEGaussianPyramidHalf &&) = default;
+    /** Allow instances of this class to be moved */
+    NEGaussianPyramidHalf &operator=(NEGaussianPyramidHalf &&) = default;
+    /** Default destructor */
+    ~NEGaussianPyramidHalf();
 
     // Inherited methods overridden:
     void configure(const ITensor *input, IPyramid *pyramid, BorderMode border_mode, uint8_t constant_border_value) override;
     void run() override;
 
 private:
-    std::vector<NEFillBorderKernel>          _horizontal_border_handler;
-    std::vector<NEFillBorderKernel>          _vertical_border_handler;
-    std::vector<NEGaussianPyramidHorKernel>  _horizontal_reduction;
-    std::vector<NEGaussianPyramidVertKernel> _vertical_reduction;
+    std::vector<std::unique_ptr<NEFillBorderKernel>>          _horizontal_border_handler;
+    std::vector<std::unique_ptr<NEFillBorderKernel>>          _vertical_border_handler;
+    std::vector<std::unique_ptr<NEGaussianPyramidHorKernel>>  _horizontal_reduction;
+    std::vector<std::unique_ptr<NEGaussianPyramidVertKernel>> _vertical_reduction;
 };
 
 /** Basic function to execute gaussian pyramid with ORB scale factor.
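Multi-stage functions such as the Gaussian pyramid above and NEFFT1D earlier now keep their per-level kernels in std::vector<std::unique_ptr<...>> instead of std::vector<Kernel>. A rough sketch of how such a stage list might be built and executed, with placeholder types standing in for the library's kernels and scheduler:

    #include <memory>
    #include <vector>

    namespace example
    {
    // Placeholder for one configurable processing stage (e.g. one pyramid level).
    class StageKernel
    {
    public:
        void configure(unsigned int level)
        {
            _level = level;
        }
        void run() const
        {
            // process pyramid level '_level' here
        }

    private:
        unsigned int _level{ 0 };
    };

    class PyramidFunction
    {
    public:
        void configure(unsigned int num_levels)
        {
            _stages.clear();
            for(unsigned int i = 0; i < num_levels; ++i)
            {
                auto stage = std::make_unique<StageKernel>();
                stage->configure(i);
                _stages.emplace_back(std::move(stage)); // the vector owns every stage through unique_ptr
            }
        }

        void run() const
        {
            for(const auto &stage : _stages)
            {
                stage->run();
            }
        }

    private:
        std::vector<std::unique_ptr<StageKernel>> _stages;
    };
    } // namespace example

One practical side effect: the vector elements stay movable even when the kernel type itself is not, so the stage list can be rebuilt on reconfiguration without requiring the kernels to be copyable or movable.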
This function calls the following NEON kernels and functions: @@ -109,6 +121,16 @@ class NEGaussianPyramidOrb : public NEGaussianPyramid public: /** Constructor */ NEGaussianPyramidOrb(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGaussianPyramidOrb(const NEGaussianPyramidOrb &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEGaussianPyramidOrb &operator=(const NEGaussianPyramidOrb &) = delete; + /** Allow instances of this class to be moved */ + NEGaussianPyramidOrb(NEGaussianPyramidOrb &&) = default; + /** Allow instances of this class to be moved */ + NEGaussianPyramidOrb &operator=(NEGaussianPyramidOrb &&) = default; + /** Default destructor */ + ~NEGaussianPyramidOrb(); // Inherited methods overridden: void configure(const ITensor *input, IPyramid *pyramid, BorderMode border_mode, uint8_t constant_border_value) override; diff --git a/arm_compute/runtime/NEON/functions/NEGenerateProposalsLayer.h b/arm_compute/runtime/NEON/functions/NEGenerateProposalsLayer.h index f937832c0e..613f0d1c47 100644 --- a/arm_compute/runtime/NEON/functions/NEGenerateProposalsLayer.h +++ b/arm_compute/runtime/NEON/functions/NEGenerateProposalsLayer.h @@ -24,17 +24,17 @@ #ifndef ARM_COMPUTE_NEGENERATEPROPOSALSLAYER_H #define ARM_COMPUTE_NEGENERATEPROPOSALSLAYER_H -#include "arm_compute/core/NEON/kernels/NEBoundingBoxTransformKernel.h" -#include "arm_compute/core/NEON/kernels/NEDequantizationLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEGenerateProposalsLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEPadLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEPermuteKernel.h" -#include "arm_compute/core/NEON/kernels/NEQuantizationLayerKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/CPP/CPPScheduler.h" #include "arm_compute/runtime/CPP/functions/CPPBoxWithNonMaximaSuppressionLimit.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/MemoryGroup.h" +#include "arm_compute/runtime/NEON/functions/NEBoundingBoxTransform.h" +#include "arm_compute/runtime/NEON/functions/NEComputeAllAnchors.h" +#include "arm_compute/runtime/NEON/functions/NEDequantizationLayer.h" +#include "arm_compute/runtime/NEON/functions/NEPadLayer.h" +#include "arm_compute/runtime/NEON/functions/NEPermute.h" +#include "arm_compute/runtime/NEON/functions/NEQuantizationLayer.h" #include "arm_compute/runtime/NEON/functions/NEReshapeLayer.h" #include "arm_compute/runtime/Tensor.h" @@ -67,6 +67,8 @@ public: NEGenerateProposalsLayer(const NEGenerateProposalsLayer &) = delete; /** Prevent instances of this class from being copied (As this class contains pointers) */ NEGenerateProposalsLayer &operator=(const NEGenerateProposalsLayer &) = delete; + /** Default destructor */ + ~NEGenerateProposalsLayer(); /** Set the input and output tensors. 
* @@ -112,16 +114,16 @@ private: MemoryGroup _memory_group; // Neon kernels - NEPermuteKernel _permute_deltas_kernel; - NEReshapeLayer _flatten_deltas; - NEPermuteKernel _permute_scores_kernel; - NEReshapeLayer _flatten_scores; - NEComputeAllAnchorsKernel _compute_anchors_kernel; - NEBoundingBoxTransformKernel _bounding_box_kernel; - NEPadLayerKernel _pad_kernel; - NEDequantizationLayerKernel _dequantize_anchors; - NEDequantizationLayerKernel _dequantize_deltas; - NEQuantizationLayerKernel _quantize_all_proposals; + NEPermute _permute_deltas; + NEReshapeLayer _flatten_deltas; + NEPermute _permute_scores; + NEReshapeLayer _flatten_scores; + NEComputeAllAnchors _compute_anchors; + NEBoundingBoxTransform _bounding_box; + NEPadLayer _pad; + NEDequantizationLayer _dequantize_anchors; + NEDequantizationLayer _dequantize_deltas; + NEQuantizationLayer _quantize_all_proposals; // CPP functions CPPBoxWithNonMaximaSuppressionLimit _cpp_nms; diff --git a/arm_compute/runtime/NEON/functions/NEHOGDescriptor.h b/arm_compute/runtime/NEON/functions/NEHOGDescriptor.h index 9b6fc4737b..c900040982 100644 --- a/arm_compute/runtime/NEON/functions/NEHOGDescriptor.h +++ b/arm_compute/runtime/NEON/functions/NEHOGDescriptor.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,7 +24,6 @@ #ifndef ARM_COMPUTE_NEHOGDESCRIPTOR_H #define ARM_COMPUTE_NEHOGDESCRIPTOR_H -#include "arm_compute/core/NEON/kernels/NEHOGDescriptorKernel.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/IMemoryManager.h" #include "arm_compute/runtime/MemoryGroup.h" @@ -36,6 +35,9 @@ namespace arm_compute { class IHOG; +class NEHOGOrientationBinningKernel; +class NEHOGBlockNormalizationKernel; + /** Basic function to calculate HOG descriptor. This function calls the following NEON kernels: * * -# @ref NEHOGGradient @@ -48,6 +50,16 @@ class NEHOGDescriptor : public IFunction public: /** Default constructor */ NEHOGDescriptor(std::shared_ptr memory_manager = nullptr); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEHOGDescriptor(const NEHOGDescriptor &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEHOGDescriptor &operator=(const NEHOGDescriptor &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEHOGDescriptor(NEHOGDescriptor &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEHOGDescriptor &operator=(NEHOGDescriptor &&) = delete; + /** Default destructor */ + ~NEHOGDescriptor(); /** Initialise the function's source, destination, HOG data-object and border mode * * @param[in, out] input Input tensor. 
Data type supported: U8 @@ -63,13 +75,13 @@ public: void run() override; private: - MemoryGroup _memory_group; - NEHOGGradient _gradient; - NEHOGOrientationBinningKernel _orient_bin; - NEHOGBlockNormalizationKernel _block_norm; - Tensor _mag; - Tensor _phase; - Tensor _hog_space; + MemoryGroup _memory_group; + NEHOGGradient _gradient; + std::unique_ptr _orient_bin; + std::unique_ptr _block_norm; + Tensor _mag; + Tensor _phase; + Tensor _hog_space; }; } diff --git a/arm_compute/runtime/NEON/functions/NEHOGDetector.h b/arm_compute/runtime/NEON/functions/NEHOGDetector.h index 6400d3c367..89224b62a0 100644 --- a/arm_compute/runtime/NEON/functions/NEHOGDetector.h +++ b/arm_compute/runtime/NEON/functions/NEHOGDetector.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,12 +24,14 @@ #ifndef ARM_COMPUTE_NEHOGDETECTOR_H #define ARM_COMPUTE_NEHOGDETECTOR_H +#include "arm_compute/core/IArray.h" #include "arm_compute/core/IHOG.h" -#include "arm_compute/core/NEON/kernels/NEHOGDetectorKernel.h" #include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" namespace arm_compute { +class ITensor; +class ITensorInfo; /** Basic function to execute HOG detector based on linear SVM. This function calls the following NEON kernel: * * -# @ref NEHOGDetectorKernel @@ -38,6 +40,18 @@ namespace arm_compute class NEHOGDetector : public INESimpleFunctionNoBorder { public: + /** Constructor */ + NEHOGDetector() = default; + /** Prevent instances of this class from being copied */ + NEHOGDetector(const NEHOGDetector &) = delete; + /** Default move constructor */ + NEHOGDetector(NEHOGDetector &&) = default; + /** Prevent instances of this class from being copied */ + NEHOGDetector &operator=(const NEHOGDetector &) = delete; + /** Default move assignment operator */ + NEHOGDetector &operator=(NEHOGDetector &&) = default; + /** Destructor */ + ~NEHOGDetector(); /** Initialise the kernel's input, output, HOG data object, detection window stride, threshold and index class * * @attention The function does not reset the number of values in @ref IDetectionWindowArray so it is caller's responsibility to clear it. diff --git a/arm_compute/runtime/NEON/functions/NEHOGGradient.h b/arm_compute/runtime/NEON/functions/NEHOGGradient.h index 2d3f934f54..05a16db995 100644 --- a/arm_compute/runtime/NEON/functions/NEHOGGradient.h +++ b/arm_compute/runtime/NEON/functions/NEHOGGradient.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,7 +24,6 @@ #ifndef ARM_COMPUTE_NEHOGGRADIENT_H #define ARM_COMPUTE_NEHOGGRADIENT_H -#include "arm_compute/core/NEON/INEKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/IMemoryManager.h" @@ -38,6 +37,8 @@ namespace arm_compute { class ITensor; +class ICPPKernel; + /** Basic function to calculate the gradient for HOG. 
This function calls the following NEON kernels: * * -# @ref NEDerivative @@ -49,6 +50,16 @@ class NEHOGGradient : public IFunction public: /** Default constructor */ NEHOGGradient(std::shared_ptr memory_manager = nullptr); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEHOGGradient(const NEHOGGradient &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEHOGGradient &operator=(const NEHOGGradient &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEHOGGradient(NEHOGGradient &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEHOGGradient &operator=(NEHOGGradient &&) = delete; + /** Default destructor */ + ~NEHOGGradient(); /** Initialise the function's source, destinations, phase type and border mode * * @param[in, out] input Input tensor. Data type supported: U8. @@ -65,11 +76,11 @@ public: void run() override; private: - MemoryGroup _memory_group; - NEDerivative _derivative; - std::unique_ptr _mag_phase; - Tensor _gx; - Tensor _gy; + MemoryGroup _memory_group; + NEDerivative _derivative; + std::unique_ptr _mag_phase; + Tensor _gx; + Tensor _gy; }; } #endif /*ARM_COMPUTE_NEHOGGRADIENT_H */ diff --git a/arm_compute/runtime/NEON/functions/NEHOGMultiDetection.h b/arm_compute/runtime/NEON/functions/NEHOGMultiDetection.h index ff64afb119..0fb3edd490 100644 --- a/arm_compute/runtime/NEON/functions/NEHOGMultiDetection.h +++ b/arm_compute/runtime/NEON/functions/NEHOGMultiDetection.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -27,7 +27,6 @@ #include "arm_compute/core/CPP/kernels/CPPDetectionWindowNonMaximaSuppressionKernel.h" #include "arm_compute/core/IArray.h" #include "arm_compute/core/IMultiHOG.h" -#include "arm_compute/core/NEON/kernels/NEHOGDescriptorKernel.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/IMemoryManager.h" #include "arm_compute/runtime/MemoryGroup.h" @@ -39,6 +38,9 @@ namespace arm_compute { +class NEHOGOrientationBinningKernel; +class NEHOGBlockNormalizationKernel; + /** Basic function to detect multiple objects (or the same object at different scales) on the same input image using HOG. This function calls the following NEON kernels: * * -# @ref NEHOGGradient @@ -60,8 +62,14 @@ public: NEHOGMultiDetection(std::shared_ptr memory_manager = nullptr); /** Prevent instances of this class from being copied (As this class contains pointers) */ NEHOGMultiDetection(const NEHOGMultiDetection &) = delete; + /** Default move constructor */ + NEHOGMultiDetection(NEHOGMultiDetection &&) = default; /** Prevent instances of this class from being copied (As this class contains pointers) */ NEHOGMultiDetection &operator=(const NEHOGMultiDetection &) = delete; + /** Default move assignment operator */ + NEHOGMultiDetection &operator=(NEHOGMultiDetection &&) = default; + /** Default destructor */ + ~NEHOGMultiDetection(); /** Initialise the function's source, destination, detection window strides, border mode, threshold and non-maxima suppression * * @param[in, out] input Input tensor. 
Data type supported: U8 diff --git a/arm_compute/runtime/NEON/functions/NEHarrisCorners.h b/arm_compute/runtime/NEON/functions/NEHarrisCorners.h index c086e3a7ce..e2dc052afc 100644 --- a/arm_compute/runtime/NEON/functions/NEHarrisCorners.h +++ b/arm_compute/runtime/NEON/functions/NEHarrisCorners.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -26,8 +26,6 @@ #include "arm_compute/core/CPP/kernels/CPPCornerCandidatesKernel.h" #include "arm_compute/core/CPP/kernels/CPPSortEuclideanDistanceKernel.h" -#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" -#include "arm_compute/core/NEON/kernels/NEHarrisCornersKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/Array.h" #include "arm_compute/runtime/IFunction.h" @@ -42,6 +40,8 @@ namespace arm_compute { class ITensor; +class NEFillBorderKernel; +class INEHarrisScoreKernel; using IImage = ITensor; /** Basic function to execute harris corners detection. This function calls the following NEON kernels and functions: @@ -68,6 +68,16 @@ public: * @param[in] memory_manager (Optional) Memory manager. */ NEHarrisCorners(std::shared_ptr memory_manager = nullptr); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEHarrisCorners(const NEHarrisCorners &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEHarrisCorners &operator=(const NEHarrisCorners &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEHarrisCorners(NEHarrisCorners &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEHarrisCorners &operator=(NEHarrisCorners &&) = delete; + /** Default destructor */ + ~NEHarrisCorners(); /** Initialize the function's source, destination, conv and border_mode. * * @param[in, out] input Source image. Data type supported: U8. (Written to only for @p border_mode != UNDEFINED) @@ -94,8 +104,8 @@ private: NENonMaximaSuppression3x3 _non_max_suppr; /**< Non-maxima suppression function */ CPPCornerCandidatesKernel _candidates; /**< Sort kernel */ CPPSortEuclideanDistanceKernel _sort_euclidean; /**< Euclidean distance kernel */ - NEFillBorderKernel _border_gx; /**< Border handler before running harris score */ - NEFillBorderKernel _border_gy; /**< Border handler before running harris score */ + std::unique_ptr _border_gx; /**< Border handler before running harris score */ + std::unique_ptr _border_gy; /**< Border handler before running harris score */ Image _gx; /**< Source image - Gx component */ Image _gy; /**< Source image - Gy component */ Image _score; /**< Source image - Harris score */ diff --git a/arm_compute/runtime/NEON/functions/NEHistogram.h b/arm_compute/runtime/NEON/functions/NEHistogram.h index 716f2e71f9..60766ebcdc 100644 --- a/arm_compute/runtime/NEON/functions/NEHistogram.h +++ b/arm_compute/runtime/NEON/functions/NEHistogram.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -24,16 +24,19 @@ #ifndef ARM_COMPUTE_NEHISTOGRAM_H #define ARM_COMPUTE_NEHISTOGRAM_H -#include "arm_compute/core/NEON/kernels/NEHistogramKernel.h" #include "arm_compute/runtime/IFunction.h" #include #include #include +#include namespace arm_compute { +class ITensor; class IDistribution1D; +class NEHistogramKernel; +using IImage = ITensor; /** Basic function to run @ref NEHistogramKernel. */ class NEHistogram : public IFunction @@ -41,6 +44,16 @@ class NEHistogram : public IFunction public: /** Default Constructor. */ NEHistogram(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEHistogram(const NEHistogram &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEHistogram &operator=(const NEHistogram &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEHistogram(NEHistogram &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEHistogram &operator=(NEHistogram &&) = delete; + /** Default destructor */ + ~NEHistogram(); /** Initialise the kernel's inputs. * * @param[in] input Input image. Data type supported: U8. @@ -52,10 +65,10 @@ public: void run() override; private: - NEHistogramKernel _histogram_kernel; - std::vector _local_hist; - std::vector _window_lut; - size_t _local_hist_size; + std::unique_ptr _histogram_kernel; + std::vector _local_hist; + std::vector _window_lut; + size_t _local_hist_size; /** 256 possible pixel values as we handle only U8 images */ static constexpr unsigned int window_lut_default_size = 256; }; diff --git a/arm_compute/runtime/NEON/functions/NEIm2Col.h b/arm_compute/runtime/NEON/functions/NEIm2Col.h index 3ea9c1cfaf..2f023f44fe 100644 --- a/arm_compute/runtime/NEON/functions/NEIm2Col.h +++ b/arm_compute/runtime/NEON/functions/NEIm2Col.h @@ -26,14 +26,16 @@ #include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" -#include "arm_compute/core/NEON/kernels/NEIm2ColKernel.h" #include "arm_compute/core/Size2D.h" #include "arm_compute/core/Types.h" +#include namespace arm_compute { // Forward declarations class ITensor; +class ITensorInfo; +class NEIm2ColKernel; /** Basic function to run @ref NEIm2ColKernel */ class NEIm2Col : public IFunction @@ -41,6 +43,16 @@ class NEIm2Col : public IFunction public: /** Default constructor */ NEIm2Col(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEIm2Col(const NEIm2Col &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEIm2Col &operator=(const NEIm2Col &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEIm2Col(NEIm2Col &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEIm2Col &operator=(NEIm2Col &&) = delete; + /** Default destructor */ + ~NEIm2Col(); /** Configure the im2col NEON kernel * * @param[in] input The input tensor to convert. 
3 lower dimensions represent a single input [width, height, IFM], @@ -78,8 +90,8 @@ public: void run() override; private: - NEIm2ColKernel _kernel; - unsigned int _y_dim; + std::unique_ptr _kernel; + unsigned int _y_dim; }; } // namespace arm_compute #endif /* ARM_COMPUTE_NEIM2COL_H */ diff --git a/arm_compute/runtime/NEON/functions/NEInstanceNormalizationLayer.h b/arm_compute/runtime/NEON/functions/NEInstanceNormalizationLayer.h index 85a307c2d4..57165c94b4 100644 --- a/arm_compute/runtime/NEON/functions/NEInstanceNormalizationLayer.h +++ b/arm_compute/runtime/NEON/functions/NEInstanceNormalizationLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 Arm Limited. + * Copyright (c) 2019-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,7 +24,6 @@ #ifndef ARM_COMPUTE_NEINSTANCENORMALIZATIONLAYER_H #define ARM_COMPUTE_NEINSTANCENORMALIZATIONLAYER_H -#include "arm_compute/core/NEON/kernels/NEInstanceNormalizationLayerKernel.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/IMemoryManager.h" #include "arm_compute/runtime/MemoryGroup.h" @@ -37,6 +36,7 @@ namespace arm_compute { class ITensor; +class NEInstanceNormalizationLayerKernel; /** Basic function to perform a Instance normalization. * @@ -48,6 +48,16 @@ class NEInstanceNormalizationLayer : public IFunction public: /** Constructor */ NEInstanceNormalizationLayer(std::shared_ptr memory_manager = nullptr); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEInstanceNormalizationLayer(const NEInstanceNormalizationLayer &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEInstanceNormalizationLayer &operator=(const NEInstanceNormalizationLayer &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEInstanceNormalizationLayer(NEInstanceNormalizationLayer &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEInstanceNormalizationLayer &operator=(NEInstanceNormalizationLayer &&) = delete; + /** Default destructor */ + ~NEInstanceNormalizationLayer(); /** Set the input and output tensors. * * @param[in, out] input Source tensor. In case of @p output tensor = nullptr this tensor will store the result of the normalization. @@ -75,13 +85,13 @@ public: void run() override; private: - MemoryGroup _memory_group; - NEInstanceNormalizationLayerKernel _normalization_kernel; - bool _is_nchw; - NEPermute _permute_input; - NEPermute _permute_output; - Tensor _permuted_input; - Tensor _permuted_output; + MemoryGroup _memory_group; + std::unique_ptr _normalization_kernel; + bool _is_nchw; + NEPermute _permute_input; + NEPermute _permute_output; + Tensor _permuted_input; + Tensor _permuted_output; }; } #endif /* ARM_COMPUTE_NEINSTANCENORMALIZATIONLAYER_H */ diff --git a/arm_compute/runtime/NEON/functions/NEIntegralImage.h b/arm_compute/runtime/NEON/functions/NEIntegralImage.h index 6302a7adac..a04105c0b9 100644 --- a/arm_compute/runtime/NEON/functions/NEIntegralImage.h +++ b/arm_compute/runtime/NEON/functions/NEIntegralImage.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -34,6 +34,18 @@ class ITensor; class NEIntegralImage : public INESimpleFunction { public: + /** Constructor */ + NEIntegralImage() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEIntegralImage(const NEIntegralImage &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEIntegralImage &operator=(const NEIntegralImage &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEIntegralImage(NEIntegralImage &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEIntegralImage &operator=(NEIntegralImage &&) = delete; + /** Default destructor */ + ~NEIntegralImage(); /** Initialise the function's source, destinations and border mode. * * @param[in] input Source tensor. Data type supported: U8. diff --git a/arm_compute/runtime/NEON/functions/NEL2NormalizeLayer.h b/arm_compute/runtime/NEON/functions/NEL2NormalizeLayer.h index 66750a5411..173b9d2141 100644 --- a/arm_compute/runtime/NEON/functions/NEL2NormalizeLayer.h +++ b/arm_compute/runtime/NEON/functions/NEL2NormalizeLayer.h @@ -24,7 +24,6 @@ #ifndef ARM_COMPUTE_NEL2NORMALIZELAYER_H #define ARM_COMPUTE_NEL2NORMALIZELAYER_H -#include "arm_compute/core/NEON/kernels/NEL2NormalizeLayerKernel.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/IMemoryManager.h" #include "arm_compute/runtime/MemoryGroup.h" @@ -36,6 +35,7 @@ namespace arm_compute { class ITensor; +class NEL2NormalizeLayerKernel; /** Basic function to perform a L2 normalization on a given axis. * @@ -48,6 +48,16 @@ class NEL2NormalizeLayer : public IFunction public: /** Constructor */ NEL2NormalizeLayer(std::shared_ptr memory_manager = nullptr); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEL2NormalizeLayer(const NEL2NormalizeLayer &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEL2NormalizeLayer &operator=(const NEL2NormalizeLayer &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEL2NormalizeLayer(NEL2NormalizeLayer &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEL2NormalizeLayer &operator=(NEL2NormalizeLayer &&) = delete; + /** Default destructor */ + ~NEL2NormalizeLayer(); /** Set the input and output tensors. * * @param[in, out] input Source tensor. Data types supported: F16/F32. 
(Written to only for border_size != 0) @@ -72,10 +82,10 @@ public: void run() override; private: - MemoryGroup _memory_group; - NEReductionOperation _reduce_func; - NEL2NormalizeLayerKernel _normalize_kernel; - Tensor _sumsq; + MemoryGroup _memory_group; + NEReductionOperation _reduce_func; + std::unique_ptr _normalize_kernel; + Tensor _sumsq; }; } #endif /* ARM_COMPUTE_NEL2NORMALIZELAYER_H */ diff --git a/arm_compute/runtime/NEON/functions/NELSTMLayer.h b/arm_compute/runtime/NEON/functions/NELSTMLayer.h index 4a47dfb2cf..ef8defb827 100644 --- a/arm_compute/runtime/NEON/functions/NELSTMLayer.h +++ b/arm_compute/runtime/NEON/functions/NELSTMLayer.h @@ -24,18 +24,17 @@ #ifndef ARM_COMPUTE_NELSTMLAYER_H #define ARM_COMPUTE_NELSTMLAYER_H -#include "arm_compute/core/NEON/kernels/NEActivationLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NECopyKernel.h" - #include "arm_compute/core/Types.h" #include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h" #include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h" #include "arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h" #include "arm_compute/runtime/NEON/functions/NEConcatenateLayer.h" +#include "arm_compute/runtime/NEON/functions/NECopy.h" #include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h" #include "arm_compute/runtime/NEON/functions/NEGEMM.h" #include "arm_compute/runtime/NEON/functions/NEMeanStdDevNormalizationLayer.h" #include "arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h" +#include "arm_compute/runtime/NEON/functions/NETranspose.h" #include "arm_compute/runtime/common/LSTMParams.h" namespace arm_compute @@ -49,6 +48,16 @@ class NELSTMLayer : public IFunction public: /** Default constructor */ NELSTMLayer(std::shared_ptr memory_manager = nullptr); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NELSTMLayer(const NELSTMLayer &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NELSTMLayer &operator=(const NELSTMLayer &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NELSTMLayer(NELSTMLayer &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NELSTMLayer &operator=(NELSTMLayer &&) = delete; + /** Default destructor */ + ~NELSTMLayer(); /** Initialize function's tensors. * * @param[in] input Source tensor. Input is a 2D tensor with dimensions [input_size, batch_size]. Data types supported: F16/F32. 
@@ -158,7 +167,7 @@ private: NEActivationLayer _activation_forget_gate; NEFullyConnectedLayer _fully_connected_cell_state; NEGEMM _gemm_cell_state1; - NETransposeKernel _transpose_cell_state; + NETranspose _transpose_cell_state; NEArithmeticAddition _accum_cell_state1; NEArithmeticAddition _accum_cell_state2; NEPixelWiseMultiplication _pixelwise_mul_cell_state1; @@ -173,8 +182,8 @@ private: NEPixelWiseMultiplication _pixelwise_mul_output_state2; NEFullyConnectedLayer _fully_connected_output_state; NEActivationLayer _projection_clip; - NECopyKernel _copy_cell_state; - NECopyKernel _copy_output; + NECopy _copy_cell_state; + NECopy _copy_output; NEConcatenateLayer _concat_scratch_buffer; NEConcatenateLayer _concat_inputs_forget_gate; NEConcatenateLayer _concat_weights_forget_gate; diff --git a/arm_compute/runtime/NEON/functions/NELSTMLayerQuantized.h b/arm_compute/runtime/NEON/functions/NELSTMLayerQuantized.h index 377e173e7d..39fafef773 100644 --- a/arm_compute/runtime/NEON/functions/NELSTMLayerQuantized.h +++ b/arm_compute/runtime/NEON/functions/NELSTMLayerQuantized.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 Arm Limited. + * Copyright (c) 2019-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -73,6 +73,8 @@ public: NELSTMLayerQuantized &operator=(const NELSTMLayerQuantized &) = delete; /** Default move assignment operator */ NELSTMLayerQuantized &operator=(NELSTMLayerQuantized &&) = default; + /** Default destructor */ + ~NELSTMLayerQuantized(); /** Initialize function's tensors. * * @param[in] input Source tensor. Input is a 2D tensor with dimensions [input_size, batch_size]. Data types supported: QASYMM8. diff --git a/arm_compute/runtime/NEON/functions/NELaplacianPyramid.h b/arm_compute/runtime/NEON/functions/NELaplacianPyramid.h index 1f317f6dd8..eecd9d59cb 100644 --- a/arm_compute/runtime/NEON/functions/NELaplacianPyramid.h +++ b/arm_compute/runtime/NEON/functions/NELaplacianPyramid.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -56,6 +56,16 @@ class NELaplacianPyramid : public IFunction public: /** Constructor */ NELaplacianPyramid(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NELaplacianPyramid(const NELaplacianPyramid &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NELaplacianPyramid &operator=(const NELaplacianPyramid &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NELaplacianPyramid(NELaplacianPyramid &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NELaplacianPyramid &operator=(NELaplacianPyramid &&) = delete; + /** Default destructor */ + ~NELaplacianPyramid(); /** Initialise the function's source, destinations and border mode. * * @param[in] input Source tensor. Data type supported: U8. diff --git a/arm_compute/runtime/NEON/functions/NELaplacianReconstruct.h b/arm_compute/runtime/NEON/functions/NELaplacianReconstruct.h index cc4aa0876b..20f76455da 100644 --- a/arm_compute/runtime/NEON/functions/NELaplacianReconstruct.h +++ b/arm_compute/runtime/NEON/functions/NELaplacianReconstruct.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -62,6 +62,16 @@ class NELaplacianReconstruct : public IFunction public: /** Constructor */ NELaplacianReconstruct(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NELaplacianReconstruct(const NELaplacianReconstruct &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NELaplacianReconstruct &operator=(const NELaplacianReconstruct &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NELaplacianReconstruct(NELaplacianReconstruct &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NELaplacianReconstruct &operator=(NELaplacianReconstruct &&) = delete; + /** Default destructor */ + ~NELaplacianReconstruct(); /** Initialise the function's source, destinations and border mode. * * The Output image must have the same size as the first level of the pyramid. diff --git a/arm_compute/runtime/NEON/functions/NELocallyConnectedLayer.h b/arm_compute/runtime/NEON/functions/NELocallyConnectedLayer.h index dbcaa297ab..e9f3e93474 100644 --- a/arm_compute/runtime/NEON/functions/NELocallyConnectedLayer.h +++ b/arm_compute/runtime/NEON/functions/NELocallyConnectedLayer.h @@ -26,13 +26,11 @@ #include "arm_compute/runtime/IFunction.h" -#include "arm_compute/core/NEON/kernels/NECol2ImKernel.h" -#include "arm_compute/core/NEON/kernels/NEIm2ColKernel.h" -#include "arm_compute/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.h" -#include "arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/IMemoryManager.h" #include "arm_compute/runtime/MemoryGroup.h" +#include "arm_compute/runtime/NEON/functions/NECol2Im.h" +#include "arm_compute/runtime/NEON/functions/NEIm2Col.h" #include "arm_compute/runtime/Tensor.h" #include @@ -40,6 +38,8 @@ namespace arm_compute { class INETensor; +class NEWeightsReshapeKernel; +class NELocallyConnectedMatrixMultiplyKernel; /** Basic function to compute the locally connected layer. This function calls the following NEON kernels: * @@ -61,6 +61,8 @@ public: NELocallyConnectedLayer &operator=(const NELocallyConnectedLayer &) = delete; /** Default move assignment operator */ NELocallyConnectedLayer &operator=(NELocallyConnectedLayer &&) = default; + /** Default destructor */ + ~NELocallyConnectedLayer(); /** Set the input and output tensors. * * @param[in] input Source tensor. 
3 lower dimensions represent a single input [width, height, IFM], @@ -94,16 +96,16 @@ public: void prepare() override; private: - MemoryGroup _memory_group; - NEIm2ColKernel _input_im2col_kernel; - NEWeightsReshapeKernel _weights_reshape_kernel; - NELocallyConnectedMatrixMultiplyKernel _mm_kernel; - NECol2ImKernel _output_col2im_kernel; - Tensor _input_im2col_reshaped; - Tensor _weights_reshaped; - Tensor _gemm_output; - bool _is_prepared; - const ITensor *_original_weights; + MemoryGroup _memory_group; + NEIm2Col _input_im2col; + std::unique_ptr _weights_reshape_kernel; + std::unique_ptr _mm_kernel; + NECol2Im _output_col2im; + Tensor _input_im2col_reshaped; + Tensor _weights_reshaped; + Tensor _gemm_output; + bool _is_prepared; + const ITensor *_original_weights; }; } #endif /* ARM_COMPUTE_NELOCALLYCONNECTEDLAYER_H */ diff --git a/arm_compute/runtime/NEON/functions/NEMagnitude.h b/arm_compute/runtime/NEON/functions/NEMagnitude.h index 56c88c2125..50935b619d 100644 --- a/arm_compute/runtime/NEON/functions/NEMagnitude.h +++ b/arm_compute/runtime/NEON/functions/NEMagnitude.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,6 +24,7 @@ #ifndef ARM_COMPUTE_NEMAGNITUDE_H #define ARM_COMPUTE_NEMAGNITUDE_H +#include "arm_compute/core/Types.h" #include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" namespace arm_compute @@ -34,6 +35,18 @@ class ITensor; class NEMagnitude : public INESimpleFunctionNoBorder { public: + /** Constructor */ + NEMagnitude() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEMagnitude(const NEMagnitude &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEMagnitude &operator=(const NEMagnitude &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEMagnitude(NEMagnitude &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEMagnitude &operator=(NEMagnitude &&) = delete; + /** Default destructor */ + ~NEMagnitude(); /** Initialise the kernel's inputs. * * @param[in] input1 First tensor input. Data type supported: S16. diff --git a/arm_compute/runtime/NEON/functions/NEMaxUnpoolingLayer.h b/arm_compute/runtime/NEON/functions/NEMaxUnpoolingLayer.h index f13b4bd9e2..5b5bb5cb78 100644 --- a/arm_compute/runtime/NEON/functions/NEMaxUnpoolingLayer.h +++ b/arm_compute/runtime/NEON/functions/NEMaxUnpoolingLayer.h @@ -24,14 +24,16 @@ #ifndef ARM_COMPUTE_NEMAXUNPOOLINGLAYER_H #define ARM_COMPUTE_NEMAXUNPOOLINGLAYER_H +#include "arm_compute/core/Types.h" #include "arm_compute/runtime/IFunction.h" - -#include "arm_compute/core/NEON/kernels/NEMaxUnpoolingLayerKernel.h" -#include "arm_compute/core/NEON/kernels/NEMemsetKernel.h" +#include namespace arm_compute { class ITensor; +class ITensorInfo; +class NEMemsetKernel; +class NEMaxUnpoolingLayerKernel; /** Function to perform MaxUnpooling. 
This function calls the following NEON kernels: * @@ -43,6 +45,16 @@ class NEMaxUnpoolingLayer : public IFunction public: /** Constructor */ NEMaxUnpoolingLayer(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEMaxUnpoolingLayer(const NEMaxUnpoolingLayer &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEMaxUnpoolingLayer &operator=(const NEMaxUnpoolingLayer &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEMaxUnpoolingLayer(NEMaxUnpoolingLayer &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEMaxUnpoolingLayer &operator=(NEMaxUnpoolingLayer &&) = delete; + /** Default destructor */ + ~NEMaxUnpoolingLayer(); /** Set the input and output tensors. * * @note Only supported pool size 2 @@ -70,8 +82,8 @@ public: void run() override; private: - NEMemsetKernel _memset_kernel; - NEMaxUnpoolingLayerKernel _unpooling_layer_kernel; + std::unique_ptr<NEMemsetKernel> _memset_kernel; + std::unique_ptr<NEMaxUnpoolingLayerKernel> _unpooling_layer_kernel; }; } #endif /* ARM_COMPUTE_NEMAXUNPOOLINGLAYER_H */ diff --git a/arm_compute/runtime/NEON/functions/NEMeanStdDev.h b/arm_compute/runtime/NEON/functions/NEMeanStdDev.h index 120f703140..875c3630c1 100644 --- a/arm_compute/runtime/NEON/functions/NEMeanStdDev.h +++ b/arm_compute/runtime/NEON/functions/NEMeanStdDev.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,15 +24,18 @@ #ifndef ARM_COMPUTE_NEMEANSTDDEV_H #define ARM_COMPUTE_NEMEANSTDDEV_H -#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" -#include "arm_compute/core/NEON/kernels/NEMeanStdDevKernel.h" +#include "arm_compute/core/IMultiImage.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/IFunction.h" +#include <memory> #include namespace arm_compute { +class NEMeanStdDevKernel; +class NEFillBorderKernel; + /** Basic function to execute mean and std deviation. This function calls the following NEON kernels: * * @ref NEMeanStdDevKernel @@ -43,6 +46,16 @@ class NEMeanStdDev : public IFunction public: /** Default Constructor. */ NEMeanStdDev(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEMeanStdDev(const NEMeanStdDev &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEMeanStdDev &operator=(const NEMeanStdDev &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEMeanStdDev(NEMeanStdDev &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEMeanStdDev &operator=(NEMeanStdDev &&) = delete; + /** Default destructor */ + ~NEMeanStdDev(); /** Initialise the kernel's inputs and outputs. * * @param[in, out] input Input image. Data types supported: U8. (Written to only for border filling) @@ -55,10 +68,10 @@ public: void run() override; private: - NEMeanStdDevKernel _mean_stddev_kernel; /**< Kernel that standard deviation calculation. */ - NEFillBorderKernel _fill_border_kernel; /**< Kernel that fills tensor's borders with zeroes.
*/ - uint64_t _global_sum; /**< Variable that holds the global sum among calls in order to ease reduction */ - uint64_t _global_sum_squared; /**< Variable that holds the global sum of squared values among calls in order to ease reduction */ + std::unique_ptr<NEMeanStdDevKernel> _mean_stddev_kernel; /**< Kernel that performs the standard deviation calculation. */ + std::unique_ptr<NEFillBorderKernel> _fill_border_kernel; /**< Kernel that fills tensor's borders with zeroes. */ + uint64_t _global_sum; /**< Variable that holds the global sum among calls in order to ease reduction */ + uint64_t _global_sum_squared; /**< Variable that holds the global sum of squared values among calls in order to ease reduction */ }; } #endif /*ARM_COMPUTE_NEMEANSTDDEV_H */ diff --git a/arm_compute/runtime/NEON/functions/NEMeanStdDevNormalizationLayer.h b/arm_compute/runtime/NEON/functions/NEMeanStdDevNormalizationLayer.h index 132ab8a01b..31e376191c 100644 --- a/arm_compute/runtime/NEON/functions/NEMeanStdDevNormalizationLayer.h +++ b/arm_compute/runtime/NEON/functions/NEMeanStdDevNormalizationLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 Arm Limited. + * Copyright (c) 2019-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -30,11 +30,24 @@ namespace arm_compute { class ITensor; +class ITensorInfo; /** Basic function to execute mean and standard deviation normalization by calling @ref NEMeanStdDevNormalizationKernel */ class NEMeanStdDevNormalizationLayer : public INESimpleFunctionNoBorder { public: + /** Constructor */ + NEMeanStdDevNormalizationLayer() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEMeanStdDevNormalizationLayer(const NEMeanStdDevNormalizationLayer &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEMeanStdDevNormalizationLayer &operator=(const NEMeanStdDevNormalizationLayer &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEMeanStdDevNormalizationLayer(NEMeanStdDevNormalizationLayer &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEMeanStdDevNormalizationLayer &operator=(NEMeanStdDevNormalizationLayer &&) = delete; + /** Default destructor */ + ~NEMeanStdDevNormalizationLayer(); /** Initialise the function's input and outputs. * * @note If the output tensor is a nullptr, the normalization will be performed in-place. diff --git a/arm_compute/runtime/NEON/functions/NEMedian3x3.h b/arm_compute/runtime/NEON/functions/NEMedian3x3.h index 8d860e2103..4b5f60382f 100644 --- a/arm_compute/runtime/NEON/functions/NEMedian3x3.h +++ b/arm_compute/runtime/NEON/functions/NEMedian3x3.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/arm_compute/runtime/NEON/functions/NEMinMaxLocation.h b/arm_compute/runtime/NEON/functions/NEMinMaxLocation.h index caa66a0c16..5959bbb0ad 100644 --- a/arm_compute/runtime/NEON/functions/NEMinMaxLocation.h +++ b/arm_compute/runtime/NEON/functions/NEMinMaxLocation.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited.
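With _mean_stddev_kernel and _fill_border_kernel now held through std::unique_ptr, the existing scheduler API keeps working by passing the raw pointer. Below is a rough sketch of how NEMeanStdDev::run() can dispatch the kernels after this change; the body and split dimensions are illustrative rather than the verbatim NEMeanStdDev.cpp, and the src/core include paths are the post-move kernel locations this patch series introduces.

// NEMeanStdDev.cpp (sketch)
#include "arm_compute/runtime/NEON/functions/NEMeanStdDev.h"

#include "arm_compute/core/Window.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
#include "src/core/NEON/kernels/NEFillBorderKernel.h"  // assumed post-move kernel headers
#include "src/core/NEON/kernels/NEMeanStdDevKernel.h"

namespace arm_compute
{
void NEMeanStdDev::run()
{
    // Reset the global accumulators before the kernel reduces into them.
    _global_sum         = 0;
    _global_sum_squared = 0;

    if(_fill_border_kernel != nullptr)
    {
        NEScheduler::get().schedule(_fill_border_kernel.get(), Window::DimZ); // fill borders first
    }
    NEScheduler::get().schedule(_mean_stddev_kernel.get(), Window::DimY); // accumulate sum and sum of squares
}
} // namespace arm_compute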
* * SPDX-License-Identifier: MIT * @@ -25,15 +25,17 @@ #define ARM_COMPUTE_NEMINMAXLOCATION_H #include "arm_compute/core/IArray.h" -#include "arm_compute/core/NEON/kernels/NEMinMaxLocationKernel.h" #include "arm_compute/runtime/Array.h" #include "arm_compute/runtime/IFunction.h" #include +#include <memory> namespace arm_compute { class ITensor; +class NEMinMaxKernel; +class NEMinMaxLocationKernel; using IImage = ITensor; /** Basic function to execute min and max location. This function calls the following NEON kernels: @@ -46,6 +48,16 @@ class NEMinMaxLocation : public IFunction public: /** Constructor */ NEMinMaxLocation(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEMinMaxLocation(const NEMinMaxLocation &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEMinMaxLocation &operator=(const NEMinMaxLocation &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEMinMaxLocation(NEMinMaxLocation &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEMinMaxLocation &operator=(NEMinMaxLocation &&) = delete; + /** Default destructor */ + ~NEMinMaxLocation(); /** Initialise the kernel's inputs and outputs. * * @param[in] input Input image. Data types supported: U8/S16/F32. @@ -64,8 +76,8 @@ public: void run() override; private: - NEMinMaxKernel _min_max; /**< Kernel that performs min/max */ - NEMinMaxLocationKernel _min_max_loc; /**< Kernel that extracts min/max locations */ + std::unique_ptr<NEMinMaxKernel> _min_max; /**< Kernel that performs min/max */ + std::unique_ptr<NEMinMaxLocationKernel> _min_max_loc; /**< Kernel that extracts min/max locations */ }; } #endif /*ARM_COMPUTE_NEMINMAXLOCATION_H */ diff --git a/arm_compute/runtime/NEON/functions/NENonLinearFilter.h b/arm_compute/runtime/NEON/functions/NENonLinearFilter.h index d2a85837fd..fe1b190db2 100644 --- a/arm_compute/runtime/NEON/functions/NENonLinearFilter.h +++ b/arm_compute/runtime/NEON/functions/NENonLinearFilter.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/arm_compute/runtime/NEON/functions/NENonMaximaSuppression3x3.h b/arm_compute/runtime/NEON/functions/NENonMaximaSuppression3x3.h index 07d4b16cf1..bad633afac 100644 --- a/arm_compute/runtime/NEON/functions/NENonMaximaSuppression3x3.h +++ b/arm_compute/runtime/NEON/functions/NENonMaximaSuppression3x3.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * diff --git a/arm_compute/runtime/NEON/functions/NENormalizationLayer.h b/arm_compute/runtime/NEON/functions/NENormalizationLayer.h index fcdba12046..6519f9b4e6 100644 --- a/arm_compute/runtime/NEON/functions/NENormalizationLayer.h +++ b/arm_compute/runtime/NEON/functions/NENormalizationLayer.h @@ -26,8 +26,6 @@ #include "arm_compute/runtime/IFunction.h" -#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" -#include "arm_compute/core/NEON/kernels/NENormalizationLayerKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/IMemoryManager.h" #include "arm_compute/runtime/MemoryGroup.h" @@ -39,6 +37,7 @@ namespace arm_compute { class ITensor; +class NENormalizationLayerKernel; /** Basic function to compute a normalization layer.
This function calls the following NEON kernels: * @@ -52,6 +51,16 @@ class NENormalizationLayer : public IFunction public: /** Default constructor */ NENormalizationLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NENormalizationLayer(const NENormalizationLayer &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NENormalizationLayer &operator=(const NENormalizationLayer &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NENormalizationLayer(NENormalizationLayer &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NENormalizationLayer &operator=(NENormalizationLayer &&) = delete; + /** Default destructor */ + ~NENormalizationLayer(); /** Set the input and output tensors. * * @param[in] input Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM], @@ -75,10 +84,10 @@ public: void run() override; private: - MemoryGroup _memory_group; /**< Function memory group */ - NENormalizationLayerKernel _norm_kernel; /**< Normalization layer kernel */ - NEPixelWiseMultiplication _multiply_f; /**< Pixel multiplication function */ - Tensor _input_squared; /**< The intermediate buffer which stores results of squaring input */ + MemoryGroup _memory_group; /**< Function memory group */ + std::unique_ptr<NENormalizationLayerKernel> _norm_kernel; /**< Normalization layer kernel */ + NEPixelWiseMultiplication _multiply_f; /**< Pixel multiplication function */ + Tensor _input_squared; /**< The intermediate buffer which stores results of squaring input */ }; } #endif /* ARM_COMPUTE_NENORMALIZATIONLAYER_H */ diff --git a/arm_compute/runtime/NEON/functions/NEOpticalFlow.h b/arm_compute/runtime/NEON/functions/NEOpticalFlow.h index 141ee7ea41..a9f985a422 100644 --- a/arm_compute/runtime/NEON/functions/NEOpticalFlow.h +++ b/arm_compute/runtime/NEON/functions/NEOpticalFlow.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -25,7 +25,7 @@ #define ARM_COMPUTE_NEOPTICALFLOW_H #include "arm_compute/core/IArray.h" -#include "arm_compute/core/NEON/kernels/NELKTrackerKernel.h" +#include "arm_compute/core/Types.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/Array.h" #include "arm_compute/runtime/IFunction.h" @@ -41,6 +41,7 @@ namespace arm_compute { class Pyramid; +class NELKTrackerKernel; /** Array of LK Internel Keypoints */ using LKInternalKeypointArray = Array<NELKInternalKeypoint>; @@ -62,6 +63,8 @@ public: NEOpticalFlow(const NEOpticalFlow &) = delete; /** Prevent instances of this class from being copied (As this class contains pointers) */ NEOpticalFlow &operator=(const NEOpticalFlow &) = delete; + /** Default destructor */ + ~NEOpticalFlow(); /** Initialise the function input and output * * @param[in] old_pyramid Pointer to the pyramid for the old tensor.
Data type supported U8 @@ -86,17 +89,17 @@ public: void run() override; private: - MemoryGroup _memory_group; - std::vector<NEScharr3x3> _func_scharr; - std::vector<NELKTrackerKernel> _kernel_tracker; - std::vector<Image> _scharr_gx; - std::vector<Image> _scharr_gy; - IKeyPointArray *_new_points; - const IKeyPointArray *_new_points_estimates; - const IKeyPointArray *_old_points; - LKInternalKeypointArray _new_points_internal; - LKInternalKeypointArray _old_points_internal; - unsigned int _num_levels; + MemoryGroup _memory_group; + std::vector<NEScharr3x3> _func_scharr; + std::vector<std::unique_ptr<NELKTrackerKernel>> _kernel_tracker; + std::vector<Image> _scharr_gx; + std::vector<Image> _scharr_gy; + IKeyPointArray *_new_points; + const IKeyPointArray *_new_points_estimates; + const IKeyPointArray *_old_points; + LKInternalKeypointArray _new_points_internal; + LKInternalKeypointArray _old_points_internal; + unsigned int _num_levels; }; } // namespace arm_compute #endif /*ARM_COMPUTE_NEOPTICALFLOW_H */ diff --git a/arm_compute/runtime/NEON/functions/NEPReluLayer.h b/arm_compute/runtime/NEON/functions/NEPReluLayer.h index 756058b5ec..358e633000 100644 --- a/arm_compute/runtime/NEON/functions/NEPReluLayer.h +++ b/arm_compute/runtime/NEON/functions/NEPReluLayer.h @@ -31,6 +31,7 @@ namespace arm_compute { class ITensor; +class ITensorInfo; namespace experimental { diff --git a/arm_compute/runtime/NEON/functions/NEPadLayer.h b/arm_compute/runtime/NEON/functions/NEPadLayer.h index fcb7c36312..3fdbb0d73c 100644 --- a/arm_compute/runtime/NEON/functions/NEPadLayer.h +++ b/arm_compute/runtime/NEON/functions/NEPadLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -29,13 +29,15 @@ #include "arm_compute/runtime/NEON/functions/NEStridedSlice.h" #include "arm_compute/runtime/SubTensor.h" -#include "arm_compute/core/NEON/kernels/NECopyKernel.h" -#include "arm_compute/core/NEON/kernels/NEPadLayerKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/Tensor.h" +#include <memory> namespace arm_compute { +class NECopyKernel; +class NEPadLayerKernel; + /** Basic function to pad a tensor. This function calls the following NEON functions/kernels: * * - For padding mode = PaddingMode::CONSTANT: @@ -49,8 +51,18 @@ namespace arm_compute class NEPadLayer : public IFunction { public: - /** Default constructor*/ + /** Default Constructor */ NEPadLayer(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEPadLayer(const NEPadLayer &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEPadLayer &operator=(const NEPadLayer &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEPadLayer(NEPadLayer &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEPadLayer &operator=(NEPadLayer &&) = delete; + /** Default destructor */ + ~NEPadLayer(); /** Initialize the function * * @param[in] input Source tensor. Data types supported: All.
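The NEOpticalFlow members above apply the same idea to a per-level container: _kernel_tracker becomes a vector of std::unique_ptr so NELKTrackerKernel can stay forward-declared in the header. The following self-contained sketch shows that shape with made-up PyramidTracker/LevelKernel names rather than Compute Library classes.

#include <memory>
#include <vector>

class LevelKernel; // forward-declared in the "header" part

class PyramidTracker
{
public:
    explicit PyramidTracker(unsigned int num_levels);
    ~PyramidTracker(); // defined where LevelKernel is complete
    PyramidTracker(const PyramidTracker &) = delete;
    PyramidTracker &operator=(const PyramidTracker &) = delete;

private:
    std::vector<std::unique_ptr<LevelKernel>> _kernels; // one kernel per pyramid level
};

// ----- "source file" part: the kernel type is complete here -----
class LevelKernel
{
public:
    void configure(unsigned int level) { _level = level; }

private:
    unsigned int _level{ 0 };
};

PyramidTracker::PyramidTracker(unsigned int num_levels)
{
    _kernels.reserve(num_levels);
    for(unsigned int i = 0; i < num_levels; ++i)
    {
        auto kernel = std::make_unique<LevelKernel>();
        kernel->configure(i); // per-level configuration, as the optical-flow function does for each pyramid level
        _kernels.emplace_back(std::move(kernel));
    }
}

PyramidTracker::~PyramidTracker() = default;

int main()
{
    PyramidTracker tracker(3U);
    return 0;
}

As in the single-kernel case, the constructor and destructor are defined after the element type is complete, and each level's kernel is created explicitly before it is configured.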
@@ -97,15 +109,15 @@ private: void configure_reflect_symmetric_mode(ITensor *input, ITensor *output); private: - NECopyKernel _copy_kernel; - NEPadLayerKernel _pad_kernel; - PaddingMode _mode; - PaddingList _padding; - uint32_t _num_dimensions; - std::vector<NEStridedSlice> _slice_functions; - std::vector<NEConcatenateLayer> _concat_functions; - std::vector<Tensor> _slice_results; - std::vector<Tensor> _concat_results; + std::unique_ptr<NECopyKernel> _copy_kernel; + std::unique_ptr<NEPadLayerKernel> _pad_kernel; + PaddingMode _mode; + PaddingList _padding; + uint32_t _num_dimensions; + std::vector<NEStridedSlice> _slice_functions; + std::vector<NEConcatenateLayer> _concat_functions; + std::vector<Tensor> _slice_results; + std::vector<Tensor> _concat_results; }; } // namespace arm_compute #endif /*ARM_COMPUTE_NEPADLAYER_H */ diff --git a/arm_compute/runtime/NEON/functions/NEPermute.h b/arm_compute/runtime/NEON/functions/NEPermute.h index 3be42c8346..ef8854b360 100644 --- a/arm_compute/runtime/NEON/functions/NEPermute.h +++ b/arm_compute/runtime/NEON/functions/NEPermute.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -32,6 +32,7 @@ namespace arm_compute { // Forward declarations class ITensor; +class ITensorInfo; /** Basic function to run @ref NEPermuteKernel */ class NEPermute : public INESimpleFunctionNoBorder diff --git a/arm_compute/runtime/NEON/functions/NEPhase.h b/arm_compute/runtime/NEON/functions/NEPhase.h index c492073e22..626023c099 100644 --- a/arm_compute/runtime/NEON/functions/NEPhase.h +++ b/arm_compute/runtime/NEON/functions/NEPhase.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 Arm Limited. + * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,11 +24,13 @@ #ifndef ARM_COMPUTE_NEPHASE_H #define ARM_COMPUTE_NEPHASE_H +#include "arm_compute/core/Types.h" #include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" namespace arm_compute { class ITensor; +class ITensorInfo; /** Basic function to run @ref NEMagnitudePhaseKernel */ class NEPhase : public INESimpleFunctionNoBorder diff --git a/arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h b/arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h index e1072980cf..91cf44ff2e 100644 --- a/arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h +++ b/arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h @@ -31,6 +31,7 @@ namespace arm_compute { class ITensor; +class ITensorInfo; namespace experimental { diff --git a/arm_compute/runtime/NEON/functions/NEPoolingLayer.h b/arm_compute/runtime/NEON/functions/NEPoolingLayer.h index 000c754ec8..b45290fb46 100644 --- a/arm_compute/runtime/NEON/functions/NEPoolingLayer.h +++ b/arm_compute/runtime/NEON/functions/NEPoolingLayer.h @@ -26,13 +26,15 @@ #include "arm_compute/runtime/IFunction.h" -#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" -#include "arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h" #include "arm_compute/core/Types.h" +#include <memory> namespace arm_compute { class ITensor; class ITensorInfo; +class NEPoolingLayerKernel; +class NEFillBorderKernel; /** Basic function to simulate a pooling layer with the specified pooling operation.
This function calls the following NEON kernels: * @@ -44,6 +46,16 @@ class NEPoolingLayer : public IFunction public: /** Constructor */ NEPoolingLayer(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEPoolingLayer(const NEPoolingLayer &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEPoolingLayer &operator=(const NEPoolingLayer &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEPoolingLayer(NEPoolingLayer &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEPoolingLayer &operator=(NEPoolingLayer &&) = delete; + /** Default destructor */ + ~NEPoolingLayer(); /** Set the input and output tensors. * * @note F16 is supported for pool sizes 2 and 3 only @@ -71,10 +83,10 @@ public: void run() override; private: - NEPoolingLayerKernel _pooling_layer_kernel; - NEFillBorderKernel _border_handler; - bool _is_global_pooling_layer; - DataLayout _data_layout; + std::unique_ptr<NEPoolingLayerKernel> _pooling_layer_kernel; + std::unique_ptr<NEFillBorderKernel> _border_handler; + bool _is_global_pooling_layer; + DataLayout _data_layout; }; } #endif /* ARM_COMPUTE_NEPOOLINGLAYER_H */ diff --git a/arm_compute/runtime/NEON/functions/NEPriorBoxLayer.h b/arm_compute/runtime/NEON/functions/NEPriorBoxLayer.h index d4bb42fd07..3cc79fa28e 100644 --- a/arm_compute/runtime/NEON/functions/NEPriorBoxLayer.h +++ b/arm_compute/runtime/NEON/functions/NEPriorBoxLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,13 +24,13 @@ #ifndef ARM_COMPUTE_NEPRIORBOXLAYER_H #define ARM_COMPUTE_NEPRIORBOXLAYER_H -#include "arm_compute/core/NEON/kernels/NEPriorBoxLayerKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" namespace arm_compute { class ITensor; +class ITensorInfo; /** Basic function to run @ref NEPriorBoxLayerKernel.
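Because only the private members of NEPoolingLayer changed, caller code is unaffected by the move to std::unique_ptr. A minimal caller-side sketch of the unchanged configure()/run() flow follows; the tensor shapes and the PoolingLayerInfo arguments are illustrative and should be checked against the constructors available in the release in use.

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/TensorShape.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NEPoolingLayer.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    Tensor src{};
    Tensor dst{};
    src.allocator()->init(TensorInfo(TensorShape(8U, 8U, 16U), 1, DataType::F32)); // 8x8 input, 16 channels
    dst.allocator()->init(TensorInfo(TensorShape(4U, 4U, 16U), 1, DataType::F32)); // result of 2x2 max pooling, stride 2

    NEPoolingLayer pool;
    pool.configure(&src, &dst, PoolingLayerInfo(PoolingType::MAX, 2, DataLayout::NCHW, PadStrideInfo(2, 2, 0, 0)));

    src.allocator()->allocate();
    dst.allocator()->allocate();

    pool.run();
    return 0;
}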
*/ class NEPriorBoxLayer : public INESimpleFunctionNoBorder diff --git a/arm_compute/runtime/NEON/functions/NEQLSTMLayer.h b/arm_compute/runtime/NEON/functions/NEQLSTMLayer.h index 7c572de874..17ad5a354b 100644 --- a/arm_compute/runtime/NEON/functions/NEQLSTMLayer.h +++ b/arm_compute/runtime/NEON/functions/NEQLSTMLayer.h @@ -24,24 +24,27 @@ #ifndef ARM_COMPUTE_NEQLSTMLAYER_H #define ARM_COMPUTE_NEQLSTMLAYER_H -#include "arm_compute/core/NEON/kernels/NECopyKernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMLowpReductionKernel.h" -#include "arm_compute/core/NEON/kernels/NEQLSTMLayerNormalizationKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/NEON/functions/NEActivationLayer.h" #include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h" #include "arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h" +#include "arm_compute/runtime/NEON/functions/NECopy.h" #include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h" #include "arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h" #include "arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h" #include "arm_compute/runtime/NEON/functions/NETranspose.h" +#include "support/MemorySupport.h" #include "arm_compute/runtime/common/LSTMParams.h" +#include namespace arm_compute { // Forward declarations class ITensor; +class ITensorInfo; +class NEQLSTMLayerNormalizationKernel; +class NEGEMMLowpMatrixAReductionKernel; /** Basic function to run @ref NEQLSTMLayer * @@ -70,6 +73,8 @@ public: NEQLSTMLayer &operator=(const NEQLSTMLayer &) = delete; /** Default move assignment operator */ NEQLSTMLayer &operator=(NEQLSTMLayer &&) = default; + /** Default destructor */ + ~NEQLSTMLayer(); /** Initialize function's tensors. * * @param[in] input Source tensor. Input is a 2D tensor with dimensions [input_size, batch_size]. Data types supported: QASYMM8_SIGNED. @@ -204,7 +209,7 @@ private: Tensor *outstage_res, float gemmlowp_scale, const TensorInfo &mm_res_info, const TensorInfo &outstage_tensor_info); - MemoryGroup _memory_group{}; + MemoryGroup _memory_group; /** A small internel kernel do the copy between two tensors */ class TensorCopyKernel @@ -217,6 +222,8 @@ private: Window _window{}; public: + /** Destructor */ + ~TensorCopyKernel(); /** Static function to check if given info will lead to a valid configuration of @ref NEQLSTMLayer::TensorCopyKernel * * @param[in] src Source tensor info. 
@@ -236,79 +243,79 @@ private: }; // Functions used - NETranspose _transpose_input_to_forget_weights{}; - NETranspose _transpose_input_to_cell_weights{}; - NETranspose _transpose_input_to_output_weights{}; - NETranspose _transpose_input_to_input_weights{}; - NETranspose _transpose_recurrent_to_forget_weights{}; - NETranspose _transpose_recurrent_to_cell_weights{}; - NETranspose _transpose_recurrent_to_output_weights{}; - NETranspose _transpose_recurrent_to_input_weights{}; - NETranspose _transpose_projection_weights{}; - NEGEMMLowpMatrixAReductionKernel _input_to_input_reduction{}; - NEGEMMLowpMatrixAReductionKernel _recurrent_to_input_reduction{}; - NEGEMMLowpMatrixAReductionKernel _input_to_forget_reduction{}; - NEGEMMLowpMatrixAReductionKernel _recurrent_to_forget_reduction{}; - NEGEMMLowpMatrixAReductionKernel _input_to_cell_reduction{}; - NEGEMMLowpMatrixAReductionKernel _recurrent_to_cell_reduction{}; - NEGEMMLowpMatrixAReductionKernel _input_to_output_reduction{}; - NEGEMMLowpMatrixAReductionKernel _recurrent_to_output_reduction{}; - NEGEMMLowpMatrixAReductionKernel _projection_reduction{}; - NEArithmeticAddition _projection_bias_add{}; - NEGEMMLowpMatrixMultiplyCore _mm_input_to_forget{}; - NEGEMMLowpMatrixMultiplyCore _mm_recurrent_to_forget{}; - NEPixelWiseMultiplication _pixelwise_mul_cell_to_forget{}; - NEGEMMLowpOutputStage _input_to_forget_outstage{}; - NEGEMMLowpOutputStage _recurrent_to_forget_outstage{}; - NEGEMMLowpOutputStage _cell_to_forget_outstage{}; - NEArithmeticAddition _accumulate_input_recurrent_forget{}; - NEArithmeticAddition _accumulate_cell_forget{}; - NEActivationLayer _forget_gate_sigmoid{}; - NEGEMMLowpMatrixMultiplyCore _mm_input_to_cell{}; - NEGEMMLowpOutputStage _input_to_cell_outstage{}; - NEGEMMLowpMatrixMultiplyCore _mm_recurrent_to_cell{}; - NEGEMMLowpOutputStage _recurrent_to_cell_outstage{}; - NEArithmeticAddition _accumulate_input_recurrent_modulation{}; - NEActivationLayer _cell_gate_tanh{}; - NEArithmeticSubtraction _input_gate_sub{}; - NEGEMMLowpMatrixMultiplyCore _mm_input_to_input{}; - NEGEMMLowpOutputStage _input_to_input_outstage{}; - NEGEMMLowpMatrixMultiplyCore _mm_recurrent_to_input{}; - NEGEMMLowpOutputStage _recurrent_to_input_outstage{}; - NEArithmeticAddition _accumulate_input_recurrent_input{}; - NEPixelWiseMultiplication _pixelwise_mul_cell_to_input{}; - NEGEMMLowpOutputStage _cell_to_input_outstage{}; - NEArithmeticAddition _accumulate_cell_input{}; - NEActivationLayer _input_gate_sigmoid{}; - NEPixelWiseMultiplication _pixelwise_mul_forget_cell{}; - NEPixelWiseMultiplication _pixelwise_mul_input_cell{}; - NEArithmeticAddition _add_forget_cell{}; - NEActivationLayer _cell_clip{}; - NEGEMMLowpMatrixMultiplyCore _mm_input_to_output{}; - NEGEMMLowpOutputStage _input_to_output_outstage{}; - NEGEMMLowpMatrixMultiplyCore _mm_recurrent_to_output{}; - NEGEMMLowpOutputStage _recurrent_to_output_outstage{}; - NEArithmeticAddition _accumulate_input_recurrent_output{}; - NEPixelWiseMultiplication _pixelwise_mul_cell_to_output{}; - NEGEMMLowpOutputStage _cell_to_output_outstage{}; - NEArithmeticAddition _accumulate_cell_to_output{}; - NEActivationLayer _output_gate_sigmoid{}; - NEActivationLayer _hidden_tanh{}; - NEPixelWiseMultiplication _pixelwise_mul_hidden{}; - NEGEMMLowpOutputStage _hidden_outstage{}; - NEGEMMLowpMatrixMultiplyCore _mm_projection{}; - NEGEMMLowpOutputStage _projection_outstage{}; - NEArithmeticAddition _accumulate_projection{}; - NEActivationLayer _projection_clip{}; + NETranspose _transpose_input_to_forget_weights; + 
NETranspose _transpose_input_to_cell_weights; + NETranspose _transpose_input_to_output_weights; + NETranspose _transpose_input_to_input_weights; + NETranspose _transpose_recurrent_to_forget_weights; + NETranspose _transpose_recurrent_to_cell_weights; + NETranspose _transpose_recurrent_to_output_weights; + NETranspose _transpose_recurrent_to_input_weights; + NETranspose _transpose_projection_weights; + std::unique_ptr<NEGEMMLowpMatrixAReductionKernel> _input_to_input_reduction; + std::unique_ptr<NEGEMMLowpMatrixAReductionKernel> _recurrent_to_input_reduction; + std::unique_ptr<NEGEMMLowpMatrixAReductionKernel> _input_to_forget_reduction; + std::unique_ptr<NEGEMMLowpMatrixAReductionKernel> _recurrent_to_forget_reduction; + std::unique_ptr<NEGEMMLowpMatrixAReductionKernel> _input_to_cell_reduction; + std::unique_ptr<NEGEMMLowpMatrixAReductionKernel> _recurrent_to_cell_reduction; + std::unique_ptr<NEGEMMLowpMatrixAReductionKernel> _input_to_output_reduction; + std::unique_ptr<NEGEMMLowpMatrixAReductionKernel> _recurrent_to_output_reduction; + std::unique_ptr<NEGEMMLowpMatrixAReductionKernel> _projection_reduction; + NEArithmeticAddition _projection_bias_add; + NEGEMMLowpMatrixMultiplyCore _mm_input_to_forget; + NEGEMMLowpMatrixMultiplyCore _mm_recurrent_to_forget; + NEPixelWiseMultiplication _pixelwise_mul_cell_to_forget; + NEGEMMLowpOutputStage _input_to_forget_outstage; + NEGEMMLowpOutputStage _recurrent_to_forget_outstage; + NEGEMMLowpOutputStage _cell_to_forget_outstage; + NEArithmeticAddition _accumulate_input_recurrent_forget; + NEArithmeticAddition _accumulate_cell_forget; + NEActivationLayer _forget_gate_sigmoid; + NEGEMMLowpMatrixMultiplyCore _mm_input_to_cell; + NEGEMMLowpOutputStage _input_to_cell_outstage; + NEGEMMLowpMatrixMultiplyCore _mm_recurrent_to_cell; + NEGEMMLowpOutputStage _recurrent_to_cell_outstage; + NEArithmeticAddition _accumulate_input_recurrent_modulation; + NEActivationLayer _cell_gate_tanh; + NEArithmeticSubtraction _input_gate_sub; + NEGEMMLowpMatrixMultiplyCore _mm_input_to_input; + NEGEMMLowpOutputStage _input_to_input_outstage; + NEGEMMLowpMatrixMultiplyCore _mm_recurrent_to_input; + NEGEMMLowpOutputStage _recurrent_to_input_outstage; + NEArithmeticAddition _accumulate_input_recurrent_input; + NEPixelWiseMultiplication _pixelwise_mul_cell_to_input; + NEGEMMLowpOutputStage _cell_to_input_outstage; + NEArithmeticAddition _accumulate_cell_input; + NEActivationLayer _input_gate_sigmoid; + NEPixelWiseMultiplication _pixelwise_mul_forget_cell; + NEPixelWiseMultiplication _pixelwise_mul_input_cell; + NEArithmeticAddition _add_forget_cell; + NEActivationLayer _cell_clip; + NEGEMMLowpMatrixMultiplyCore _mm_input_to_output; + NEGEMMLowpOutputStage _input_to_output_outstage; + NEGEMMLowpMatrixMultiplyCore _mm_recurrent_to_output; + NEGEMMLowpOutputStage _recurrent_to_output_outstage; + NEArithmeticAddition _accumulate_input_recurrent_output; + NEPixelWiseMultiplication _pixelwise_mul_cell_to_output; + NEGEMMLowpOutputStage _cell_to_output_outstage; + NEArithmeticAddition _accumulate_cell_to_output; + NEActivationLayer _output_gate_sigmoid; + NEActivationLayer _hidden_tanh; + NEPixelWiseMultiplication _pixelwise_mul_hidden; + NEGEMMLowpOutputStage _hidden_outstage; + NEGEMMLowpMatrixMultiplyCore _mm_projection; + NEGEMMLowpOutputStage _projection_outstage; + NEArithmeticAddition _accumulate_projection; + NEActivationLayer _projection_clip; - TensorCopyKernel _projection_bias_copy{}; - TensorCopyKernel _projection_output_to_accumulate_copy{}; - TensorCopyKernel _projection_accumulate_to_output_copy{}; - TensorCopyKernel _hidden_to_output_copy{}; + TensorCopyKernel _projection_bias_copy; + TensorCopyKernel _projection_output_to_accumulate_copy; + TensorCopyKernel _projection_accumulate_to_output_copy; + TensorCopyKernel _hidden_to_output_copy; - std::array<NEQLSTMLayerNormalizationKernel, _layer_norm_count> _layer_norms{ {} 
}; + std::array<std::unique_ptr<NEQLSTMLayerNormalizationKernel>, _layer_norm_count> _layer_norms; - NECopyKernel _copy_output{}; + NECopy _copy_output; // Tensor pointers const ITensor *_input_to_input_weights @@ -324,8 +331,8 @@ const ITensor *_recurrent_to_cell_weights{ nullptr }; const ITensor *_recurrent_to_output_weights{ nullptr }; const ITensor *_projection_weights{ nullptr }; - std::array<const ITensor *, _layer_norm_count> _layer_norm_weights{ {} }; - std::array<const ITensor *, _layer_norm_count> _layer_norm_bias{ {} }; + std::array<const ITensor *, _layer_norm_count> _layer_norm_weights{}; + std::array<const ITensor *, _layer_norm_count> _layer_norm_bias{}; using LayerNormIndexType = typename std::underlying_type<LayerNormGate>::type; inline LayerNormIndexType getGateIndex(LayerNormGate g) @@ -353,32 +360,13 @@ return _layer_norm_bias[getGateIndex(g)]; } - inline NEQLSTMLayerNormalizationKernel &get_layer_norm(LayerNormGate g) + inline std::unique_ptr<NEQLSTMLayerNormalizationKernel> &get_layer_norm(LayerNormGate g) { return _layer_norms[getGateIndex(g)]; } - inline void configure_layer_norm(LayerNormGate g, const ITensor *in) - { - ARM_COMPUTE_ERROR_ON(!_has_layer_norm); - - Tensor &out = get_layer_norm_output(g); - _memory_group.manage(&out); - out.allocator()->init(*(in->info())); - - get_layer_norm(g).configure(in, &out, get_layer_norm_weight(g), get_layer_norm_bias(g)); - } - - inline static Status validate_layer_norm(const ITensorInfo &in, const ITensorInfo &weight, const ITensorInfo &bias) - { - // Output quantization scale will be different, but ignored here - // since it will be configured at configure() stage. - const TensorInfo out - { - in - }; - return NEQLSTMLayerNormalizationKernel::validate(&in, &out, &weight, &bias); - } + void configure_layer_norm(LayerNormGate g, const ITensor *in); + static Status validate_layer_norm(const ITensorInfo &in, const ITensorInfo &weight, const ITensorInfo &bias); // Temporary tensors Tensor _input_to_forget_weights_transposed{ nullptr }; @@ -434,7 +422,7 @@ Tensor _projection_out_res{ nullptr }; Tensor _projection_accumulate_res{ nullptr }; Tensor _ones{ nullptr }; - std::array<Tensor, _layer_norm_count> _layer_norm_output{ {} }; + std::array<Tensor, _layer_norm_count> _layer_norm_output{}; inline Tensor &get_layer_norm_output(LayerNormGate g) { diff --git a/arm_compute/runtime/NEON/functions/NEQuantizationLayer.h b/arm_compute/runtime/NEON/functions/NEQuantizationLayer.h index 266b3df87a..36302f4741 100644 --- a/arm_compute/runtime/NEON/functions/NEQuantizationLayer.h +++ b/arm_compute/runtime/NEON/functions/NEQuantizationLayer.h @@ -26,7 +26,6 @@ #include "arm_compute/runtime/IFunction.h" -#include "arm_compute/core/NEON/kernels/NEQuantizationLayerKernel.h" #include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" #include "arm_compute/core/Types.h" @@ -34,6 +33,7 @@ namespace arm_compute { class ITensor; +class ITensorInfo; /** Basic function to simulate a quantization layer. This function calls the following NEON kernels: * * @ref NEQuantizationLayerKernel @@ -44,8 +44,6 @@ class NEQuantizationLayer : public INESimpleFunctionNoBorder { public: - /** Default constructor */ - NEQuantizationLayer() = default; /** Set the input and output tensors. * * @param[in] input Source tensor. The dimensions over the third will be interpreted as batches. Data types supported: QASYMM8/QASYMM8_SIGNED/F32/F16.
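The inline configure_layer_norm()/validate_layer_norm() bodies removed above move into the source file, where the now forward-declared NEQLSTMLayerNormalizationKernel is complete. A sketch of what the out-of-line definitions plausibly look like, reconstructed from the removed inline code; the make_unique helper (suggested by the MemorySupport.h include added in this hunk) and the src/core kernel path are assumptions, not confirmed by this diff.

// NEQLSTMLayer.cpp (sketch)
#include "arm_compute/runtime/NEON/functions/NEQLSTMLayer.h"

#include "arm_compute/core/Error.h"
#include "src/core/NEON/kernels/NEQLSTMLayerNormalizationKernel.h" // assumed post-move kernel header
#include "support/MemorySupport.h"

namespace arm_compute
{
void NEQLSTMLayer::configure_layer_norm(NEQLSTMLayer::LayerNormGate g, const ITensor *in)
{
    ARM_COMPUTE_ERROR_ON(!_has_layer_norm);

    Tensor &out = get_layer_norm_output(g);
    _memory_group.manage(&out);
    out.allocator()->init(*(in->info()));

    // The kernel is now owned through std::unique_ptr, so it is created before being configured.
    get_layer_norm(g) = support::cpp14::make_unique<NEQLSTMLayerNormalizationKernel>();
    get_layer_norm(g)->configure(in, &out, get_layer_norm_weight(g), get_layer_norm_bias(g));
}

Status NEQLSTMLayer::validate_layer_norm(const ITensorInfo &in, const ITensorInfo &weight, const ITensorInfo &bias)
{
    // Output quantization scale will be different, but ignored here since it is set at configure() time.
    const TensorInfo out{ in };
    return NEQLSTMLayerNormalizationKernel::validate(&in, &out, &weight, &bias);
}
} // namespace arm_compute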
diff --git a/arm_compute/runtime/NEON/functions/NERNNLayer.h b/arm_compute/runtime/NEON/functions/NERNNLayer.h index 12e3ef9c57..74fdc59af6 100644 --- a/arm_compute/runtime/NEON/functions/NERNNLayer.h +++ b/arm_compute/runtime/NEON/functions/NERNNLayer.h @@ -24,8 +24,6 @@ #ifndef ARM_COMPUTE_NERNNLAYER_H #define ARM_COMPUTE_NERNNLAYER_H -#include "arm_compute/core/NEON/kernels/NECopyKernel.h" - #include "arm_compute/core/Types.h" #include "arm_compute/runtime/NEON/functions/NEActivationLayer.h" #include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h" @@ -36,6 +34,7 @@ namespace arm_compute { // Forward declarations class ITensor; +class NECopyKernel; /** Basic function to run @ref NERNNLayer */ class NERNNLayer : public IFunction @@ -51,6 +50,8 @@ public: NERNNLayer &operator=(const NERNNLayer &) = delete; /** Default move assignment operator */ NERNNLayer &operator=(NERNNLayer &&) = default; + /** Default destructor */ + ~NERNNLayer(); /** Initialize the function * * @param[in] input Input is a 2-D tensor of shape [input_size, batch_size]. Data types supported: F16/F32 @@ -82,16 +83,16 @@ public: void prepare() override; private: - MemoryGroup _memory_group; - NEGEMM _gemm_state_f; - NEArithmeticAddition _add_f; - NEActivationLayer _activation; - NEFullyConnectedLayer _fully_connected; - NECopyKernel _copy_kernel; - Tensor _fully_connected_out; - Tensor _gemm_output; - Tensor _add_output; - bool _is_prepared; + MemoryGroup _memory_group; + NEGEMM _gemm_state_f; + NEArithmeticAddition _add_f; + NEActivationLayer _activation; + NEFullyConnectedLayer _fully_connected; + std::unique_ptr _copy_kernel; + Tensor _fully_connected_out; + Tensor _gemm_output; + Tensor _add_output; + bool _is_prepared; }; } // namespace arm_compute #endif /* ARM_COMPUTE_NERNNLAYER_H */ diff --git a/arm_compute/runtime/NEON/functions/NEROIAlignLayer.h b/arm_compute/runtime/NEON/functions/NEROIAlignLayer.h index 3e8db55f99..1d992f53df 100644 --- a/arm_compute/runtime/NEON/functions/NEROIAlignLayer.h +++ b/arm_compute/runtime/NEON/functions/NEROIAlignLayer.h @@ -24,11 +24,13 @@ #ifndef ARM_COMPUTE_NEROIALIGNLAYER_H #define ARM_COMPUTE_NEROIALIGNLAYER_H -#include "arm_compute/runtime/NEON/INESimpleFunction.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h" namespace arm_compute { class ITensor; +class ITensorInfo; /** Basic function to run @ref NEROIAlignLayerKernel. * @@ -36,7 +38,7 @@ class ITensor; * -# @ref NEROIAlignLayerKernel * */ -class NEROIAlignLayer : public INESimpleFunction +class NEROIAlignLayer : public INESimpleFunctionNoBorder { public: /** Set the input and output tensors. diff --git a/arm_compute/runtime/NEON/functions/NEROIPoolingLayer.h b/arm_compute/runtime/NEON/functions/NEROIPoolingLayer.h index 08885d0e58..0b9b4f75fc 100644 --- a/arm_compute/runtime/NEON/functions/NEROIPoolingLayer.h +++ b/arm_compute/runtime/NEON/functions/NEROIPoolingLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2017-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -27,11 +27,13 @@ #include "arm_compute/runtime/IFunction.h" #include "arm_compute/core/IArray.h" -#include "arm_compute/core/NEON/kernels/NEROIPoolingLayerKernel.h" +#include namespace arm_compute { class ITensor; +class NEROIPoolingLayerKernel; +class ROIPoolingLayerInfo; /** Basic function to run @ref NEROIPoolingLayerKernel. 
* @@ -44,6 +46,16 @@ class NEROIPoolingLayer : public IFunction public: /** Constructor */ NEROIPoolingLayer(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEROIPoolingLayer(const NEROIPoolingLayer &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NEROIPoolingLayer &operator=(const NEROIPoolingLayer &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEROIPoolingLayer(NEROIPoolingLayer &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NEROIPoolingLayer &operator=(NEROIPoolingLayer &&) = delete; + /** Default destructor */ + ~NEROIPoolingLayer(); /** Set the input and output tensors. * * @param[in] input Source tensor. Data types supported: F32. @@ -63,7 +75,7 @@ public: void run() override; private: - NEROIPoolingLayerKernel _roi_kernel; + std::unique_ptr _roi_kernel; }; } // namespace arm_compute #endif /* ARM_COMPUTE_NEROIPOOLINGLAYER_H */ diff --git a/arm_compute/runtime/NEON/functions/NERange.h b/arm_compute/runtime/NEON/functions/NERange.h index 04889d4d6f..844a47d2ae 100644 --- a/arm_compute/runtime/NEON/functions/NERange.h +++ b/arm_compute/runtime/NEON/functions/NERange.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,13 +24,15 @@ #ifndef ARM_COMPUTE_NERANGE_H #define ARM_COMPUTE_NERANGE_H -#include "arm_compute/core/NEON/kernels/NERangeKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/IFunction.h" +#include namespace arm_compute { class ITensor; +class ITensorInfo; +class NERangeKernel; /** Basic function to run @ref NERangeKernel * @@ -42,6 +44,16 @@ class NERange : public IFunction public: /** Default constructor */ NERange(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NERange(const NERange &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + NERange &operator=(const NERange &) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NERange(NERange &&) = delete; + /** Prevent instances of this class from being moved (As this class contains non movable objects) */ + NERange &operator=(NERange &&) = delete; + /** Default destructor */ + ~NERange(); /** Initialize the kernel's start, end, step and output tensor. * * @param[out] output Output tensor. Data types supported: U8/S8/U16/S16/U32/S32/F16/F32. 
@@ -65,7 +77,7 @@ public:
     void run() override;
 
 private:
-    NERangeKernel _kernel;
+    std::unique_ptr<NERangeKernel> _kernel;
 };
 } // namespace arm_compute
 #endif /* ARM_COMPUTE_NERANGE_H */
diff --git a/arm_compute/runtime/NEON/functions/NEReduceMean.h b/arm_compute/runtime/NEON/functions/NEReduceMean.h
index eee3f7f799..89cd09812b 100644
--- a/arm_compute/runtime/NEON/functions/NEReduceMean.h
+++ b/arm_compute/runtime/NEON/functions/NEReduceMean.h
@@ -26,7 +26,6 @@
 
 #include "arm_compute/runtime/IFunction.h"
 
-#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
 #include "arm_compute/core/Types.h"
 #include "arm_compute/runtime/MemoryGroup.h"
 #include "arm_compute/runtime/NEON/functions/NEDequantizationLayer.h"
@@ -43,6 +42,16 @@ class NEReduceMean : public IFunction
 public:
     /** Constructor */
     NEReduceMean(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEReduceMean(const NEReduceMean &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEReduceMean &operator=(const NEReduceMean &) = delete;
+    /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+    NEReduceMean(NEReduceMean &&) = delete;
+    /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+    NEReduceMean &operator=(NEReduceMean &&) = delete;
+    /** Default destructor */
+    ~NEReduceMean();
     /** Configure kernel
      *
      * @note Supported tensor rank: up to 4
diff --git a/arm_compute/runtime/NEON/functions/NEReductionOperation.h b/arm_compute/runtime/NEON/functions/NEReductionOperation.h
index aafccb00e3..8186e2e355 100644
--- a/arm_compute/runtime/NEON/functions/NEReductionOperation.h
+++ b/arm_compute/runtime/NEON/functions/NEReductionOperation.h
@@ -26,13 +26,14 @@
 
 #include "arm_compute/runtime/IFunction.h"
 
-#include "arm_compute/core/NEON/kernels/NEReductionOperationKernel.h"
 #include "arm_compute/runtime/NEON/functions/NEReshapeLayer.h"
 #include "arm_compute/runtime/Tensor.h"
+#include <memory>
 
 namespace arm_compute
 {
 class ITensor;
+class NEReductionOperationKernel;
 
 /** Basic function to simulate a reduction operation. This function calls the following NEON kernels:
  *
@@ -45,6 +46,16 @@ class NEReductionOperation : public IFunction
 public:
     /** Default constructor */
     NEReductionOperation(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEReductionOperation(const NEReductionOperation &) = delete;
+    /** Default move constructor */
+    NEReductionOperation(NEReductionOperation &&) = default;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEReductionOperation &operator=(const NEReductionOperation &) = delete;
+    /** Default move assignment operator */
+    NEReductionOperation &operator=(NEReductionOperation &&) = default;
+    /** Default destructor */
+    ~NEReductionOperation();
     /** Set the input and output tensors.
      *
      * @param[in, out] input Source tensor. Data type supported: QASYMM8_SIGNED/QASYMM8/F16/F32/S32. Data layouts supported: NCHW. (Written to only for border_size != 0)
@@ -71,13 +82,13 @@ public:
     void run() override;
 
 private:
-    MemoryGroup _memory_group;
-    NEReductionOperationKernel _reduction_kernel;
-    NEReshapeLayer _reshape;
-    Tensor _output_internal;
-    size_t _window_split;
-    int _reduction_axis;
-    bool _is_reshape_required;
+    MemoryGroup _memory_group;
+    std::unique_ptr<NEReductionOperationKernel> _reduction_kernel;
+    NEReshapeLayer _reshape;
+    Tensor _output_internal;
+    size_t _window_split;
+    int _reduction_axis;
+    bool _is_reshape_required;
 };
 } // namespace arm_compute
 #endif /* ARM_COMPUTE_NEREDUCTIONOPERATION_H */
diff --git a/arm_compute/runtime/NEON/functions/NERemap.h b/arm_compute/runtime/NEON/functions/NERemap.h
index f087bd2e3c..d870ce6e51 100644
--- a/arm_compute/runtime/NEON/functions/NERemap.h
+++ b/arm_compute/runtime/NEON/functions/NERemap.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
diff --git a/arm_compute/runtime/NEON/functions/NEReorgLayer.h b/arm_compute/runtime/NEON/functions/NEReorgLayer.h
index 19385e1b74..f76d1d252c 100644
--- a/arm_compute/runtime/NEON/functions/NEReorgLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEReorgLayer.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2019 Arm Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -31,6 +31,7 @@ namespace arm_compute
 {
 // Forward declarations
 class ITensor;
+class ITensorInfo;
 
 /** Basic function to run @ref NEReorgLayerKernel */
 class NEReorgLayer : public INESimpleFunctionNoBorder
diff --git a/arm_compute/runtime/NEON/functions/NEReshapeLayer.h b/arm_compute/runtime/NEON/functions/NEReshapeLayer.h
index 2ca6660139..641a96e0f9 100644
--- a/arm_compute/runtime/NEON/functions/NEReshapeLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEReshapeLayer.h
@@ -24,7 +24,6 @@
 #ifndef ARM_COMPUTE_NERESHAPELAYER_H
 #define ARM_COMPUTE_NERESHAPELAYER_H
 
-#include "arm_compute/core/NEON/kernels/NEReshapeLayerKernel.h"
 #include "arm_compute/core/Types.h"
 #include "arm_compute/runtime/IFunction.h"
 #include "arm_compute/runtime/NEON/INEOperator.h"
@@ -81,6 +80,18 @@ namespace experimental
 class NEReshape : public INEOperator
 {
 public:
+    /** Default Constructor */
+    NEReshape() = default;
+    /** Default Destructor */
+    ~NEReshape();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEReshape(const NEReshape &) = delete;
+    /** Default move constructor */
+    NEReshape(NEReshape &&);
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEReshape &operator=(const NEReshape &) = delete;
+    /** Default move assignment operator */
+    NEReshape &operator=(NEReshape &&);
     /** Initialise the kernel's inputs and outputs
      *
      * @param[in] input Input tensor info. Data type supported: All
diff --git a/arm_compute/runtime/NEON/functions/NEReverse.h b/arm_compute/runtime/NEON/functions/NEReverse.h
index 7a4566db28..2048dafcb5 100644
--- a/arm_compute/runtime/NEON/functions/NEReverse.h
+++ b/arm_compute/runtime/NEON/functions/NEReverse.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2019 Arm Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -31,6 +31,7 @@
 namespace arm_compute
 {
 class ITensor;
+class ITensorInfo;
 
 /** Basic function to run @ref NEReverseKernel */
 class NEReverse : public INESimpleFunctionNoBorder
diff --git a/arm_compute/runtime/NEON/functions/NEScale.h b/arm_compute/runtime/NEON/functions/NEScale.h
index 4063e558a6..fceda83510 100644
--- a/arm_compute/runtime/NEON/functions/NEScale.h
+++ b/arm_compute/runtime/NEON/functions/NEScale.h
@@ -24,7 +24,7 @@
 #ifndef ARM_COMPUTE_NESCALEIMAGE_H
 #define ARM_COMPUTE_NESCALEIMAGE_H
 
-#include "arm_compute/core/NEON/kernels/NEScaleKernel.h"
+#include "arm_compute/core/KernelDescriptors.h"
 #include "arm_compute/core/Types.h"
 #include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
 #include "arm_compute/runtime/Tensor.h"
diff --git a/arm_compute/runtime/NEON/functions/NESelect.h b/arm_compute/runtime/NEON/functions/NESelect.h
index 258ac5d64d..c66fbfa7d4 100644
--- a/arm_compute/runtime/NEON/functions/NESelect.h
+++ b/arm_compute/runtime/NEON/functions/NESelect.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2019 Arm Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -25,15 +25,16 @@
 #define ARM_COMPUTE_NESELECT_H
 
 #include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+#include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
 
 namespace arm_compute
 {
 // Forward declarations
 class ITensor;
+class ITensorInfo;
 
 /** Basic function to run @ref NESelect */
-class NESelect : public INESimpleFunction
+class NESelect : public INESimpleFunctionNoBorder
 {
 public:
     /** Initialise the kernel's inputs and output.
diff --git a/arm_compute/runtime/NEON/functions/NESobel3x3.h b/arm_compute/runtime/NEON/functions/NESobel3x3.h
index 4dbdfd223b..a0b8f6296f 100644
--- a/arm_compute/runtime/NEON/functions/NESobel3x3.h
+++ b/arm_compute/runtime/NEON/functions/NESobel3x3.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
diff --git a/arm_compute/runtime/NEON/functions/NESobel5x5.h b/arm_compute/runtime/NEON/functions/NESobel5x5.h
index b5365bc1b7..b17f9e7972 100644
--- a/arm_compute/runtime/NEON/functions/NESobel5x5.h
+++ b/arm_compute/runtime/NEON/functions/NESobel5x5.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -24,8 +24,6 @@
 #ifndef ARM_COMPUTE_NESOBEL5x5_H
 #define ARM_COMPUTE_NESOBEL5x5_H
 
-#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
-#include "arm_compute/core/NEON/kernels/NESobel5x5Kernel.h"
 #include "arm_compute/core/Types.h"
 #include "arm_compute/runtime/IFunction.h"
 #include "arm_compute/runtime/IMemoryManager.h"
@@ -38,6 +36,9 @@
 namespace arm_compute
 {
 class ITensor;
+class NESobel5x5HorKernel;
+class NESobel5x5VertKernel;
+class NEFillBorderKernel;
 
 /** Basic function to execute sobel 5x5 filter. This function calls the following NEON kernels:
  *
@@ -51,6 +52,16 @@ class NESobel5x5 : public IFunction
 public:
     /** Default constructor */
     NESobel5x5(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NESobel5x5(const NESobel5x5 &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NESobel5x5 &operator=(const NESobel5x5 &) = delete;
+    /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+    NESobel5x5(NESobel5x5 &&) = delete;
+    /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+    NESobel5x5 &operator=(NESobel5x5 &&) = delete;
+    /** Default destructor */
+    ~NESobel5x5();
     /** Initialise the function's source, destinations and border mode.
      *
      * @note At least one of output_x or output_y must be not NULL.
@@ -68,12 +79,12 @@ public:
     void run() override;
 
 protected:
-    MemoryGroup _memory_group; /**< Function memory group */
-    NESobel5x5HorKernel _sobel_hor; /**< Sobel Horizontal 5x5 kernel */
-    NESobel5x5VertKernel _sobel_vert; /**< Sobel Vertical 5x5 kernel */
-    Tensor _tmp_x; /**< Temporary buffer for Sobel X */
-    Tensor _tmp_y; /**< Temporary buffer for Sobel Y */
-    NEFillBorderKernel _border_handler; /**< Kernel to handle tensor borders */
+    MemoryGroup _memory_group; /**< Function memory group */
+    std::unique_ptr<NESobel5x5HorKernel> _sobel_hor; /**< Sobel Horizontal 5x5 kernel */
+    std::unique_ptr<NESobel5x5VertKernel> _sobel_vert; /**< Sobel Vertical 5x5 kernel */
+    Tensor _tmp_x; /**< Temporary buffer for Sobel X */
+    Tensor _tmp_y; /**< Temporary buffer for Sobel Y */
+    std::unique_ptr<NEFillBorderKernel> _border_handler; /**< Kernel to handle tensor borders */
 };
 }
 #endif /*ARM_COMPUTE_NESOBEL5x5_H */
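NESobel5x5 now owns its horizontal kernel, vertical kernel and border handler through std::unique_ptr and keeps two temporary tensors between the passes. The sketch below shows, with invented names (SketchSobel, HorKernel, VertKernel, SimpleTensor) and none of the real scheduling or border logic, how such a two-stage function is commonly wired: kernels are created in configure() and executed in order in run().

// Hypothetical two-stage pipeline: a horizontal pass writes into temporaries,
// a vertical pass consumes them. It mirrors the member layout above only loosely.
#include <memory>
#include <vector>

struct SimpleTensor
{
    std::vector<float> data;
};

class HorKernel
{
public:
    void configure(const SimpleTensor *in, SimpleTensor *tmp_x, SimpleTensor *tmp_y)
    {
        _in = in;
        _tx = tmp_x;
        _ty = tmp_y;
    }
    void run() { /* horizontal filtering of _in into _tx / _ty would go here */ }

private:
    const SimpleTensor *_in{ nullptr };
    SimpleTensor *_tx{ nullptr };
    SimpleTensor *_ty{ nullptr };
};

class VertKernel
{
public:
    void configure(const SimpleTensor *, const SimpleTensor *, SimpleTensor *, SimpleTensor *) {}
    void run() { /* vertical filtering of the temporaries into the outputs */ }
};

class SketchSobel
{
public:
    void configure(const SimpleTensor *input, SimpleTensor *output_x, SimpleTensor *output_y)
    {
        // Kernels are created on demand in configure(), as the refactored functions do.
        _hor = std::make_unique<HorKernel>();
        _vert = std::make_unique<VertKernel>();
        _hor->configure(input, &_tmp_x, &_tmp_y);
        _vert->configure(&_tmp_x, &_tmp_y, output_x, output_y);
    }
    void run()
    {
        _hor->run();  // stage 1: fill the temporary buffers
        _vert->run(); // stage 2: produce the final outputs
    }

private:
    std::unique_ptr<HorKernel> _hor;
    std::unique_ptr<VertKernel> _vert;
    SimpleTensor _tmp_x;
    SimpleTensor _tmp_y;
};

int main()
{
    SimpleTensor in, out_x, out_y;
    SketchSobel sobel;
    sobel.configure(&in, &out_x, &out_y);
    sobel.run();
    return 0;
}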
diff --git a/arm_compute/runtime/NEON/functions/NESobel7x7.h b/arm_compute/runtime/NEON/functions/NESobel7x7.h
index 925444d85b..cd0510a997 100644
--- a/arm_compute/runtime/NEON/functions/NESobel7x7.h
+++ b/arm_compute/runtime/NEON/functions/NESobel7x7.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -24,8 +24,6 @@
 #ifndef ARM_COMPUTE_NESOBEL7x7_H
 #define ARM_COMPUTE_NESOBEL7x7_H
 
-#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
-#include "arm_compute/core/NEON/kernels/NESobel7x7Kernel.h"
 #include "arm_compute/core/Types.h"
 #include "arm_compute/runtime/IFunction.h"
 #include "arm_compute/runtime/IMemoryManager.h"
@@ -38,6 +36,9 @@
 namespace arm_compute
 {
 class ITensor;
+class NESobel7x7HorKernel;
+class NESobel7x7VertKernel;
+class NEFillBorderKernel;
 
 /** Basic function to execute sobel 7x7 filter. This function calls the following NEON kernels:
  *
@@ -51,6 +52,16 @@ class NESobel7x7 : public IFunction
 public:
     /** Default constructor */
     NESobel7x7(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NESobel7x7(const NESobel7x7 &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NESobel7x7 &operator=(const NESobel7x7 &) = delete;
+    /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+    NESobel7x7(NESobel7x7 &&) = delete;
+    /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+    NESobel7x7 &operator=(NESobel7x7 &&) = delete;
+    /** Default destructor */
+    ~NESobel7x7();
     /** Initialise the function's source, destinations and border mode.
      *
      * @note At least one of output_x or output_y must be not NULL.
@@ -68,12 +79,12 @@ public:
     void run() override;
 
 protected:
-    MemoryGroup _memory_group; /**< Function memory group */
-    NESobel7x7HorKernel _sobel_hor; /**< Sobel Horizontal 7x7 kernel */
-    NESobel7x7VertKernel _sobel_vert; /**< Sobel Vertical 7x7 kernel */
-    Tensor _tmp_x; /**< Temporary buffer for Sobel X */
-    Tensor _tmp_y; /**< Temporary buffer for Sobel Y */
-    NEFillBorderKernel _border_handler; /**< Kernel to handle tensor borders */
+    MemoryGroup _memory_group; /**< Function memory group */
+    std::unique_ptr<NESobel7x7HorKernel> _sobel_hor; /**< Sobel Horizontal 7x7 kernel */
+    std::unique_ptr<NESobel7x7VertKernel> _sobel_vert; /**< Sobel Vertical 7x7 kernel */
+    Tensor _tmp_x; /**< Temporary buffer for Sobel X */
+    Tensor _tmp_y; /**< Temporary buffer for Sobel Y */
+    std::unique_ptr<NEFillBorderKernel> _border_handler; /**< Kernel to handle tensor borders */
 };
 }
 #endif /*ARM_COMPUTE_NESOBEL7x7_H */
diff --git a/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h b/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h
index 20b20201d5..40fa38afde 100644
--- a/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h
+++ b/arm_compute/runtime/NEON/functions/NESoftmaxLayer.h
@@ -24,16 +24,19 @@
 #ifndef ARM_COMPUTE_NESOFTMAXLAYER_H
 #define ARM_COMPUTE_NESOFTMAXLAYER_H
 
-#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
-#include "arm_compute/core/NEON/kernels/NESoftmaxLayerKernel.h"
 #include "arm_compute/runtime/IFunction.h"
 #include "arm_compute/runtime/MemoryGroup.h"
 #include "arm_compute/runtime/NEON/functions/NEPermute.h"
 #include "arm_compute/runtime/Tensor.h"
+#include <memory>
 
 namespace arm_compute
 {
 class ITensor;
+class NELogits1DMaxKernel;
+template <bool IS_LOG>
+class NELogits1DSoftmaxKernel;
+class NEFillBorderKernel;
 
 /** Basic function to compute a SoftmaxLayer and a Log SoftmaxLayer.
  *
@@ -64,6 +67,8 @@ public:
     NESoftmaxLayerGeneric &operator=(const NESoftmaxLayerGeneric &) = delete;
     /** Default move assignment operator */
     NESoftmaxLayerGeneric &operator=(NESoftmaxLayerGeneric &&) = default;
+    /** Default destructor */
+    ~NESoftmaxLayerGeneric();
     /** Set the input and output tensors.
      *
      * @param[in,out] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32. If the width is not a
@@ -91,17 +96,17 @@ public:
     void run() override;
 
 private:
-    MemoryGroup _memory_group;
-    NEPermute _permute_input;
-    NEPermute _permute_output;
-    NELogits1DMaxKernel _max_kernel;
-    NELogits1DSoftmaxKernel<IS_LOG> _softmax_kernel;
-    NEFillBorderKernel _fill_border_kernel;
-    Tensor _max;
-    Tensor _tmp;
-    Tensor _input_permuted;
-    Tensor _output_permuted;
-    bool _needs_permute;
+    MemoryGroup _memory_group;
+    NEPermute _permute_input;
+    NEPermute _permute_output;
+    std::unique_ptr<NELogits1DMaxKernel> _max_kernel;
+    std::unique_ptr<NELogits1DSoftmaxKernel<IS_LOG>> _softmax_kernel;
+    std::unique_ptr<NEFillBorderKernel> _fill_border_kernel;
+    Tensor _max;
+    Tensor _tmp;
+    Tensor _input_permuted;
+    Tensor _output_permuted;
+    bool _needs_permute;
 };
 
 using NESoftmaxLayer = NESoftmaxLayerGeneric<false>;
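The softmax header forward-declares a kernel class template (template <bool IS_LOG> class NELogits1DSoftmaxKernel;) and stores it as std::unique_ptr<NELogits1DSoftmaxKernel<IS_LOG>>. The sketch below illustrates that technique with invented names (LogitsKernel, SoftmaxSketch); it does not reproduce the real kernel interface.

// Forward-declaring a class template works the same way as for a plain class:
// the declaration is enough for a unique_ptr member, the definition is only
// needed where the object is actually created or destroyed.
#include <memory>

template <bool IS_LOG>
class LogitsKernel; // forward declaration (header side)

template <bool IS_LOG>
class SoftmaxSketch
{
public:
    SoftmaxSketch();
    ~SoftmaxSketch();

private:
    std::unique_ptr<LogitsKernel<IS_LOG>> _kernel;
};

// "Source side": the template is now defined, so the members can be instantiated.
template <bool IS_LOG>
class LogitsKernel
{
public:
    void run() { /* IS_LOG would select log-softmax vs softmax at compile time */ }
};

template <bool IS_LOG>
SoftmaxSketch<IS_LOG>::SoftmaxSketch() : _kernel(std::make_unique<LogitsKernel<IS_LOG>>())
{
}

template <bool IS_LOG>
SoftmaxSketch<IS_LOG>::~SoftmaxSketch() = default;

// Alias in the spirit of the NESoftmaxLayer alias above.
using SoftmaxSketchF = SoftmaxSketch<false>;

int main()
{
    SoftmaxSketchF softmax;
    (void)softmax;
    return 0;
}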
diff --git a/arm_compute/runtime/NEON/functions/NESpaceToBatchLayer.h b/arm_compute/runtime/NEON/functions/NESpaceToBatchLayer.h
index 6f339e8d52..6df06e87ec 100644
--- a/arm_compute/runtime/NEON/functions/NESpaceToBatchLayer.h
+++ b/arm_compute/runtime/NEON/functions/NESpaceToBatchLayer.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019 Arm Limited.
+ * Copyright (c) 2019-2020 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -26,13 +26,15 @@
 
 #include "arm_compute/runtime/IFunction.h"
 
-#include "arm_compute/core/NEON/kernels/NEMemsetKernel.h"
-#include "arm_compute/core/NEON/kernels/NESpaceToBatchLayerKernel.h"
 #include "arm_compute/core/Types.h"
+#include <memory>
 
 namespace arm_compute
 {
 class ITensor;
+class ITensorInfo;
+class NESpaceToBatchLayerKernel;
+class NEMemsetKernel;
 
 /** Basic function to spatial divide a tensor. This function calls the following NEON kernels/functions:
  *
@@ -53,7 +55,7 @@ public:
     /** Allow instances of this class to be moved */
     NESpaceToBatchLayer &operator=(NESpaceToBatchLayer &&) = default;
     /** Default destructor */
-    virtual ~NESpaceToBatchLayer() = default;
+    ~NESpaceToBatchLayer();
     /** Set the input and output tensors.
      *
      * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
@@ -99,9 +101,9 @@ public:
     void run() override;
 
 private:
-    NESpaceToBatchLayerKernel _space_to_batch_kernel; /**< SpaceToBatch kernel to run */
-    NEMemsetKernel _memset_kernel; /**< Memset kernel to run */
-    bool _has_padding; /**< Flag to check if the output has padding */
+    std::unique_ptr<NESpaceToBatchLayerKernel> _space_to_batch_kernel; /**< SpaceToBatch kernel to run */
+    std::unique_ptr<NEMemsetKernel> _memset_kernel; /**< Memset kernel to run */
+    bool _has_padding; /**< Flag to check if the output has padding */
 };
 } // namespace arm_compute
 #endif /* ARM_COMPUTE_NESPACETOBATCHLAYER_H */
diff --git a/arm_compute/runtime/NEON/functions/NESpaceToDepthLayer.h b/arm_compute/runtime/NEON/functions/NESpaceToDepthLayer.h
index 16a9c80d44..1e7aae215d 100644
--- a/arm_compute/runtime/NEON/functions/NESpaceToDepthLayer.h
+++ b/arm_compute/runtime/NEON/functions/NESpaceToDepthLayer.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019 Arm Limited.
+ * Copyright (c) 2019-2020 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -24,15 +24,16 @@
 #ifndef ARM_COMPUTE_NESPACETODEPTHLAYER_H
 #define ARM_COMPUTE_NESPACETODEPTHLAYER_H
 
+#include "arm_compute/core/Types.h"
 #include "arm_compute/runtime/IFunction.h"
 
-#include "arm_compute/core/NEON/kernels/NEMemsetKernel.h"
-#include "arm_compute/core/NEON/kernels/NESpaceToDepthLayerKernel.h"
-#include "arm_compute/core/Types.h"
+#include <memory>
 
 namespace arm_compute
 {
 class ITensor;
+class ITensorInfo;
+class NESpaceToDepthLayerKernel;
 
 /** This function calls the following NEON kernels/functions:
  *
@@ -52,7 +53,7 @@ public:
     /** Allow instances of this class to be moved */
     NESpaceToDepthLayer &operator=(NESpaceToDepthLayer &&) = default;
     /** Default destructor */
-    virtual ~NESpaceToDepthLayer() = default;
+    ~NESpaceToDepthLayer();
     /** Set the input and output tensors.
      *
      * @param[in] input Tensor input. Supported tensor rank: 4. Data types supported: All.
@@ -74,7 +75,7 @@ public:
     void run() override;
 
 private:
-    NESpaceToDepthLayerKernel _space_to_depth_kernel; /**< SpaceToDepth kernel to run */
+    std::unique_ptr<NESpaceToDepthLayerKernel> _space_to_depth_kernel; /**< SpaceToDepth kernel to run */
 };
 } // namespace arm_compute
 #endif /* ARM_COMPUTE_NESPACETODEPTHLAYER_H */
diff --git a/arm_compute/runtime/NEON/functions/NEStackLayer.h b/arm_compute/runtime/NEON/functions/NEStackLayer.h
index 4180b6da08..f6fa4f2eb3 100644
--- a/arm_compute/runtime/NEON/functions/NEStackLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEStackLayer.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2019 Arm Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -27,14 +27,14 @@
 #include "arm_compute/core/Types.h"
 #include "arm_compute/runtime/IFunction.h"
 
-#include "arm_compute/core/NEON/kernels/NEStackLayerKernel.h"
-
 #include <memory>
 #include <vector>
 
 namespace arm_compute
 {
 class ITensor;
+class ITensorInfo;
+class NEStackLayerKernel;
 
 /** Basic function to stack tensors along an axis. This function calls the following kernel:
  *
@@ -46,6 +46,16 @@ class NEStackLayer : public IFunction
 public:
     /** Default constructor */
     NEStackLayer();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEStackLayer(const NEStackLayer &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEStackLayer &operator=(const NEStackLayer &) = delete;
+    /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+    NEStackLayer(NEStackLayer &&) = delete;
+    /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+    NEStackLayer &operator=(NEStackLayer &&) = delete;
+    /** Default destructor */
+    ~NEStackLayer();
     /** Initialise the kernel's inputs vector and output.
      *
      * @note Supported input tensor rank: up to 4
@@ -73,9 +83,9 @@ public:
     void run() override;
 
 private:
-    std::vector<ITensor *> _input;
-    std::vector<NEStackLayerKernel> _stack_kernels;
-    unsigned int _num_inputs;
+    std::vector<ITensor *> _input;
+    std::vector<std::unique_ptr<NEStackLayerKernel>> _stack_kernels;
+    unsigned int _num_inputs;
 };
 } // namespace arm_compute
 #endif /* ARM_COMPUTE_NESTACKLAYER_H */
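NEStackLayer replaces its vector of kernel objects with std::vector<std::unique_ptr<NEStackLayerKernel>>, one heap-allocated kernel per input. A brief sketch of populating and running such a collection, again with invented SliceKernel/StackSketch names:

#include <memory>
#include <vector>

class SliceKernel
{
public:
    void configure(unsigned int slice_index) { _index = slice_index; }
    void run() { /* process slice _index */ }

private:
    unsigned int _index{ 0 };
};

class StackSketch
{
public:
    void configure(unsigned int num_inputs)
    {
        _num_inputs = num_inputs;
        _kernels.clear();
        _kernels.reserve(num_inputs);
        for(unsigned int i = 0; i < num_inputs; ++i)
        {
            // One kernel per input, created up front; unique_ptr keeps the header
            // free of the kernel definition and lets the vector grow cheaply.
            auto k = std::make_unique<SliceKernel>();
            k->configure(i);
            _kernels.emplace_back(std::move(k));
        }
    }
    void run()
    {
        for(auto &k : _kernels)
        {
            k->run();
        }
    }

private:
    std::vector<std::unique_ptr<SliceKernel>> _kernels;
    unsigned int _num_inputs{ 0 };
};

int main()
{
    StackSketch stack;
    stack.configure(4);
    stack.run();
    return 0;
}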
diff --git a/arm_compute/runtime/NEON/functions/NETableLookup.h b/arm_compute/runtime/NEON/functions/NETableLookup.h
index fb08274761..03674cd297 100644
--- a/arm_compute/runtime/NEON/functions/NETableLookup.h
+++ b/arm_compute/runtime/NEON/functions/NETableLookup.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
diff --git a/arm_compute/runtime/NEON/functions/NEThreshold.h b/arm_compute/runtime/NEON/functions/NEThreshold.h
index cb9b696769..0a9edfc7ca 100644
--- a/arm_compute/runtime/NEON/functions/NEThreshold.h
+++ b/arm_compute/runtime/NEON/functions/NEThreshold.h
@@ -34,6 +34,7 @@ namespace arm_compute
 {
 // Forward declarations
 class ITensor;
+class ITensorInfo;
 
 /** Basic function to run @ref NEThresholdKernel */
 class NEThreshold : public INESimpleFunctionNoBorder
diff --git a/arm_compute/runtime/NEON/functions/NETile.h b/arm_compute/runtime/NEON/functions/NETile.h
index 53a94db583..d5ce76c9cf 100644
--- a/arm_compute/runtime/NEON/functions/NETile.h
+++ b/arm_compute/runtime/NEON/functions/NETile.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2019 Arm Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -31,6 +31,7 @@
 namespace arm_compute
 {
 class ITensor;
+class ITensorInfo;
 
 /** Basic function to run @ref NETileKernel */
 class NETile : public INESimpleFunctionNoBorder
diff --git a/arm_compute/runtime/NEON/functions/NETranspose.h b/arm_compute/runtime/NEON/functions/NETranspose.h
index 1169459f0f..2651bdd727 100644
--- a/arm_compute/runtime/NEON/functions/NETranspose.h
+++ b/arm_compute/runtime/NEON/functions/NETranspose.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 Arm Limited.
+ * Copyright (c) 2017-2020 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -30,6 +30,7 @@
 namespace arm_compute
 {
 class ITensor;
+class ITensorInfo;
 
 /** Basic function to transpose a matrix on NEON. This function calls the following NEON kernel:
  *
diff --git a/arm_compute/runtime/NEON/functions/NEUnstack.h b/arm_compute/runtime/NEON/functions/NEUnstack.h
index 2e3a679664..c8e85115f7 100644
--- a/arm_compute/runtime/NEON/functions/NEUnstack.h
+++ b/arm_compute/runtime/NEON/functions/NEUnstack.h
@@ -45,6 +45,16 @@ class NEUnstack : public IFunction
 public:
     /** Default constructor */
     NEUnstack();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEUnstack(const NEUnstack &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEUnstack &operator=(const NEUnstack &) = delete;
+    /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+    NEUnstack(NEUnstack &&) = delete;
+    /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+    NEUnstack &operator=(NEUnstack &&) = delete;
+    /** Default destructor */
+    ~NEUnstack() = default;
     /** Set the input, output and unstacking axis.
      *
      * @param[in] input A tensor to be unstacked. Data type supported: All.
diff --git a/arm_compute/runtime/NEON/functions/NEUpsampleLayer.h b/arm_compute/runtime/NEON/functions/NEUpsampleLayer.h
index f9145f1612..168845d203 100644
--- a/arm_compute/runtime/NEON/functions/NEUpsampleLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEUpsampleLayer.h
@@ -24,15 +24,17 @@
 #ifndef ARM_COMPUTE_NEUPSAMPLELAYER_H
 #define ARM_COMPUTE_NEUPSAMPLELAYER_H
 
-#include "arm_compute/core/NEON/kernels/NEUpsampleLayerKernel.h"
 #include "arm_compute/core/Types.h"
 #include "arm_compute/runtime/IFunction.h"
 #include "arm_compute/runtime/NEON/NEScheduler.h"
 #include "arm_compute/runtime/Tensor.h"
+#include <memory>
+
 namespace arm_compute
 {
 class ITensor;
+class NEUpsampleLayerKernel;
 
 /** Function to run upsample layer */
 class NEUpsampleLayer : public IFunction
@@ -40,6 +42,16 @@ class NEUpsampleLayer : public IFunction
 public:
     /** Constructor */
     NEUpsampleLayer();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEUpsampleLayer(const NEUpsampleLayer &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEUpsampleLayer &operator=(const NEUpsampleLayer &) = delete;
+    /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+    NEUpsampleLayer(NEUpsampleLayer &&) = delete;
+    /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+    NEUpsampleLayer &operator=(NEUpsampleLayer &&) = delete;
+    /** Default destructor */
+    ~NEUpsampleLayer();
     /** Set the input output tensors.
      *
      * @param[in] input Source tensor. Data types supported: QASYMM8_SIGNED/QASYMM8/F16/F32.
@@ -66,8 +78,8 @@ public:
     void run() override;
 
 private:
-    NEUpsampleLayerKernel _kernel;
-    DataLayout _data_layout;
+    std::unique_ptr<NEUpsampleLayerKernel> _kernel;
+    DataLayout _data_layout;
 };
 } // arm_compute
 #endif /* ARM_COMPUTE_NEUPSAMPLELAYER_H */
diff --git a/arm_compute/runtime/NEON/functions/NEWarpAffine.h b/arm_compute/runtime/NEON/functions/NEWarpAffine.h
index eb7492b71f..6b9a2f4354 100644
--- a/arm_compute/runtime/NEON/functions/NEWarpAffine.h
+++ b/arm_compute/runtime/NEON/functions/NEWarpAffine.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
diff --git a/arm_compute/runtime/NEON/functions/NEWarpPerspective.h b/arm_compute/runtime/NEON/functions/NEWarpPerspective.h
index c439e82db5..caa91db64e 100644
--- a/arm_compute/runtime/NEON/functions/NEWarpPerspective.h
+++ b/arm_compute/runtime/NEON/functions/NEWarpPerspective.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
diff --git a/arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h
index 4090c8c409..6b61e7031b 100644
--- a/arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h
@@ -26,7 +26,6 @@
 
 #include "arm_compute/runtime/IFunction.h"
 
-#include "arm_compute/core/NEON/INEKernel.h"
 #include "arm_compute/core/Types.h"
 #include "arm_compute/runtime/CPP/functions/CPPPermute.h"
 #include "arm_compute/runtime/MemoryGroup.h"
@@ -41,6 +40,7 @@ namespace arm_compute
 {
 // Forward declarations
 class ITensor;
+class ICPPKernel;
 
 /** Basic function to simulate a convolution layer. This function calls the following NEON kernels:
 * -# @ref NEWinogradLayerTransformWeightsKernel (executed only once in the first call to the run() method )
@@ -56,6 +56,12 @@ class NEWinogradConvolutionLayer : public IFunction
 public:
     /** Constructor */
     NEWinogradConvolutionLayer(const std::shared_ptr<IMemoryManager> &memory_manager = nullptr);
+    /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+    NEWinogradConvolutionLayer(NEWinogradConvolutionLayer &&) = delete;
+    /** Prevent instances of this class from being moved (As this class contains non movable objects) */
+    NEWinogradConvolutionLayer &operator=(NEWinogradConvolutionLayer &&) = delete;
+    /** Default destructor */
+    ~NEWinogradConvolutionLayer() = default;
     /** Set the input and output tensors.
      *
@@ -105,12 +111,12 @@ public:
     NEWinogradConvolutionLayer &operator=(const NEWinogradConvolutionLayer &) = delete;
 
 private:
-    MemoryGroup _memory_group;
-    NEGEMM _gemm_function;
-    std::unique_ptr<INEKernel> _transform_input_kernel;
-    std::unique_ptr<INEKernel> _transform_output_kernel;
-    std::unique_ptr<INEKernel> _transform_weights_kernel;
-    NEActivationLayer _activationlayer_function;
+    MemoryGroup _memory_group;
+    NEGEMM _gemm_function;
+    std::unique_ptr<ICPPKernel> _transform_input_kernel;
+    std::unique_ptr<ICPPKernel> _transform_output_kernel;
+    std::unique_ptr<ICPPKernel> _transform_weights_kernel;
+    NEActivationLayer _activationlayer_function;
 
     CPPPermute _permute_input;
    CPPPermute _permute_weights;
diff --git a/arm_compute/runtime/NEON/functions/NEYOLOLayer.h b/arm_compute/runtime/NEON/functions/NEYOLOLayer.h
index 88219602c1..4c9a5bf6e4 100644
--- a/arm_compute/runtime/NEON/functions/NEYOLOLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEYOLOLayer.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2019 Arm Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -26,12 +26,12 @@
 
 #include "arm_compute/runtime/NEON/INESimpleFunctionNoBorder.h"
 
-#include "arm_compute/core/NEON/kernels/NEYOLOLayerKernel.h"
 #include "arm_compute/core/Types.h"
 
 namespace arm_compute
 {
 class ITensor;
+class ITensorInfo;
 
 /** Basic function to run @ref NEYOLOLayerKernel */
 class NEYOLOLayer : public INESimpleFunctionNoBorder
-- 
cgit v1.2.1
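The Winograd function above now forward-declares ICPPKernel and keeps its transform kernels as std::unique_ptr<ICPPKernel>, so the concrete kernels are reached only through a base interface. A closing sketch of that shape, with an invented IKernelLike interface standing in for the real base class:

#include <memory>

// Hypothetical stand-in for a kernel base interface such as ICPPKernel.
class IKernelLike
{
public:
    virtual ~IKernelLike() = default; // virtual destructor so unique_ptr<IKernelLike> can delete derived kernels
    virtual void run() = 0;
};

class TransformInputKernel : public IKernelLike
{
public:
    void run() override { /* input transform for one tile would go here */ }
};

class TransformOutputKernel : public IKernelLike
{
public:
    void run() override { /* output transform would go here */ }
};

class WinogradSketch
{
public:
    void configure()
    {
        // The function only needs the interface; the concrete types stay in the .cpp.
        _transform_input = std::make_unique<TransformInputKernel>();
        _transform_output = std::make_unique<TransformOutputKernel>();
    }
    void run()
    {
        _transform_input->run();
        _transform_output->run();
    }

private:
    std::unique_ptr<IKernelLike> _transform_input;
    std::unique_ptr<IKernelLike> _transform_output;
};

int main()
{
    WinogradSketch winograd;
    winograd.configure();
    winograd.run();
    return 0;
}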