From 04a8f8c4994f1c32b3f16a832c0e6f2599364c02 Mon Sep 17 00:00:00 2001
From: Giorgio Arena
Date: Thu, 23 Nov 2017 11:45:24 +0000
Subject: COMPMID-692 Consistent names for the interfaces

Change-Id: I4b1f3f0da9ff5342c7de7083736fe91871d14e5b
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/110351
Tested-by: BSG Visual Compute Jenkins server to access repositories on http://mpd-gerrit.cambridge.arm.com
Reviewed-by: Georgios Pinitas
Reviewed-by: Anthony Barbier
---
 arm_compute/core/CL/CLKernels.h                    |   8 +-
 .../core/CL/kernels/CLDepthConcatenateKernel.h     |  77 ---
 .../CL/kernels/CLDepthConcatenateLayerKernel.h     |  77 +++
 arm_compute/core/CL/kernels/CLDepthConvertKernel.h |  63 ---
 .../core/CL/kernels/CLDepthConvertLayerKernel.h    |  63 +++
 .../CL/kernels/CLDepthwiseConvolution3x3Kernel.h   |  75 ---
 .../kernels/CLDepthwiseConvolutionLayer3x3Kernel.h |  75 +++
 arm_compute/core/CL/kernels/CLL2NormalizeKernel.h  |  72 ---
 .../core/CL/kernels/CLL2NormalizeLayerKernel.h     |  72 +++
 arm_compute/core/GLES_COMPUTE/GCKernels.h          |   2 +-
 .../kernels/GCDepthConcatenateKernel.h             |  76 ---
 .../kernels/GCDepthConcatenateLayerKernel.h        |  76 +++
 arm_compute/core/NEON/NEKernels.h                  |   8 +-
 .../core/NEON/kernels/NEDepthConcatenateKernel.h   |  80 ----
 .../NEON/kernels/NEDepthConcatenateLayerKernel.h   |  80 ++++
 .../core/NEON/kernels/NEDepthConvertKernel.h       |  85 ----
 .../core/NEON/kernels/NEDepthConvertLayerKernel.h  |  85 ++++
 .../NEON/kernels/NEDepthwiseConvolution3x3Kernel.h |  69 ---
 .../kernels/NEDepthwiseConvolutionLayer3x3Kernel.h |  69 +++
 .../core/NEON/kernels/NEL2NormalizeKernel.h        |  70 ---
 .../core/NEON/kernels/NEL2NormalizeLayerKernel.h   |  70 +++
 arm_compute/graph/nodes/L2NormalizeLayer.h         |   2 +-
 arm_compute/runtime/CL/CLFunctions.h               |   8 +-
 .../runtime/CL/functions/CLDepthConcatenate.h      |  70 ---
 .../runtime/CL/functions/CLDepthConcatenateLayer.h |  70 +++
 arm_compute/runtime/CL/functions/CLDepthConvert.h  |  64 ---
 .../runtime/CL/functions/CLDepthConvertLayer.h     |  64 +++
 .../runtime/CL/functions/CLDepthwiseConvolution.h  | 110 -----
 .../CL/functions/CLDepthwiseConvolutionLayer.h     | 110 +++++
 .../CLDepthwiseSeparableConvolutionLayer.h         |   8 +-
 arm_compute/runtime/CL/functions/CLL2Normalize.h   |  69 ---
 .../runtime/CL/functions/CLL2NormalizeLayer.h      |  69 +++
 .../runtime/CL/functions/CLLaplacianPyramid.h      |   4 +-
 .../runtime/CL/functions/CLLaplacianReconstruct.h  |   6 +-
 arm_compute/runtime/GLES_COMPUTE/GCFunctions.h     |   2 +-
 .../GLES_COMPUTE/functions/GCDepthConcatenate.h    |  67 ---
 .../functions/GCDepthConcatenateLayer.h            |  67 +++
 arm_compute/runtime/NEON/NEFunctions.h             |   8 +-
 .../runtime/NEON/functions/NEDepthConcatenate.h    |  67 ---
 .../NEON/functions/NEDepthConcatenateLayer.h       |  67 +++
 .../runtime/NEON/functions/NEDepthConvert.h        |  69 ---
 .../runtime/NEON/functions/NEDepthConvertLayer.h   |  69 +++
 .../NEON/functions/NEDepthwiseConvolution.h        | 113 -----
 .../NEON/functions/NEDepthwiseConvolutionLayer.h   | 113 +++++
 .../NEDepthwiseSeparableConvolutionLayer.h         |   8 +-
 arm_compute/runtime/NEON/functions/NEL2Normalize.h |  70 ---
 .../runtime/NEON/functions/NEL2NormalizeLayer.h    |  70 +++
 .../runtime/NEON/functions/NELaplacianPyramid.h    |   4 +-
 .../NEON/functions/NELaplacianReconstruct.h        |   6 +-
 docs/00_introduction.dox                           |  10 +-
 src/core/CL/kernels/CLDepthConcatenateKernel.cpp   | 139 ------
 .../CL/kernels/CLDepthConcatenateLayerKernel.cpp   | 139 ++++++
 src/core/CL/kernels/CLDepthConvertKernel.cpp       | 117 -----
 src/core/CL/kernels/CLDepthConvertLayerKernel.cpp  | 117 +++++
 .../CL/kernels/CLDepthwiseConvolution3x3Kernel.cpp | 219 ---------
 .../CLDepthwiseConvolutionLayer3x3Kernel.cpp       | 219 +++++++++
 src/core/CL/kernels/CLL2NormalizeKernel.cpp        | 110 -----
 src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp   | 110 +++++
 .../kernels/GCDepthConcatenateKernel.cpp           | 143 ------
 .../kernels/GCDepthConcatenateLayerKernel.cpp      | 143 ++++++
 src/core/NEON/kernels/NEDepthConcatenateKernel.cpp | 170 -------
 .../NEON/kernels/NEDepthConcatenateLayerKernel.cpp | 170 +++++++
 src/core/NEON/kernels/NEDepthConvertKernel.cpp     | 524 ---------------------
 .../NEON/kernels/NEDepthConvertLayerKernel.cpp     | 524 +++++++++++++++++++++
 .../kernels/NEDepthwiseConvolution3x3Kernel.cpp    | 186 --------
 .../NEDepthwiseConvolutionLayer3x3Kernel.cpp       | 186 ++++++++
 src/core/NEON/kernels/NEL2NormalizeKernel.cpp      | 126 -----
 src/core/NEON/kernels/NEL2NormalizeLayerKernel.cpp | 126 +++++
 src/graph/operations/CLSimpleOperations.cpp        |  10 +-
 src/graph/operations/NESimpleOperations.cpp        |  10 +-
 src/runtime/CL/functions/CLDepthConcatenate.cpp    |  78 ---
 .../CL/functions/CLDepthConcatenateLayer.cpp       |  78 +++
 src/runtime/CL/functions/CLDepthConvert.cpp        |  38 --
 src/runtime/CL/functions/CLDepthConvertLayer.cpp   |  38 ++
 .../CL/functions/CLDepthwiseConvolution.cpp        | 138 ------
 .../CL/functions/CLDepthwiseConvolutionLayer.cpp   | 138 ++++++
 src/runtime/CL/functions/CLL2Normalize.cpp         |  63 ---
 src/runtime/CL/functions/CLL2NormalizeLayer.cpp    |  63 +++
 src/runtime/CL/functions/CLLaplacianPyramid.cpp    |   2 +-
 .../GLES_COMPUTE/functions/GCDepthConcatenate.cpp  |  69 ---
 .../functions/GCDepthConcatenateLayer.cpp          |  69 +++
 src/runtime/NEON/functions/NEDepthConcatenate.cpp  |  74 ---
 .../NEON/functions/NEDepthConcatenateLayer.cpp     |  74 +++
 src/runtime/NEON/functions/NEDepthConvert.cpp      |  38 --
 src/runtime/NEON/functions/NEDepthConvertLayer.cpp |  38 ++
 .../NEON/functions/NEDepthwiseConvolution.cpp      | 126 -----
 .../NEON/functions/NEDepthwiseConvolutionLayer.cpp | 126 +++++
 src/runtime/NEON/functions/NEL2Normalize.cpp       |  57 ---
 src/runtime/NEON/functions/NEL2NormalizeLayer.cpp  |  57 +++
 src/runtime/NEON/functions/NELaplacianPyramid.cpp  |   2 +-
 tests/benchmark/CL/DepthwiseConvolution.cpp        |  51 --
 tests/benchmark/CL/DepthwiseConvolutionLayer.cpp   |  51 ++
 tests/benchmark/CL/SYSTEM/MobileNet.cpp            |   4 +-
 tests/benchmark/CL/SYSTEM/MobileNetV1.cpp          |   6 +-
 .../fixtures/DepthwiseConvolutionFixture.h         |  92 ----
 .../fixtures/DepthwiseConvolutionLayerFixture.h    |  92 ++++
 tests/benchmark/fixtures/MobileNetFixture.h        |   4 +-
 tests/datasets/DepthwiseConvolutionDataset.h       | 187 --------
 tests/datasets/DepthwiseConvolutionLayerDataset.h  | 187 ++++++++
 .../MobileNetDepthwiseConvolutionDataset.h         |  58 ---
 .../MobileNetDepthwiseConvolutionLayerDataset.h    |  58 +++
 tests/datasets/ShapeDatasets.h                     |   6 +-
 tests/networks/MobileNetNetwork.h                  |   8 +-
 tests/validation/CL/DepthConcatenateLayer.cpp      |  10 +-
 tests/validation/CL/DepthConvert.cpp               | 484 -------------------
 tests/validation/CL/DepthConvertLayer.cpp          | 492 ++++++++++++++++++++
 tests/validation/CL/DepthwiseConvolution.cpp       | 114 -----
 tests/validation/CL/DepthwiseConvolutionLayer.cpp  | 117 +++++
 tests/validation/CL/L2Normalize.cpp                |  77 ---
 tests/validation/CL/L2NormalizeLayer.cpp           |  77 +++
 tests/validation/CPP/DepthConvert.cpp              | 157 ------
 tests/validation/CPP/DepthConvert.h                |  56 ---
 tests/validation/CPP/DepthConvertLayer.cpp         | 157 ++++++
 tests/validation/CPP/DepthConvertLayer.h           |  56 +++
 tests/validation/CPP/DepthwiseConvolution.cpp      | 195 --------
 tests/validation/CPP/DepthwiseConvolution.h        |  44 --
 tests/validation/CPP/DepthwiseConvolutionLayer.cpp | 195 ++++++++
 tests/validation/CPP/DepthwiseConvolutionLayer.h   |  44 ++
 .../CPP/DepthwiseSeparableConvolutionLayer.cpp     |   2 +-
 tests/validation/CPP/L2Normalize.cpp               |  88 ----
 tests/validation/CPP/L2Normalize.h                 |  44 --
 tests/validation/CPP/L2NormalizeLayer.cpp          |  88 ++++
 tests/validation/CPP/L2NormalizeLayer.h            |  44 ++
 .../GLES_COMPUTE/DepthConcatenateLayer.cpp         |   6 +-
 tests/validation/NEON/DepthConcatenateLayer.cpp    |  12 +-
 tests/validation/NEON/DepthConvert.cpp             | 484 -------------------
 tests/validation/NEON/DepthConvertLayer.cpp        | 492 ++++++++++++++++++++
 tests/validation/NEON/DepthwiseConvolution.cpp     | 126 -----
 .../validation/NEON/DepthwiseConvolutionLayer.cpp  | 131 ++++++
 tests/validation/NEON/L2Normalize.cpp              |  75 ---
 tests/validation/NEON/L2NormalizeLayer.cpp         |  75 +++
 .../fixtures/DepthConcatenateLayerFixture.h        |   2 +-
 tests/validation/fixtures/DepthConvertFixture.h    | 131 ------
 .../validation/fixtures/DepthConvertLayerFixture.h | 131 ++++++
 .../fixtures/DepthwiseConvolutionFixture.h         | 179 -------
 .../fixtures/DepthwiseConvolutionLayerFixture.h    | 179 +++++++
 tests/validation/fixtures/L2NormalizeFixture.h     | 107 -----
 .../validation/fixtures/L2NormalizeLayerFixture.h  | 107 +++++
 138 files changed, 6638 insertions(+), 6614 deletions(-)
 delete mode 100644 arm_compute/core/CL/kernels/CLDepthConcatenateKernel.h
 create mode 100644 arm_compute/core/CL/kernels/CLDepthConcatenateLayerKernel.h
 delete mode 100644 arm_compute/core/CL/kernels/CLDepthConvertKernel.h
 create mode 100644 arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h
 delete mode 100644 arm_compute/core/CL/kernels/CLDepthwiseConvolution3x3Kernel.h
 create mode 100644 arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3Kernel.h
 delete mode 100644 arm_compute/core/CL/kernels/CLL2NormalizeKernel.h
 create mode 100644 arm_compute/core/CL/kernels/CLL2NormalizeLayerKernel.h
 delete mode 100644 arm_compute/core/GLES_COMPUTE/kernels/GCDepthConcatenateKernel.h
 create mode 100644 arm_compute/core/GLES_COMPUTE/kernels/GCDepthConcatenateLayerKernel.h
 delete mode 100644 arm_compute/core/NEON/kernels/NEDepthConcatenateKernel.h
 create mode 100644 arm_compute/core/NEON/kernels/NEDepthConcatenateLayerKernel.h
 delete mode 100644 arm_compute/core/NEON/kernels/NEDepthConvertKernel.h
 create mode 100644 arm_compute/core/NEON/kernels/NEDepthConvertLayerKernel.h
 delete mode 100644 arm_compute/core/NEON/kernels/NEDepthwiseConvolution3x3Kernel.h
 create mode 100644 arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.h
 delete mode 100644 arm_compute/core/NEON/kernels/NEL2NormalizeKernel.h
 create mode 100644 arm_compute/core/NEON/kernels/NEL2NormalizeLayerKernel.h
 delete mode 100644 arm_compute/runtime/CL/functions/CLDepthConcatenate.h
 create mode 100644 arm_compute/runtime/CL/functions/CLDepthConcatenateLayer.h
 delete mode 100644 arm_compute/runtime/CL/functions/CLDepthConvert.h
 create mode 100644 arm_compute/runtime/CL/functions/CLDepthConvertLayer.h
 delete mode 100644 arm_compute/runtime/CL/functions/CLDepthwiseConvolution.h
 create mode 100644 arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h
 delete mode 100644 arm_compute/runtime/CL/functions/CLL2Normalize.h
 create mode 100644 arm_compute/runtime/CL/functions/CLL2NormalizeLayer.h
 delete mode 100644 arm_compute/runtime/GLES_COMPUTE/functions/GCDepthConcatenate.h
 create mode 100644 arm_compute/runtime/GLES_COMPUTE/functions/GCDepthConcatenateLayer.h
 delete mode 100644 arm_compute/runtime/NEON/functions/NEDepthConcatenate.h
 create mode 100644 arm_compute/runtime/NEON/functions/NEDepthConcatenateLayer.h
 delete mode 100644 arm_compute/runtime/NEON/functions/NEDepthConvert.h
 create mode 100644 arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h
 delete mode 100644 arm_compute/runtime/NEON/functions/NEDepthwiseConvolution.h
 create mode 100644 arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h
 delete mode 100644 arm_compute/runtime/NEON/functions/NEL2Normalize.h
 create mode 100644 arm_compute/runtime/NEON/functions/NEL2NormalizeLayer.h
 delete mode 100644 src/core/CL/kernels/CLDepthConcatenateKernel.cpp
 create mode 100644 src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp
 delete mode 100644 src/core/CL/kernels/CLDepthConvertKernel.cpp
 create mode 100644 src/core/CL/kernels/CLDepthConvertLayerKernel.cpp
 delete mode 100644 src/core/CL/kernels/CLDepthwiseConvolution3x3Kernel.cpp
 create mode 100644 src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3Kernel.cpp
 delete mode 100644 src/core/CL/kernels/CLL2NormalizeKernel.cpp
 create mode 100644 src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp
 delete mode 100644 src/core/GLES_COMPUTE/kernels/GCDepthConcatenateKernel.cpp
 create mode 100644 src/core/GLES_COMPUTE/kernels/GCDepthConcatenateLayerKernel.cpp
 delete mode 100644 src/core/NEON/kernels/NEDepthConcatenateKernel.cpp
 create mode 100644 src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp
 delete mode 100644 src/core/NEON/kernels/NEDepthConvertKernel.cpp
 create mode 100644 src/core/NEON/kernels/NEDepthConvertLayerKernel.cpp
 delete mode 100644 src/core/NEON/kernels/NEDepthwiseConvolution3x3Kernel.cpp
 create mode 100644 src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp
 delete mode 100644 src/core/NEON/kernels/NEL2NormalizeKernel.cpp
 create mode 100644 src/core/NEON/kernels/NEL2NormalizeLayerKernel.cpp
 delete mode 100644 src/runtime/CL/functions/CLDepthConcatenate.cpp
 create mode 100644 src/runtime/CL/functions/CLDepthConcatenateLayer.cpp
 delete mode 100644 src/runtime/CL/functions/CLDepthConvert.cpp
 create mode 100644 src/runtime/CL/functions/CLDepthConvertLayer.cpp
 delete mode 100644 src/runtime/CL/functions/CLDepthwiseConvolution.cpp
 create mode 100644 src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp
 delete mode 100644 src/runtime/CL/functions/CLL2Normalize.cpp
 create mode 100644 src/runtime/CL/functions/CLL2NormalizeLayer.cpp
 delete mode 100755 src/runtime/GLES_COMPUTE/functions/GCDepthConcatenate.cpp
 create mode 100755 src/runtime/GLES_COMPUTE/functions/GCDepthConcatenateLayer.cpp
 delete mode 100644 src/runtime/NEON/functions/NEDepthConcatenate.cpp
 create mode 100644 src/runtime/NEON/functions/NEDepthConcatenateLayer.cpp
 delete mode 100644 src/runtime/NEON/functions/NEDepthConvert.cpp
 create mode 100644 src/runtime/NEON/functions/NEDepthConvertLayer.cpp
 delete mode 100644 src/runtime/NEON/functions/NEDepthwiseConvolution.cpp
 create mode 100644 src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp
 delete mode 100644 src/runtime/NEON/functions/NEL2Normalize.cpp
 create mode 100644 src/runtime/NEON/functions/NEL2NormalizeLayer.cpp
 delete mode 100644 tests/benchmark/CL/DepthwiseConvolution.cpp
 create mode 100644 tests/benchmark/CL/DepthwiseConvolutionLayer.cpp
 delete mode 100644 tests/benchmark/fixtures/DepthwiseConvolutionFixture.h
 create mode 100644 tests/benchmark/fixtures/DepthwiseConvolutionLayerFixture.h
 delete mode 100644 tests/datasets/DepthwiseConvolutionDataset.h
 create mode 100644 tests/datasets/DepthwiseConvolutionLayerDataset.h
 delete mode 100644 tests/datasets/MobileNetDepthwiseConvolutionDataset.h
 create mode 100644 tests/datasets/MobileNetDepthwiseConvolutionLayerDataset.h
 delete mode 100644 tests/validation/CL/DepthConvert.cpp
 create mode 100644 tests/validation/CL/DepthConvertLayer.cpp
 delete mode 100644 tests/validation/CL/DepthwiseConvolution.cpp
 create mode 100644 tests/validation/CL/DepthwiseConvolutionLayer.cpp
 delete mode 100644 tests/validation/CL/L2Normalize.cpp
 create mode 100644 tests/validation/CL/L2NormalizeLayer.cpp
 delete mode 100644 tests/validation/CPP/DepthConvert.cpp
 delete mode 100644 tests/validation/CPP/DepthConvert.h
 create mode 100644 tests/validation/CPP/DepthConvertLayer.cpp
 create mode 100644 tests/validation/CPP/DepthConvertLayer.h
 delete mode 100644 tests/validation/CPP/DepthwiseConvolution.cpp
 delete mode 100644 tests/validation/CPP/DepthwiseConvolution.h
 create mode 100644 tests/validation/CPP/DepthwiseConvolutionLayer.cpp
 create mode 100644 tests/validation/CPP/DepthwiseConvolutionLayer.h
 delete mode 100644 tests/validation/CPP/L2Normalize.cpp
 delete mode 100644 tests/validation/CPP/L2Normalize.h
 create mode 100644 tests/validation/CPP/L2NormalizeLayer.cpp
 create mode 100644 tests/validation/CPP/L2NormalizeLayer.h
 delete mode 100644 tests/validation/NEON/DepthConvert.cpp
 create mode 100644 tests/validation/NEON/DepthConvertLayer.cpp
 delete mode 100644 tests/validation/NEON/DepthwiseConvolution.cpp
 create mode 100644 tests/validation/NEON/DepthwiseConvolutionLayer.cpp
 delete mode 100644 tests/validation/NEON/L2Normalize.cpp
 create mode 100644 tests/validation/NEON/L2NormalizeLayer.cpp
 delete mode 100644 tests/validation/fixtures/DepthConvertFixture.h
 create mode 100644 tests/validation/fixtures/DepthConvertLayerFixture.h
 delete mode 100644 tests/validation/fixtures/DepthwiseConvolutionFixture.h
 create mode 100644 tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h
 delete mode 100644 tests/validation/fixtures/L2NormalizeFixture.h
 create mode 100644 tests/validation/fixtures/L2NormalizeLayerFixture.h

diff --git a/arm_compute/core/CL/CLKernels.h b/arm_compute/core/CL/CLKernels.h
index 1ffbad90cf..9da0e5ab3a 100644
--- a/arm_compute/core/CL/CLKernels.h
+++ b/arm_compute/core/CL/CLKernels.h
@@ -42,9 +42,9 @@
 #include "arm_compute/core/CL/kernels/CLCol2ImKernel.h"
 #include "arm_compute/core/CL/kernels/CLColorConvertKernel.h"
 #include "arm_compute/core/CL/kernels/CLConvolutionKernel.h"
-#include "arm_compute/core/CL/kernels/CLDepthConcatenateKernel.h"
-#include "arm_compute/core/CL/kernels/CLDepthConvertKernel.h"
-#include "arm_compute/core/CL/kernels/CLDepthwiseConvolution3x3Kernel.h"
+#include "arm_compute/core/CL/kernels/CLDepthConcatenateLayerKernel.h"
+#include "arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h"
+#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3Kernel.h"
 #include "arm_compute/core/CL/kernels/CLDepthwiseIm2ColKernel.h"
 #include "arm_compute/core/CL/kernels/CLDepthwiseVectorToTensorKernel.h"
 #include "arm_compute/core/CL/kernels/CLDepthwiseWeightsReshapeKernel.h"
@@ -76,7 +76,7 @@
 #include "arm_compute/core/CL/kernels/CLHistogramKernel.h"
 #include "arm_compute/core/CL/kernels/CLIm2ColKernel.h"
 #include "arm_compute/core/CL/kernels/CLIntegralImageKernel.h"
-#include "arm_compute/core/CL/kernels/CLL2NormalizeKernel.h"
+#include "arm_compute/core/CL/kernels/CLL2NormalizeLayerKernel.h"
 #include "arm_compute/core/CL/kernels/CLLKTrackerKernel.h"
 #include "arm_compute/core/CL/kernels/CLLocallyConnectedMatrixMultiplyKernel.h"
 #include "arm_compute/core/CL/kernels/CLMagnitudePhaseKernel.h"
diff --git a/arm_compute/core/CL/kernels/CLDepthConcatenateKernel.h b/arm_compute/core/CL/kernels/CLDepthConcatenateKernel.h
deleted file mode 100644
index 2833d8ec23..0000000000
--- a/arm_compute/core/CL/kernels/CLDepthConcatenateKernel.h
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * Copyright (c) 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __ARM_COMPUTE_CLDEPTHCONCATENATEKERNEL_H__
-#define __ARM_COMPUTE_CLDEPTHCONCATENATEKERNEL_H__
-
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the depth concatenate kernel.
- *  The input tensor will be concatenated into the output tensor.
- */
-class CLDepthConcatenateKernel : public ICLKernel
-{
-public:
-    /** Default constructor */
-    CLDepthConcatenateKernel();
-    /** Prevent instances of this class from being copied (As this class contains pointers) */
-    CLDepthConcatenateKernel(const CLDepthConcatenateKernel &) = delete;
-    /** Prevent instances of this class from being copied (As this class contains pointers) */
-    CLDepthConcatenateKernel &operator=(const CLDepthConcatenateKernel &) = delete;
-    /** Allow instances of this class to be moved */
-    CLDepthConcatenateKernel(CLDepthConcatenateKernel &&) = default;
-    /** Allow instances of this class to be moved */
-    CLDepthConcatenateKernel &operator=(CLDepthConcatenateKernel &&) = default;
-    /** Default destructor */
-    ~CLDepthConcatenateKernel() = default;
-    /** Initialise the kernel's inputs and output
-     *
-     * @param[in]     input        Input tensor. Data types supported: QS8/QS16/F16/F32.
-     * @param[in]     depth_offset The offset on the Z axis.
-     * @param[in,out] output       Output tensor. Data types supported: Same as @p input.
-     *
-     * @note: The output tensor's low two dimensions can't be smaller than the input one's.
-     * @note: The gaps between the two lowest dimensions of input and output need to be divisible by 2.
-     *
-     */
-    void configure(const ICLTensor *input, unsigned int depth_offset, ICLTensor *output);
-
-    // Inherited methods overridden:
-    void run(const Window &window, cl::CommandQueue &queue) override;
-    BorderSize border_size() const override;
-
-private:
-    const ICLTensor *_input;
-    ICLTensor       *_output;
-    int              _top_bottom;
-    int              _left_right;
-    unsigned int     _depth_offset;
-};
-} // namespace arm_compute
-#endif /* __ARM_COMPUTE_CLDEPTHCONCATENATEKERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLDepthConcatenateLayerKernel.h b/arm_compute/core/CL/kernels/CLDepthConcatenateLayerKernel.h
new file mode 100644
index 0000000000..467bdfab3b
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLDepthConcatenateLayerKernel.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __ARM_COMPUTE_CLDEPTHCONCATENATELAYERKERNEL_H__
+#define __ARM_COMPUTE_CLDEPTHCONCATENATELAYERKERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the depth concatenate kernel.
+ *  The input tensor will be concatenated into the output tensor.
+ */
+class CLDepthConcatenateLayerKernel : public ICLKernel
+{
+public:
+    /** Default constructor */
+    CLDepthConcatenateLayerKernel();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLDepthConcatenateLayerKernel(const CLDepthConcatenateLayerKernel &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLDepthConcatenateLayerKernel &operator=(const CLDepthConcatenateLayerKernel &) = delete;
+    /** Allow instances of this class to be moved */
+    CLDepthConcatenateLayerKernel(CLDepthConcatenateLayerKernel &&) = default;
+    /** Allow instances of this class to be moved */
+    CLDepthConcatenateLayerKernel &operator=(CLDepthConcatenateLayerKernel &&) = default;
+    /** Default destructor */
+    ~CLDepthConcatenateLayerKernel() = default;
+    /** Initialise the kernel's inputs and output
+     *
+     * @param[in]     input        Input tensor. Data types supported: QS8/QS16/F16/F32.
+     * @param[in]     depth_offset The offset on the Z axis.
+     * @param[in,out] output       Output tensor. Data types supported: Same as @p input.
+     *
+     * @note: The output tensor's two lowest dimensions can't be smaller than the input's.
+     * @note: The gaps between the two lowest dimensions of input and output need to be divisible by 2.
+     *
+     */
+    void configure(const ICLTensor *input, unsigned int depth_offset, ICLTensor *output);
+
+    // Inherited methods overridden:
+    void run(const Window &window, cl::CommandQueue &queue) override;
+    BorderSize border_size() const override;
+
+private:
+    const ICLTensor *_input;
+    ICLTensor       *_output;
+    int              _top_bottom;
+    int              _left_right;
+    unsigned int     _depth_offset;
+};
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_CLDEPTHCONCATENATELAYERKERNEL_H__ */
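Reviewer note: since only the class name changes, the calling pattern is untouched. A minimal sketch of driving the renamed kernel directly follows; it is not part of the patch. It assumes an initialised CLScheduler and already-allocated tensors (input0, input1 and output are hypothetical names), and it omits the fill-border kernels that the CLDepthConcatenateLayer runtime function normally schedules alongside this kernel.

#include "arm_compute/core/CL/kernels/CLDepthConcatenateLayerKernel.h"
#include "arm_compute/runtime/CL/CLScheduler.h"

using namespace arm_compute;

// Sketch only: concatenate two inputs along the Z axis into `output`.
void concat_two(ICLTensor &input0, ICLTensor &input1, ICLTensor &output)
{
    CLDepthConcatenateLayerKernel k0;
    CLDepthConcatenateLayerKernel k1;
    // Each input is written into `output` at a different depth offset.
    k0.configure(&input0, 0, &output);
    k1.configure(&input1, static_cast<unsigned int>(input0.info()->dimension(2)), &output);
    CLScheduler::get().enqueue(k0, false);
    CLScheduler::get().enqueue(k1, true); // flush the queue
}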
diff --git a/arm_compute/core/CL/kernels/CLDepthConvertKernel.h b/arm_compute/core/CL/kernels/CLDepthConvertKernel.h
deleted file mode 100644
index da70bff0fd..0000000000
--- a/arm_compute/core/CL/kernels/CLDepthConvertKernel.h
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright (c) 2016, 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef __ARM_COMPUTE_CLDEPTHCONVERTKERNEL_H__
-#define __ARM_COMPUTE_CLDEPTHCONVERTKERNEL_H__
-
-#include "arm_compute/core/CL/ICLSimple2DKernel.h"
-#include "arm_compute/core/Types.h"
-
-#include <cstdint>
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the depth conversion kernel.
- *
- */
-class CLDepthConvertKernel : public ICLSimple2DKernel
-{
-public:
-    /** Set the input and output of the kernel.
-     *
-     * Valid conversions Input -> Output :
-     *
-     *   - QS8 -> F32
-     *   - QS16 -> F32
-     *   - U8 -> U16, S16, U32, S32
-     *   - U16 -> U8, U32, S32
-     *   - S16 -> U8, U32, S32
-     *   - U32 -> U8, U16, S16
-     *   - S32 -> U8, U16, S16
-     *   - F32 -> QS8, QS16
-     *
-     * @param[in]  input  The input tensor to convert. Data types supported: U8/QS8/U16/S16/QS16/U32/S32/F32.
-     * @param[out] output The output tensor. Data types supported: U8/QS8/U16/S16/QS16/U32/S32/F32.
-     * @param[in]  policy Conversion policy
-     * @param[in]  shift  Value for down/up conversions. Must be 0 <= shift < 8.
-     */
-    void configure(const ICLTensor *input, ICLTensor *output, ConvertPolicy policy, uint32_t shift);
-};
-} // namespace arm_compute
-#endif /*__ARM_COMPUTE_CLDEPTHCONVERTKERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h b/arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h
new file mode 100644
index 0000000000..3a6310d69e
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLDEPTHCONVERTLAYERKERNEL_H__
+#define __ARM_COMPUTE_CLDEPTHCONVERTLAYERKERNEL_H__
+
+#include "arm_compute/core/CL/ICLSimple2DKernel.h"
+#include "arm_compute/core/Types.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the depth conversion kernel.
+ *
+ */
+class CLDepthConvertLayerKernel : public ICLSimple2DKernel
+{
+public:
+    /** Set the input and output of the kernel.
+     *
+     * Valid conversions Input -> Output :
+     *
+     *   - QS8 -> F32
+     *   - QS16 -> F32
+     *   - U8 -> U16, S16, U32, S32
+     *   - U16 -> U8, U32, S32
+     *   - S16 -> U8, U32, S32
+     *   - U32 -> U8, U16, S16
+     *   - S32 -> U8, U16, S16
+     *   - F32 -> QS8, QS16
+     *
+     * @param[in]  input  The input tensor to convert. Data types supported: U8/QS8/U16/S16/QS16/U32/S32/F32.
+     * @param[out] output The output tensor. Data types supported: U8/QS8/U16/S16/QS16/U32/S32/F32.
+     * @param[in]  policy Conversion policy
+     * @param[in]  shift  Value for down/up conversions. Must be 0 <= shift < 8.
+     */
+    void configure(const ICLTensor *input, ICLTensor *output, ConvertPolicy policy, uint32_t shift);
+};
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_CLDEPTHCONVERTLAYERKERNEL_H__ */
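A minimal usage sketch for the renamed conversion kernel, not part of the patch: it assumes an initialised CLScheduler and two allocated tensors of the same shape, `src` in U8 and `dst` in U16 (hypothetical names).

#include "arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h"
#include "arm_compute/runtime/CL/CLScheduler.h"

using namespace arm_compute;

// Sketch only: convert a U8 tensor to U16.
void convert_u8_to_u16(const ICLTensor *src, ICLTensor *dst)
{
    CLDepthConvertLayerKernel convert;
    // Up-conversion: SATURATE cannot trigger here; shift 0 keeps values unchanged.
    convert.configure(src, dst, ConvertPolicy::SATURATE, 0);
    CLScheduler::get().enqueue(convert);
}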
diff --git a/arm_compute/core/CL/kernels/CLDepthwiseConvolution3x3Kernel.h b/arm_compute/core/CL/kernels/CLDepthwiseConvolution3x3Kernel.h
deleted file mode 100644
index f9689a4329..0000000000
--- a/arm_compute/core/CL/kernels/CLDepthwiseConvolution3x3Kernel.h
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Copyright (c) 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef __ARM_COMPUTE_CLDEPTHWISECONVOLUTIONKERNEL3x3_H__
-#define __ARM_COMPUTE_CLDEPTHWISECONVOLUTIONKERNEL3x3_H__
-
-#include "arm_compute/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the kernel to run a 3x3 depthwise convolution on a tensor.
- */
-class CLDepthwiseConvolution3x3Kernel : public ICLKernel
-{
-public:
-    /** Default constructor */
-    CLDepthwiseConvolution3x3Kernel();
-    /** Prevent instances of this class from being copied (As this class contains pointers) */
-    CLDepthwiseConvolution3x3Kernel(const CLDepthwiseConvolution3x3Kernel &) = delete;
-    /** Prevent instances of this class from being copied (As this class contains pointers) */
-    CLDepthwiseConvolution3x3Kernel &operator=(const CLDepthwiseConvolution3x3Kernel &) = delete;
-    /** Default Move Constructor. */
-    CLDepthwiseConvolution3x3Kernel(CLDepthwiseConvolution3x3Kernel &&) = default;
-    /** Default move assignment operator. */
-    CLDepthwiseConvolution3x3Kernel &operator=(CLDepthwiseConvolution3x3Kernel &&) = default;
-    /** Initialize the function's source, destination, conv and border_size.
-     *
-     * @param[in]  input     Source tensor. DataType supported: QASYMM8/F32.
-     * @param[in]  weights   Weights tensor. A 3D tensor with dimensions [3, 3, IFM]. Data type supported: Same as @p input.
-     * @param[in]  biases    (Optional) Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed.
-     *                       Data type supported: Same as @p input.
-     * @param[out] output    Destination tensor. Data type supported: Same as @p input.
-     * @param[in]  conv_info Padding and stride information to use for the convolution.
-     */
-    void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info);
-
-    // Inherited methods overridden:
-    void run(const Window &window, cl::CommandQueue &queue) override;
-    BorderSize border_size() const override;
-
-private:
-    BorderSize       _border_size;
-    const ICLTensor *_input;
-    ICLTensor       *_output;
-    const ICLTensor *_weights;
-    const ICLTensor *_biases;
-    unsigned int     _conv_stride_x;
-    unsigned int     _conv_stride_y;
-    unsigned int     _conv_pad_left;
-    unsigned int     _conv_pad_top;
-};
-} // namespace arm_compute
-#endif /*__ARM_COMPUTE_CLDEPTHWISECONVOLUTIONKERNEL3x3_H__ */
diff --git a/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3Kernel.h b/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3Kernel.h
new file mode 100644
index 0000000000..eb62465f84
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3Kernel.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLDEPTHWISECONVOLUTIONLAYERKERNEL3x3_H__
+#define __ARM_COMPUTE_CLDEPTHWISECONVOLUTIONLAYERKERNEL3x3_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the kernel to run a 3x3 depthwise convolution on a tensor.
+ */
+class CLDepthwiseConvolutionLayer3x3Kernel : public ICLKernel
+{
+public:
+    /** Default constructor */
+    CLDepthwiseConvolutionLayer3x3Kernel();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLDepthwiseConvolutionLayer3x3Kernel(const CLDepthwiseConvolutionLayer3x3Kernel &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLDepthwiseConvolutionLayer3x3Kernel &operator=(const CLDepthwiseConvolutionLayer3x3Kernel &) = delete;
+    /** Default move constructor */
+    CLDepthwiseConvolutionLayer3x3Kernel(CLDepthwiseConvolutionLayer3x3Kernel &&) = default;
+    /** Default move assignment operator */
+    CLDepthwiseConvolutionLayer3x3Kernel &operator=(CLDepthwiseConvolutionLayer3x3Kernel &&) = default;
+    /** Initialize the function's source, destination, conv and border_size.
+     *
+     * @param[in]  input     Source tensor. DataType supported: QASYMM8/F32.
+     * @param[in]  weights   Weights tensor. A 3D tensor with dimensions [3, 3, IFM]. Data type supported: Same as @p input.
+     * @param[in]  biases    (Optional) Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed.
+     *                       Data type supported: Same as @p input.
+     * @param[out] output    Destination tensor. Data type supported: Same as @p input.
+     * @param[in]  conv_info Padding and stride information to use for the convolution.
+     */
+    void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info);
+
+    // Inherited methods overridden:
+    void run(const Window &window, cl::CommandQueue &queue) override;
+    BorderSize border_size() const override;
+
+private:
+    BorderSize       _border_size;
+    const ICLTensor *_input;
+    ICLTensor       *_output;
+    const ICLTensor *_weights;
+    const ICLTensor *_biases;
+    unsigned int     _conv_stride_x;
+    unsigned int     _conv_stride_y;
+    unsigned int     _conv_pad_left;
+    unsigned int     _conv_pad_top;
+};
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_CLDEPTHWISECONVOLUTIONLAYERKERNEL3x3_H__ */
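A configuration sketch for the renamed 3x3 depthwise kernel, not part of the patch: stride 1 with one pixel of padding on each side ("same" padding for a 3x3 window). It assumes allocated tensors `input` [W, H, IFM], `weights` [3, 3, IFM] and a matching `output` (hypothetical names), and it omits the border fill that the CLDepthwiseConvolutionLayer runtime function performs before enqueueing this kernel.

#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3Kernel.h"
#include "arm_compute/runtime/CL/CLScheduler.h"

using namespace arm_compute;

// Sketch only: run a bias-less 3x3 depthwise convolution.
void run_depthwise_3x3(const ICLTensor *input, const ICLTensor *weights, ICLTensor *output)
{
    CLDepthwiseConvolutionLayer3x3Kernel dwc;
    const PadStrideInfo conv_info(1 /* stride_x */, 1 /* stride_y */, 1 /* pad_x */, 1 /* pad_y */);
    dwc.configure(input, weights, nullptr /* no biases */, output, conv_info);
    CLScheduler::get().enqueue(dwc);
}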
diff --git a/arm_compute/core/CL/kernels/CLL2NormalizeKernel.h b/arm_compute/core/CL/kernels/CLL2NormalizeKernel.h
deleted file mode 100644
index 2056b4e615..0000000000
--- a/arm_compute/core/CL/kernels/CLL2NormalizeKernel.h
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * Copyright (c) 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef __ARM_COMPUTE_CLL2NORMALIZEKERNEL_H__
-#define __ARM_COMPUTE_CLL2NORMALIZEKERNEL_H__
-
-#include "arm_compute/core/CL/ICLKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the reduction operation kernel */
-class CLL2NormalizeKernel : public ICLKernel
-{
-public:
-    /** Default constructor */
-    CLL2NormalizeKernel();
-    /** Prevent instances of this class from being copied (As this class contains pointers) */
-    CLL2NormalizeKernel(const CLL2NormalizeKernel &) = delete;
-    /** Prevent instances of this class from being copied (As this class contains pointers) */
-    CLL2NormalizeKernel &operator=(const CLL2NormalizeKernel &) = delete;
-    /** Allow instances of this class to be moved */
-    CLL2NormalizeKernel(CLL2NormalizeKernel &&) = default;
-    /** Allow instances of this class to be moved */
-    CLL2NormalizeKernel &operator=(CLL2NormalizeKernel &&) = default;
-    /** Default destructor */
-    ~CLL2NormalizeKernel() = default;
-
-    /** Set the input and output tensors.
-     *
-     * @param[in]  input   Source tensor. Data types supported: QS8, QS16, F32.
-     * @param[in]  sum     Sum values tensor. Data types supported: same as @p input.
-     * @param[out] output  Destination tensor. Data types supported: Same as @p input.
-     * @param[in]  axis    Axis along which to reduce. Supported reduction axis : 0
-     * @param[in]  epsilon Lower bound value for the normalization.
-     */
-    void configure(const ICLTensor *input, const ICLTensor *sum, ICLTensor *output, unsigned int axis, float epsilon);
-
-    // Inherited methods overridden:
-    void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
-    const ICLTensor *_input;
-    const ICLTensor *_sum;
-    ICLTensor       *_output;
-    unsigned int     _axis;
-    float            _epsilon;
-};
-} // namespace arm_compute
-#endif /*__ARM_COMPUTE_CLL2NORMALIZEKERNEL_H__ */
diff --git a/arm_compute/core/CL/kernels/CLL2NormalizeLayerKernel.h b/arm_compute/core/CL/kernels/CLL2NormalizeLayerKernel.h
new file mode 100644
index 0000000000..f7d717119b
--- /dev/null
+++ b/arm_compute/core/CL/kernels/CLL2NormalizeLayerKernel.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLL2NORMALIZELAYERKERNEL_H__
+#define __ARM_COMPUTE_CLL2NORMALIZELAYERKERNEL_H__
+
+#include "arm_compute/core/CL/ICLKernel.h"
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Interface for the reduction operation kernel */
+class CLL2NormalizeLayerKernel : public ICLKernel
+{
+public:
+    /** Default constructor */
+    CLL2NormalizeLayerKernel();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLL2NormalizeLayerKernel(const CLL2NormalizeLayerKernel &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    CLL2NormalizeLayerKernel &operator=(const CLL2NormalizeLayerKernel &) = delete;
+    /** Allow instances of this class to be moved */
+    CLL2NormalizeLayerKernel(CLL2NormalizeLayerKernel &&) = default;
+    /** Allow instances of this class to be moved */
+    CLL2NormalizeLayerKernel &operator=(CLL2NormalizeLayerKernel &&) = default;
+    /** Default destructor */
+    ~CLL2NormalizeLayerKernel() = default;
+
+    /** Set the input and output tensors.
+     *
+     * @param[in]  input   Source tensor. Data types supported: QS8, QS16, F32.
+     * @param[in]  sum     Sum values tensor. Data types supported: same as @p input.
+     * @param[out] output  Destination tensor. Data types supported: Same as @p input.
+     * @param[in]  axis    Axis along which to reduce. Supported reduction axis : 0
+     * @param[in]  epsilon Lower bound value for the normalization.
+     */
+    void configure(const ICLTensor *input, const ICLTensor *sum, ICLTensor *output, unsigned int axis, float epsilon);
+
+    // Inherited methods overridden:
+    void run(const Window &window, cl::CommandQueue &queue) override;
+
+private:
+    const ICLTensor *_input;
+    const ICLTensor *_sum;
+    ICLTensor       *_output;
+    unsigned int     _axis;
+    float            _epsilon;
+};
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_CLL2NORMALIZELAYERKERNEL_H__ */
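A usage sketch for the renamed L2-normalise kernel, not part of the patch: the kernel consumes a pre-computed sum-of-squares tensor, which the CLL2NormalizeLayer runtime function normally produces with a SUM_SQUARE reduction over the same axis. Tensor names below are hypothetical and assumed allocated.

#include "arm_compute/core/CL/kernels/CLL2NormalizeLayerKernel.h"
#include "arm_compute/runtime/CL/CLScheduler.h"

using namespace arm_compute;

// Sketch only: `sum` is assumed to hold the sum of squares of `input` along axis 0.
void l2_normalize_x(const ICLTensor *input, const ICLTensor *sum, ICLTensor *output)
{
    CLL2NormalizeLayerKernel norm;
    norm.configure(input, sum, output, 0 /* axis */, 1e-12f /* epsilon */);
    CLScheduler::get().enqueue(norm);
}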
diff --git a/arm_compute/core/GLES_COMPUTE/GCKernels.h b/arm_compute/core/GLES_COMPUTE/GCKernels.h
index 57d11d5f18..9831e25299 100644
--- a/arm_compute/core/GLES_COMPUTE/GCKernels.h
+++ b/arm_compute/core/GLES_COMPUTE/GCKernels.h
@@ -29,7 +29,7 @@
 #include "arm_compute/core/GLES_COMPUTE/kernels/GCActivationLayerKernel.h"
 #include "arm_compute/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.h"
 #include "arm_compute/core/GLES_COMPUTE/kernels/GCCol2ImKernel.h"
-#include "arm_compute/core/GLES_COMPUTE/kernels/GCDepthConcatenateKernel.h"
+#include "arm_compute/core/GLES_COMPUTE/kernels/GCDepthConcatenateLayerKernel.h"
 #include "arm_compute/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.h"
 #include "arm_compute/core/GLES_COMPUTE/kernels/GCDropoutKernel.h"
 #include "arm_compute/core/GLES_COMPUTE/kernels/GCFillBorderKernel.h"
diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCDepthConcatenateKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCDepthConcatenateKernel.h
deleted file mode 100644
index 9a34a9a9c5..0000000000
--- a/arm_compute/core/GLES_COMPUTE/kernels/GCDepthConcatenateKernel.h
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- * Copyright (c) 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __ARM_COMPUTE_GCDEPTHCONCATENATEKERNEL_H__
-#define __ARM_COMPUTE_GCDEPTHCONCATENATEKERNEL_H__
-
-#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h"
-#include "arm_compute/core/Types.h"
-
-namespace arm_compute
-{
-class IGCTensor;
-
-/** Interface for the depth concatenate kernel.
- *  The input tensor will be concatenated into the output tensor.
- */
-class GCDepthConcatenateKernel : public IGCKernel
-{
-public:
-    /** Default constructor */
-    GCDepthConcatenateKernel();
-    /** Prevent instances of this class from being copied (As this class contains pointers) */
-    GCDepthConcatenateKernel(const GCDepthConcatenateKernel &) = delete;
-    /** Prevent instances of this class from being copied (As this class contains pointers) */
-    GCDepthConcatenateKernel &operator=(const GCDepthConcatenateKernel &) = delete;
-    /** Allow instances of this class to be moved */
-    GCDepthConcatenateKernel(GCDepthConcatenateKernel &&) = default;
-    /** Allow instances of this class to be moved */
-    GCDepthConcatenateKernel &operator=(GCDepthConcatenateKernel &&) = default;
-    /** Default destructor */
-    ~GCDepthConcatenateKernel() = default;
-    /** Initialise the kernel's inputs and output
-     *
-     * @param[in]     input        Input tensor. Data types supported: F16/F32.
-     * @param[in]     depth_offset The offset on the Z axis.
-     * @param[in,out] output       Output tensor. Data types supported: Same as @p input.
-     *
-     * @note: The output tensor's low two dimensions can't be smaller than the input one's.
-     * @note: The gaps between the two lowest dimensions of input and output need to be divisible by 2.
-     *
-     */
-    void configure(const IGCTensor *input, unsigned int depth_offset, IGCTensor *output);
-
-    // Inherited methods overridden:
-    void run(const Window &window) override;
-    BorderSize border_size() const override;
-
-private:
-    const IGCTensor *_input;
-    IGCTensor       *_output;
-    int              _top_bottom;
-    int              _left_right;
-};
-}
-#endif /* __ARM_COMPUTE_GCDEPTHCONCATENATEKERNEL_H__ */
diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCDepthConcatenateLayerKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCDepthConcatenateLayerKernel.h
new file mode 100644
index 0000000000..ce220cc564
--- /dev/null
+++ b/arm_compute/core/GLES_COMPUTE/kernels/GCDepthConcatenateLayerKernel.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __ARM_COMPUTE_GCDEPTHCONCATENATELAYERKERNEL_H__
+#define __ARM_COMPUTE_GCDEPTHCONCATENATELAYERKERNEL_H__
+
+#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h"
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+class IGCTensor;
+
+/** Interface for the depth concatenate kernel.
+ *  The input tensor will be concatenated into the output tensor.
+ */
+class GCDepthConcatenateLayerKernel : public IGCKernel
+{
+public:
+    /** Default constructor */
+    GCDepthConcatenateLayerKernel();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    GCDepthConcatenateLayerKernel(const GCDepthConcatenateLayerKernel &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    GCDepthConcatenateLayerKernel &operator=(const GCDepthConcatenateLayerKernel &) = delete;
+    /** Allow instances of this class to be moved */
+    GCDepthConcatenateLayerKernel(GCDepthConcatenateLayerKernel &&) = default;
+    /** Allow instances of this class to be moved */
+    GCDepthConcatenateLayerKernel &operator=(GCDepthConcatenateLayerKernel &&) = default;
+    /** Default destructor */
+    ~GCDepthConcatenateLayerKernel() = default;
+    /** Initialise the kernel's inputs and output
+     *
+     * @param[in]     input        Input tensor. Data types supported: F16/F32.
+     * @param[in]     depth_offset The offset on the Z axis.
+     * @param[in,out] output       Output tensor. Data types supported: Same as @p input.
+     *
+     * @note: The output tensor's two lowest dimensions can't be smaller than the input's.
+     * @note: The gaps between the two lowest dimensions of input and output need to be divisible by 2.
+     *
+     */
+    void configure(const IGCTensor *input, unsigned int depth_offset, IGCTensor *output);
+
+    // Inherited methods overridden:
+    void run(const Window &window) override;
+    BorderSize border_size() const override;
+
+private:
+    const IGCTensor *_input;
+    IGCTensor       *_output;
+    int              _top_bottom;
+    int              _left_right;
+};
+}
+#endif /* __ARM_COMPUTE_GCDEPTHCONCATENATELAYERKERNEL_H__ */
diff --git a/arm_compute/core/NEON/NEKernels.h b/arm_compute/core/NEON/NEKernels.h
index b23e2ac5a3..6c31fa4fb1 100644
--- a/arm_compute/core/NEON/NEKernels.h
+++ b/arm_compute/core/NEON/NEKernels.h
@@ -44,9 +44,9 @@
 #include "arm_compute/core/NEON/kernels/NEConvolutionKernel.h"
 #include "arm_compute/core/NEON/kernels/NECumulativeDistributionKernel.h"
 #include "arm_compute/core/NEON/kernels/NEDeconvolutionLayerUpsampleKernel.h"
-#include "arm_compute/core/NEON/kernels/NEDepthConcatenateKernel.h"
-#include "arm_compute/core/NEON/kernels/NEDepthConvertKernel.h"
-#include "arm_compute/core/NEON/kernels/NEDepthwiseConvolution3x3Kernel.h"
+#include "arm_compute/core/NEON/kernels/NEDepthConcatenateLayerKernel.h"
+#include "arm_compute/core/NEON/kernels/NEDepthConvertLayerKernel.h"
+#include "arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.h"
 #include "arm_compute/core/NEON/kernels/NEDepthwiseIm2ColKernel.h"
 #include "arm_compute/core/NEON/kernels/NEDepthwiseVectorToTensorKernel.h"
 #include "arm_compute/core/NEON/kernels/NEDepthwiseWeightsReshapeKernel.h"
@@ -83,7 +83,7 @@
 #include "arm_compute/core/NEON/kernels/NEHistogramKernel.h"
 #include "arm_compute/core/NEON/kernels/NEIm2ColKernel.h"
 #include "arm_compute/core/NEON/kernels/NEIntegralImageKernel.h"
-#include "arm_compute/core/NEON/kernels/NEL2NormalizeKernel.h"
+#include "arm_compute/core/NEON/kernels/NEL2NormalizeLayerKernel.h"
 #include "arm_compute/core/NEON/kernels/NELKTrackerKernel.h"
 #include "arm_compute/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.h"
 #include "arm_compute/core/NEON/kernels/NEMagnitudePhaseKernel.h"
diff --git a/arm_compute/core/NEON/kernels/NEDepthConcatenateKernel.h b/arm_compute/core/NEON/kernels/NEDepthConcatenateKernel.h
deleted file mode 100644
index 784dfc3f5c..0000000000
--- a/arm_compute/core/NEON/kernels/NEDepthConcatenateKernel.h
+++ /dev/null
@@ -1,80 +0,0 @@
-/*
- * Copyright (c) 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef __ARM_COMPUTE_NEDEPTHCONCATENATEKERNEL_H__
-#define __ARM_COMPUTE_NEDEPTHCONCATENATEKERNEL_H__
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for the depth concatenate kernel.
- *  The input tensor will be concatenated into the output tensor.
- */
-class NEDepthConcatenateKernel : public INEKernel
-{
-public:
-    /** Default constructor */
-    NEDepthConcatenateKernel();
-    /** Prevent instances of this class from being copied (As this class contains pointers) */
-    NEDepthConcatenateKernel(const NEDepthConcatenateKernel &) = delete;
-    /** Prevent instances of this class from being copied (As this class contains pointers) */
-    NEDepthConcatenateKernel &operator=(const NEDepthConcatenateKernel &) = delete;
-    /** Allow instances of this class to be moved */
-    NEDepthConcatenateKernel(NEDepthConcatenateKernel &&) = default;
-    /** Allow instances of this class to be moved */
-    NEDepthConcatenateKernel &operator=(NEDepthConcatenateKernel &&) = default;
-    /** Default destructor */
-    ~NEDepthConcatenateKernel() = default;
-    /** Initialise the kernel's inputs and output
-     *
-     * @param[in]     input        Input tensor. Data types supported: QS8/QS16/F16/F32.
-     * @param[in]     depth_offset The offset on the Z axis.
-     * @param[in,out] output       Output tensor. Data types supported: Same as @p input.
-     *
-     * @note: The output tensor's low two dimensions can't be smaller than the input one's.
-     * @note: The gaps between the two lowest dimensions of input and output need to be divisible by 2.
-     *
-     */
-    void configure(const ITensor *input, unsigned int depth_offset, ITensor *output);
-
-    // Inherited methods overridden:
-    void run(const Window &window, const ThreadInfo &info) override;
-    BorderSize border_size() const override;
-
-private:
-    using DepthConcatFunction = void(const ITensor *in, ITensor *out, std::pair<int, int> start_xy, int depth_offset, const Window &window);
-
-private:
-    DepthConcatFunction *_func;
-    const ITensor       *_input;
-    ITensor             *_output;
-    int                  _top_bottom;
-    int                  _left_right;
-    unsigned int         _depth_offset;
-};
-} // namespace arm_compute
-#endif /* __ARM_COMPUTE_NEDEPTHCONCATENATEKERNEL_H__ */
diff --git a/arm_compute/core/NEON/kernels/NEDepthConcatenateLayerKernel.h b/arm_compute/core/NEON/kernels/NEDepthConcatenateLayerKernel.h
new file mode 100644
index 0000000000..6029873f22
--- /dev/null
+++ b/arm_compute/core/NEON/kernels/NEDepthConcatenateLayerKernel.h
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __ARM_COMPUTE_NEDEPTHCONCATENATELAYERKERNEL_H__
+#define __ARM_COMPUTE_NEDEPTHCONCATENATELAYERKERNEL_H__
+
+#include "arm_compute/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Interface for the depth concatenate kernel.
+ *  The input tensor will be concatenated into the output tensor.
+ */
+class NEDepthConcatenateLayerKernel : public INEKernel
+{
+public:
+    /** Default constructor */
+    NEDepthConcatenateLayerKernel();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEDepthConcatenateLayerKernel(const NEDepthConcatenateLayerKernel &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEDepthConcatenateLayerKernel &operator=(const NEDepthConcatenateLayerKernel &) = delete;
+    /** Allow instances of this class to be moved */
+    NEDepthConcatenateLayerKernel(NEDepthConcatenateLayerKernel &&) = default;
+    /** Allow instances of this class to be moved */
+    NEDepthConcatenateLayerKernel &operator=(NEDepthConcatenateLayerKernel &&) = default;
+    /** Default destructor */
+    ~NEDepthConcatenateLayerKernel() = default;
+    /** Initialise the kernel's inputs and output
+     *
+     * @param[in]     input        Input tensor. Data types supported: QS8/QS16/F16/F32.
+     * @param[in]     depth_offset The offset on the Z axis.
+     * @param[in,out] output       Output tensor. Data types supported: Same as @p input.
+     *
+     * @note: The output tensor's two lowest dimensions can't be smaller than the input's.
+     * @note: The gaps between the two lowest dimensions of input and output need to be divisible by 2.
+     *
+     */
+    void configure(const ITensor *input, unsigned int depth_offset, ITensor *output);
+
+    // Inherited methods overridden:
+    void run(const Window &window, const ThreadInfo &info) override;
+    BorderSize border_size() const override;
+
+private:
+    using DepthConcatFunction = void(const ITensor *in, ITensor *out, std::pair<int, int> start_xy, int depth_offset, const Window &window);
+
+private:
+    DepthConcatFunction *_func;
+    const ITensor       *_input;
+    ITensor             *_output;
+    int                  _top_bottom;
+    int                  _left_right;
+    unsigned int         _depth_offset;
+};
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_NEDEPTHCONCATENATELAYERKERNEL_H__ */
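For completeness, a CPU-side sketch mirroring the CL example above; it is not part of the patch. It assumes allocated tensors (`input`, `output` are hypothetical names) and omits the border handling that the NEDepthConcatenateLayer runtime function performs; the split dimension passed to the scheduler is an arbitrary but valid choice.

#include "arm_compute/core/NEON/kernels/NEDepthConcatenateLayerKernel.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"

using namespace arm_compute;

// Sketch only: write one input into `output` at depth offset 0.
void concat_on_cpu(ITensor &input, ITensor &output)
{
    NEDepthConcatenateLayerKernel kernel;
    kernel.configure(&input, 0, &output);
    // Split the execution window across worker threads along Y.
    NEScheduler::get().schedule(&kernel, Window::DimY);
}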
+ * + * @note: The output tensor's low two dimensions can't be smaller than the input one's. + * @note: The gaps between the two lowest dimensions of input and output need to be divisible by 2. + * + */ + void configure(const ITensor *input, unsigned int depth_offset, ITensor *output); + + // Inherited methods overridden: + void run(const Window &window, const ThreadInfo &info) override; + BorderSize border_size() const override; + +private: + using DepthConcatFunction = void(const ITensor *in, ITensor *out, std::pair start_xy, int depth_offset, const Window &window); + +private: + DepthConcatFunction *_func; + const ITensor *_input; + ITensor *_output; + int _top_bottom; + int _left_right; + unsigned int _depth_offset; +}; +} // namespace arm_compute +#endif /* __ARM_COMPUTE_NEDEPTHCONCATENATEKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEDepthConvertKernel.h b/arm_compute/core/NEON/kernels/NEDepthConvertKernel.h deleted file mode 100644 index 332406f239..0000000000 --- a/arm_compute/core/NEON/kernels/NEDepthConvertKernel.h +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Copyright (c) 2016, 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
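[Usage sketch, not part of the patch: configuring the renamed NEON kernel. `input` and `output` are illustrative, pre-allocated tensors; further inputs would be configured at increasing depth offsets.]

    NEDepthConcatenateLayerKernel concat_kernel;
    concat_kernel.configure(&input, /* depth_offset */ 0, &output);
    // Kernels are normally dispatched through the scheduler rather than run directly:
    NEScheduler::get().schedule(&concat_kernel, Window::DimY);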
diff --git a/arm_compute/core/NEON/kernels/NEDepthConvertKernel.h b/arm_compute/core/NEON/kernels/NEDepthConvertKernel.h
deleted file mode 100644
index 332406f239..0000000000
--- a/arm_compute/core/NEON/kernels/NEDepthConvertKernel.h
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Copyright (c) 2016, 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef __ARM_COMPUTE_DEPTHCONVERTKERNEL_H__
-#define __ARM_COMPUTE_DEPTHCONVERTKERNEL_H__
-
-#include "arm_compute/core/NEON/INEKernel.h"
-#include "arm_compute/core/Types.h"
-
-#include <cstdint>
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Depth conversion kernel */
-class NEDepthConvertKernel : public INEKernel
-{
-public:
-    /** Default constructor*/
-    NEDepthConvertKernel();
-    /** Prevent instances of this class from being copied (As this class contains pointers) */
-    NEDepthConvertKernel(const NEDepthConvertKernel &) = delete;
-    /** Default move constructor */
-    NEDepthConvertKernel(NEDepthConvertKernel &&) = default;
-    /** Prevent instances of this class from being copied (As this class contains pointers) */
-    NEDepthConvertKernel &operator=(const NEDepthConvertKernel &) = delete;
-    /** Default move assignment operator */
-    NEDepthConvertKernel &operator=(NEDepthConvertKernel &&) = default;
-    /** Set the input and output of the kernel
-     *
-     * Valid conversions Input -> Output :
-     *
-     *   - QS8 -> QS8, F32
-     *   - U8 -> U16, S16, S32
-     *   - U16 -> U8, U32
-     *   - S16 -> U8, S32
-     *   - QS16 -> QS16, F32
-     *   - F32 -> QS8
-     *
-     * @warning In case of in-place fixed point position conversion make sure that configure has been called
-     *          before the updated tensor is used in other functions, as the TensorInfo of the tensor will be
-     *          altered. In-place is only supported for QS8 -> QS8, QS16 -> QS16.
-     *
-     * @param[in, out] input  The input tensor to convert (Written in case of in-place computation). Data types supported: U8/QS8/U16/S16/F32.
-     * @param[out]     output The output tensor. Can be null in case of in-place computation. Data types supported: U8/QS8/U16/S16/U32/S32/F32.
-     * @param[in]      policy Conversion policy.
-     * @param[in]      shift  (Optional) Value for down/up conversions. Must be 0 <= shift < 8.
-     *                        In case of fixed point position conversion, it specifies the new fixed point position, if operation is in-place.
-     */
-    void configure(ITensor *input, ITensor *output, ConvertPolicy policy, uint32_t shift = 0);
-
-    // Inherited methods overridden:
-    void run(const Window &window, const ThreadInfo &info) override;
-
-private:
-    ITensor      *_input;
-    ITensor      *_output;
-    ConvertPolicy _policy;
-    uint32_t      _shift;
-    int           _fixed_point_position_input;
-    int           _fixed_point_position_output;
-};
-} // namespace arm_compute
-#endif /*__ARM_COMPUTE_NEDEPTHCONVERTKERNEL_H__ */
diff --git a/arm_compute/core/NEON/kernels/NEDepthConvertLayerKernel.h b/arm_compute/core/NEON/kernels/NEDepthConvertLayerKernel.h
new file mode 100644
index 0000000000..af51ded87a
--- /dev/null
+++ b/arm_compute/core/NEON/kernels/NEDepthConvertLayerKernel.h
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_DEPTHCONVERTKERNEL_H__
+#define __ARM_COMPUTE_DEPTHCONVERTKERNEL_H__
+
+#include "arm_compute/core/NEON/INEKernel.h"
+#include "arm_compute/core/Types.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Depth conversion kernel */
+class NEDepthConvertLayerKernel : public INEKernel
+{
+public:
+    /** Default constructor*/
+    NEDepthConvertLayerKernel();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEDepthConvertLayerKernel(const NEDepthConvertLayerKernel &) = delete;
+    /** Default move constructor */
+    NEDepthConvertLayerKernel(NEDepthConvertLayerKernel &&) = default;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEDepthConvertLayerKernel &operator=(const NEDepthConvertLayerKernel &) = delete;
+    /** Default move assignment operator */
+    NEDepthConvertLayerKernel &operator=(NEDepthConvertLayerKernel &&) = default;
+    /** Set the input and output of the kernel
+     *
+     * Valid conversions Input -> Output :
+     *
+     *   - QS8 -> QS8, F32
+     *   - U8 -> U16, S16, S32
+     *   - U16 -> U8, U32
+     *   - S16 -> U8, S32
+     *   - QS16 -> QS16, F32
+     *   - F32 -> QS8
+     *
+     * @warning In case of in-place fixed point position conversion make sure that configure has been called
+     *          before the updated tensor is used in other functions, as the TensorInfo of the tensor will be
+     *          altered. In-place is only supported for QS8 -> QS8, QS16 -> QS16.
+     *
+     * @param[in, out] input  The input tensor to convert (Written in case of in-place computation). Data types supported: U8/QS8/U16/S16/F32.
+     * @param[out]     output The output tensor. Can be null in case of in-place computation. Data types supported: U8/QS8/U16/S16/U32/S32/F32.
+     * @param[in]      policy Conversion policy.
+     * @param[in]      shift  (Optional) Value for down/up conversions. Must be 0 <= shift < 8.
+     *                        In case of fixed point position conversion, it specifies the new fixed point position, if operation is in-place.
+     */
+    void configure(ITensor *input, ITensor *output, ConvertPolicy policy, uint32_t shift = 0);
+
+    // Inherited methods overridden:
+    void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+    ITensor      *_input;
+    ITensor      *_output;
+    ConvertPolicy _policy;
+    uint32_t      _shift;
+    int           _fixed_point_position_input;
+    int           _fixed_point_position_output;
+};
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_NEDEPTHCONVERTKERNEL_H__ */
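[Usage sketch, not part of the patch: the renamed conversion kernel exercising the U8 -> S16 path from the list above. `src` and `dst` are illustrative, pre-allocated tensors.]

    NEDepthConvertLayerKernel convert_kernel;
    // Saturating up-conversion with no shift applied.
    convert_kernel.configure(&src, &dst, ConvertPolicy::SATURATE, /* shift */ 0);
    NEScheduler::get().schedule(&convert_kernel, Window::DimY);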
diff --git a/arm_compute/core/NEON/kernels/NEDepthwiseConvolution3x3Kernel.h b/arm_compute/core/NEON/kernels/NEDepthwiseConvolution3x3Kernel.h
deleted file mode 100644
index a32a06b61d..0000000000
--- a/arm_compute/core/NEON/kernels/NEDepthwiseConvolution3x3Kernel.h
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * Copyright (c) 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef __ARM_COMPUTE_NEDEPTHWISECONVOLUTIONKERNEL3x3_H__
-#define __ARM_COMPUTE_NEDEPTHWISECONVOLUTIONKERNEL3x3_H__
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for the kernel to run a 3x3 depthwise convolution on a tensor.
- */
-class NEDepthwiseConvolution3x3Kernel : public INEKernel
-{
-public:
-    /** Default constructor */
-    NEDepthwiseConvolution3x3Kernel();
-    /** Prevent instances of this class from being copied (As this class contains pointers) */
-    NEDepthwiseConvolution3x3Kernel(const NEDepthwiseConvolution3x3Kernel &) = delete;
-    /** Prevent instances of this class from being copied (As this class contains pointers) */
-    NEDepthwiseConvolution3x3Kernel &operator=(const NEDepthwiseConvolution3x3Kernel &) = delete;
-    /** Default Move Constructor. */
-    NEDepthwiseConvolution3x3Kernel(NEDepthwiseConvolution3x3Kernel &&) = default;
-    /** Default move assignment operator. */
-    NEDepthwiseConvolution3x3Kernel &operator=(NEDepthwiseConvolution3x3Kernel &&) = default;
-    /** Initialize the function's source, destination, conv and border_size.
-     *
-     * @param[in]  input     Source tensor. DataType supported: F32.
-     * @param[in]  weights   Weights tensor. This is a 3D tensor with dimensions [3, 3, IFM]. Data type supported: Same as @p input.
-     * @param[out] output    Destination tensor. Data type supported: Same as @p input.
-     * @param[in]  conv_info Padding and stride information to use for the convolution.
-     */
-    void configure(const ITensor *input, const ITensor *weights, ITensor *output, const PadStrideInfo &conv_info);
-
-    // Inherited methods overridden:
-    void run(const Window &window, const ThreadInfo &info) override;
-    BorderSize border_size() const override;
-
-private:
-    BorderSize     _border_size;
-    const ITensor *_input;
-    ITensor       *_output;
-    const ITensor *_weights;
-    PadStrideInfo  _conv_info;
-};
-} // namespace arm_compute
-#endif /* __ARM_COMPUTE_NEDEPTHWISECONVOLUTIONKERNEL3x3_H__ */
\ No newline at end of file
diff --git a/arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.h b/arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.h
new file mode 100644
index 0000000000..b8f01cb635
--- /dev/null
+++ b/arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEDEPTHWISECONVOLUTIONKERNEL3x3_H__
+#define __ARM_COMPUTE_NEDEPTHWISECONVOLUTIONKERNEL3x3_H__
+
+#include "arm_compute/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Interface for the kernel to run a 3x3 depthwise convolution on a tensor.
+ */
+class NEDepthwiseConvolutionLayer3x3Kernel : public INEKernel
+{
+public:
+    /** Default constructor */
+    NEDepthwiseConvolutionLayer3x3Kernel();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEDepthwiseConvolutionLayer3x3Kernel(const NEDepthwiseConvolutionLayer3x3Kernel &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEDepthwiseConvolutionLayer3x3Kernel &operator=(const NEDepthwiseConvolutionLayer3x3Kernel &) = delete;
+    /** Default Move Constructor. */
+    NEDepthwiseConvolutionLayer3x3Kernel(NEDepthwiseConvolutionLayer3x3Kernel &&) = default;
+    /** Default move assignment operator. */
+    NEDepthwiseConvolutionLayer3x3Kernel &operator=(NEDepthwiseConvolutionLayer3x3Kernel &&) = default;
+    /** Initialize the function's source, destination, conv and border_size.
+     *
+     * @param[in]  input     Source tensor. DataType supported: F32.
+     * @param[in]  weights   Weights tensor. This is a 3D tensor with dimensions [3, 3, IFM]. Data type supported: Same as @p input.
+     * @param[out] output    Destination tensor. Data type supported: Same as @p input.
+     * @param[in]  conv_info Padding and stride information to use for the convolution.
+     */
+    void configure(const ITensor *input, const ITensor *weights, ITensor *output, const PadStrideInfo &conv_info);
+
+    // Inherited methods overridden:
+    void run(const Window &window, const ThreadInfo &info) override;
+    BorderSize border_size() const override;
+
+private:
+    BorderSize     _border_size;
+    const ITensor *_input;
+    ITensor       *_output;
+    const ITensor *_weights;
+    PadStrideInfo  _conv_info;
+};
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_NEDEPTHWISECONVOLUTIONKERNEL3x3_H__ */
\ No newline at end of file
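[Usage sketch, not part of the patch: wiring the renamed 3x3 depthwise kernel with stride 1 and 1-pixel padding. Tensor names are illustrative and the border filling a real caller would pair with this is omitted.]

    // weights is a 3D tensor of shape [3, 3, IFM], per the documentation above.
    NEDepthwiseConvolutionLayer3x3Kernel dwc_kernel;
    dwc_kernel.configure(&input, &weights, &output, PadStrideInfo(1, 1, 1, 1));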
diff --git a/arm_compute/core/NEON/kernels/NEL2NormalizeKernel.h b/arm_compute/core/NEON/kernels/NEL2NormalizeKernel.h
deleted file mode 100644
index fbbe4bee99..0000000000
--- a/arm_compute/core/NEON/kernels/NEL2NormalizeKernel.h
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * Copyright (c) 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef __ARM_COMPUTE_NEL2NORMALIZEKERNEL_H__
-#define __ARM_COMPUTE_NEL2NORMALIZEKERNEL_H__
-
-#include "arm_compute/core/NEON/INEKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for performing a L2 normalize on a given axis given the square sum of it in this axis */
-class NEL2NormalizeKernel : public INEKernel
-{
-public:
-    /** Default constructor */
-    NEL2NormalizeKernel();
-    /** Prevent instances of this class from being copied (As this class contains pointers) */
-    NEL2NormalizeKernel(const NEL2NormalizeKernel &) = delete;
-    /** Prevent instances of this class from being copied (As this class contains pointers) */
-    NEL2NormalizeKernel &operator=(const NEL2NormalizeKernel &) = delete;
-    /** Allow instances of this class to be moved */
-    NEL2NormalizeKernel(NEL2NormalizeKernel &&) = default;
-    /** Allow instances of this class to be moved */
-    NEL2NormalizeKernel &operator=(NEL2NormalizeKernel &&) = default;
-    /** Default destructor */
-    ~NEL2NormalizeKernel() = default;
-    /** Set the input and output tensors.
-     *
-     * @param[in]  input   Source tensor. Data types supported: F32.
-     * @param[in]  sum     Sum values tensor. Data types supported: same as @p input.
-     * @param[out] output  Destination tensor. Data types supported: same as @p input.
-     * @param[in]  axis    Dimension along which to reduce. Supported reduction axis : 0
-     * @param[in]  epsilon Lower bound value for the normalization.
-     */
-    void configure(const ITensor *input, const ITensor *sum, ITensor *output, unsigned int axis, float epsilon);
-
-    // Inherited methods overridden:
-    void run(const Window &window, const ThreadInfo &info) override;
-
-private:
-    const ITensor *_input;
-    const ITensor *_sum;
-    ITensor       *_output;
-    unsigned int   _axis;
-    float          _epsilon;
-};
-} // namespace arm_compute
-#endif /*__ARM_COMPUTE_NEL2NORMALIZEKERNEL_H__ */
diff --git a/arm_compute/core/NEON/kernels/NEL2NormalizeLayerKernel.h b/arm_compute/core/NEON/kernels/NEL2NormalizeLayerKernel.h
new file mode 100644
index 0000000000..7aa5116b68
--- /dev/null
+++ b/arm_compute/core/NEON/kernels/NEL2NormalizeLayerKernel.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NEL2NORMALIZEKERNEL_H__
+#define __ARM_COMPUTE_NEL2NORMALIZEKERNEL_H__
+
+#include "arm_compute/core/NEON/INEKernel.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Interface for performing a L2 normalize on a given axis given the square sum of it in this axis */
+class NEL2NormalizeLayerKernel : public INEKernel
+{
+public:
+    /** Default constructor */
+    NEL2NormalizeLayerKernel();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEL2NormalizeLayerKernel(const NEL2NormalizeLayerKernel &) = delete;
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    NEL2NormalizeLayerKernel &operator=(const NEL2NormalizeLayerKernel &) = delete;
+    /** Allow instances of this class to be moved */
+    NEL2NormalizeLayerKernel(NEL2NormalizeLayerKernel &&) = default;
+    /** Allow instances of this class to be moved */
+    NEL2NormalizeLayerKernel &operator=(NEL2NormalizeLayerKernel &&) = default;
+    /** Default destructor */
+    ~NEL2NormalizeLayerKernel() = default;
+    /** Set the input and output tensors.
+     *
+     * @param[in]  input   Source tensor. Data types supported: F32.
+     * @param[in]  sum     Sum values tensor. Data types supported: same as @p input.
+     * @param[out] output  Destination tensor. Data types supported: same as @p input.
+     * @param[in]  axis    Dimension along which to reduce. Supported reduction axis : 0
+     * @param[in]  epsilon Lower bound value for the normalization.
+     */
+    void configure(const ITensor *input, const ITensor *sum, ITensor *output, unsigned int axis, float epsilon);
+
+    // Inherited methods overridden:
+    void run(const Window &window, const ThreadInfo &info) override;
+
+private:
+    const ITensor *_input;
+    const ITensor *_sum;
+    ITensor       *_output;
+    unsigned int   _axis;
+    float          _epsilon;
+};
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_NEL2NORMALIZEKERNEL_H__ */
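[Usage sketch, not part of the patch: the renamed L2-normalize kernel, assuming `sum_sq` already holds the square sum along axis 0 (for example from a reduction kernel). All tensor names are illustrative.]

    NEL2NormalizeLayerKernel l2_kernel;
    l2_kernel.configure(&input, &sum_sq, &output, /* axis */ 0, /* epsilon */ 1e-12f);
    NEScheduler::get().schedule(&l2_kernel, Window::DimY);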
diff --git a/arm_compute/graph/nodes/L2NormalizeLayer.h b/arm_compute/graph/nodes/L2NormalizeLayer.h
index fc2bbc2d19..a423306bd2 100644
--- a/arm_compute/graph/nodes/L2NormalizeLayer.h
+++ b/arm_compute/graph/nodes/L2NormalizeLayer.h
@@ -33,7 +33,7 @@ namespace arm_compute
 {
 namespace graph
 {
-/** L2Normalize layer node */
+/** L2NormalizeLayer layer node */
 class L2NormalizeLayer final : public INode
 {
 public:
diff --git a/arm_compute/runtime/CL/CLFunctions.h b/arm_compute/runtime/CL/CLFunctions.h
index 9a20769ca1..f6ecef7a51 100644
--- a/arm_compute/runtime/CL/CLFunctions.h
+++ b/arm_compute/runtime/CL/CLFunctions.h
@@ -42,9 +42,9 @@
 #include "arm_compute/runtime/CL/functions/CLColorConvert.h"
 #include "arm_compute/runtime/CL/functions/CLConvolution.h"
 #include "arm_compute/runtime/CL/functions/CLConvolutionLayer.h"
-#include "arm_compute/runtime/CL/functions/CLDepthConcatenate.h"
-#include "arm_compute/runtime/CL/functions/CLDepthConvert.h"
-#include "arm_compute/runtime/CL/functions/CLDepthwiseConvolution.h"
+#include "arm_compute/runtime/CL/functions/CLDepthConcatenateLayer.h"
+#include "arm_compute/runtime/CL/functions/CLDepthConvertLayer.h"
+#include "arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h"
 #include "arm_compute/runtime/CL/functions/CLDepthwiseSeparableConvolutionLayer.h"
 #include "arm_compute/runtime/CL/functions/CLDequantizationLayer.h"
 #include "arm_compute/runtime/CL/functions/CLDerivative.h"
@@ -72,7 +72,7 @@
 #include "arm_compute/runtime/CL/functions/CLHarrisCorners.h"
 #include "arm_compute/runtime/CL/functions/CLHistogram.h"
 #include "arm_compute/runtime/CL/functions/CLIntegralImage.h"
-#include "arm_compute/runtime/CL/functions/CLL2Normalize.h"
+#include "arm_compute/runtime/CL/functions/CLL2NormalizeLayer.h"
 #include "arm_compute/runtime/CL/functions/CLLaplacianPyramid.h"
 #include "arm_compute/runtime/CL/functions/CLLaplacianReconstruct.h"
 #include "arm_compute/runtime/CL/functions/CLLocallyConnectedLayer.h"
diff --git a/arm_compute/runtime/CL/functions/CLDepthConcatenate.h b/arm_compute/runtime/CL/functions/CLDepthConcatenate.h
deleted file mode 100644
index 77997f6bd1..0000000000
--- a/arm_compute/runtime/CL/functions/CLDepthConcatenate.h
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * Copyright (c) 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef __ARM_COMPUTE_CLDEPTHCONCATENATE_H__
-#define __ARM_COMPUTE_CLDEPTHCONCATENATE_H__
-
-#include "arm_compute/core/CL/OpenCL.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/Window.h"
-#include "arm_compute/runtime/IFunction.h"
-
-#include "arm_compute/core/CL/kernels/CLDepthConcatenateKernel.h"
-#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
-
-#include <memory>
-#include <vector>
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Basic function to execute concatenate tensors along z axis. This function calls the following kernels:
- *
- * -# @ref CLFillBorderKernel (executed if input's lowest two dimensions are smaller than respective output's dimensions)
- * -# @ref CLDepthConcatenateKernel
- *
- */
-class CLDepthConcatenate : public IFunction
-{
-public:
-    /** Default constructor */
-    CLDepthConcatenate();
-    /** Initialise the kernel's inputs vector and output.
-     *
-     * @param[in,out] inputs_vector The vectors containing all the tensors to concatenate. Data types supported: QS8/QS16/F16/F32.
-     * @param[out]    output        Output tensor. Data types supported: Same as @p input.
-     */
-    void configure(std::vector<ICLTensor *> inputs_vector, ICLTensor *output);
-
-    // Inherited methods overridden:
-    void run() override;
-
-private:
-    std::vector<ICLTensor *>                    _inputs_vector;
-    std::unique_ptr<CLDepthConcatenateKernel[]> _concat_kernels_vector;
-    std::unique_ptr<CLFillBorderKernel[]>       _border_handlers_vector;
-    unsigned int                                _num_inputs;
-};
-}
-#endif /* __ARM_COMPUTE_CLDEPTHCONCATENATE_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLDepthConcatenateLayer.h b/arm_compute/runtime/CL/functions/CLDepthConcatenateLayer.h
new file mode 100644
index 0000000000..00b3b66c97
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLDepthConcatenateLayer.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLDEPTHCONCATENATE_H__
+#define __ARM_COMPUTE_CLDEPTHCONCATENATE_H__
+
+#include "arm_compute/core/CL/OpenCL.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/core/Window.h"
+#include "arm_compute/runtime/IFunction.h"
+
+#include "arm_compute/core/CL/kernels/CLDepthConcatenateLayerKernel.h"
+#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
+
+#include <memory>
+#include <vector>
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to execute concatenate tensors along z axis. This function calls the following kernels:
+ *
+ * -# @ref CLFillBorderKernel (executed if input's lowest two dimensions are smaller than respective output's dimensions)
+ * -# @ref CLDepthConcatenateLayerKernel
+ *
+ */
+class CLDepthConcatenateLayer : public IFunction
+{
+public:
+    /** Default constructor */
+    CLDepthConcatenateLayer();
+    /** Initialise the kernel's inputs vector and output.
+     *
+     * @param[in,out] inputs_vector The vectors containing all the tensors to concatenate. Data types supported: QS8/QS16/F16/F32.
+     * @param[out]    output        Output tensor. Data types supported: Same as @p input.
+     */
+    void configure(std::vector<ICLTensor *> inputs_vector, ICLTensor *output);
+
+    // Inherited methods overridden:
+    void run() override;
+
+private:
+    std::vector<ICLTensor *>                         _inputs_vector;
+    std::unique_ptr<CLDepthConcatenateLayerKernel[]> _concat_kernels_vector;
+    std::unique_ptr<CLFillBorderKernel[]>            _border_handlers_vector;
+    unsigned int                                     _num_inputs;
+};
+}
+#endif /* __ARM_COMPUTE_CLDEPTHCONCATENATE_H__ */
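[Usage sketch, not part of the patch: the renamed function keeps the same configure/run flow. `tensor_a`, `tensor_b` and `out` are illustrative, pre-allocated CLTensors; the GLES Compute and NEON variants mirror this interface.]

    std::vector<ICLTensor *> inputs = { &tensor_a, &tensor_b };
    CLDepthConcatenateLayer concat;
    concat.configure(inputs, &out);
    // ... fill tensor_a and tensor_b ...
    concat.run();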
diff --git a/arm_compute/runtime/CL/functions/CLDepthConvert.h b/arm_compute/runtime/CL/functions/CLDepthConvert.h
deleted file mode 100644
index 9a4c63dd6d..0000000000
--- a/arm_compute/runtime/CL/functions/CLDepthConvert.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Copyright (c) 2016, 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef __ARM_COMPUTE_CLDEPTHCONVERT_H__
-#define __ARM_COMPUTE_CLDEPTHCONVERT_H__
-
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-
-#include <cstdint>
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Basic function to run @ref CLDepthConvertKernel. */
-class CLDepthConvert : public ICLSimpleFunction
-{
-public:
-    /** Initialize the function's source, destination
-     *
-     * Input data type must be different than output data type.
-     *
-     * Valid conversions Input -> Output :
-     *
-     *  - QS8 -> F32
-     *  - QS16 -> F32
-     *  - U8 -> U16, S16, U32, S32
-     *  - U16 -> U8, U32, S32
-     *  - S16 -> U8, U32, S32
-     *  - U32 -> U8, U16, S16
-     *  - S32 -> U8, U16, S16
-     *  - F32 -> QS8, QS16
-     *
-     * @param[in]  input  The input tensor to convert. Data types supported: QS8/U8/U16/S16/QS16/U32/S32/F32.
-     * @param[out] output The output tensor. Data types supported: QS8/U8/U16/S16/QS16/U32/S32/F32.
-     * @param[in]  policy Conversion policy.
-     * @param[in]  shift  Value for down/up conversions. Must be 0 <= shift < 8.
-     *                    It is not used on fixed point conversion.
-     */
-    void configure(const ICLTensor *input, ICLTensor *output, ConvertPolicy policy, uint32_t shift);
-};
-}
-#endif /*__ARM_COMPUTE_CLDEPTHCONVERT_H__*/
diff --git a/arm_compute/runtime/CL/functions/CLDepthConvertLayer.h b/arm_compute/runtime/CL/functions/CLDepthConvertLayer.h
new file mode 100644
index 0000000000..c84dc15508
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLDepthConvertLayer.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLDEPTHCONVERT_H__
+#define __ARM_COMPUTE_CLDEPTHCONVERT_H__
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to run @ref CLDepthConvertLayerKernel. */
+class CLDepthConvertLayer : public ICLSimpleFunction
+{
+public:
+    /** Initialize the function's source, destination
+     *
+     * Input data type must be different than output data type.
+     *
+     * Valid conversions Input -> Output :
+     *
+     *  - QS8 -> F32
+     *  - QS16 -> F32
+     *  - U8 -> U16, S16, U32, S32
+     *  - U16 -> U8, U32, S32
+     *  - S16 -> U8, U32, S32
+     *  - U32 -> U8, U16, S16
+     *  - S32 -> U8, U16, S16
+     *  - F32 -> QS8, QS16
+     *
+     * @param[in]  input  The input tensor to convert. Data types supported: QS8/U8/U16/S16/QS16/U32/S32/F32.
+     * @param[out] output The output tensor. Data types supported: QS8/U8/U16/S16/QS16/U32/S32/F32.
+     * @param[in]  policy Conversion policy.
+     * @param[in]  shift  Value for down/up conversions. Must be 0 <= shift < 8.
+     *                    It is not used on fixed point conversion.
+     */
+    void configure(const ICLTensor *input, ICLTensor *output, ConvertPolicy policy, uint32_t shift);
+};
+}
+#endif /*__ARM_COMPUTE_CLDEPTHCONVERT_H__*/
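[Usage sketch, not part of the patch: a saturating U8 -> U16 up-conversion with the renamed CL function. `src_u8` and `dst_u16` are illustrative, pre-allocated tensors; note that `shift` is mandatory here, unlike the NEON kernel above.]

    CLDepthConvertLayer convert;
    convert.configure(&src_u8, &dst_u16, ConvertPolicy::SATURATE, /* shift */ 0);
    convert.run();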
diff --git a/arm_compute/runtime/CL/functions/CLDepthwiseConvolution.h b/arm_compute/runtime/CL/functions/CLDepthwiseConvolution.h
deleted file mode 100644
index 40eb8523fb..0000000000
--- a/arm_compute/runtime/CL/functions/CLDepthwiseConvolution.h
+++ /dev/null
@@ -1,110 +0,0 @@
-/*
- * Copyright (c) 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef __ARM_COMPUTE_CLDEPTHWISECONVOLUTION_H__
-#define __ARM_COMPUTE_CLDEPTHWISECONVOLUTION_H__
-
-#include "arm_compute/core/CL/kernels/CLDepthwiseConvolution3x3Kernel.h"
-#include "arm_compute/core/CL/kernels/CLDepthwiseIm2ColKernel.h"
-#include "arm_compute/core/CL/kernels/CLDepthwiseVectorToTensorKernel.h"
-#include "arm_compute/core/CL/kernels/CLDepthwiseWeightsReshapeKernel.h"
-#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/CLTensor.h"
-#include "arm_compute/runtime/IFunction.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Basic function to execute a depthwise convolution for kernel size 3x3xC. This function calls the following OpenCL kernels:
- *
- * -# @ref CLDepthwiseConvolution3x3Kernel
- * -# @ref CLFillBorderKernel (if pad_x or pad_y > 0)
- *
- */
-class CLDepthwiseConvolution3x3 : public IFunction
-{
-public:
-    /** Default constructor */
-    CLDepthwiseConvolution3x3();
-    /** Initialize the function's source, destination, conv and border_size.
-     *
-     * @param[in, out] input     Source tensor. Data type supported: QASYMM8/F32. (Written to only for border filling).
-     * @param[in]      weights   Weights tensor. A 3D tensor with shape [3, 3, IFM]. Data type supported: Same as @p input.
-     * @param[in]      biases    (Optional) Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
-     *                           Data type supported: Same as @p input.
-     * @param[out]     output    Destination tensor. Data type supported: same as @p input.
-     * @param[in]      conv_info Padding and stride information to use for the convolution.
-     */
-    void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info);
-
-    // Inherited methods overriden:
-    void run() override;
-
-private:
-    CLDepthwiseConvolution3x3Kernel _kernel;
-    CLFillBorderKernel              _border_handler;
-};
-
-/** Basic function to execute a generic depthwise convolution. This function calls the following OpenCL kernels:
- *
- * -# @ref CLDepthwiseIm2ColKernel
- * -# @ref CLGEMMMatrixVectorMultiplyKernel
- * -# @ref CLDepthwiseWeightsReshapeKernel
- * -# @ref CLFillBorderKernel (if pad_x or pad_y > 0)
- *
- */
-class CLDepthwiseConvolution : public IFunction
-{
-public:
-    /** Default constructor */
-    CLDepthwiseConvolution();
-    /** Initialize the function's source, destination, weights and convolution information.
-     *
-     * @param[in, out] input     Source tensor. Data type supported: F32. (Written to only for border filling).
-     * @param[in]      weights   Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input.
-     * @param[in]      biases    (Optional) Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
-     *                           Data type supported: Same as @p input.
-     * @param[out]     output    Destination tensor. Data type supported: same as @p input.
-     * @param[in]      conv_info Padding and stride information to use for the convolution.
-     */
-    void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info);
-
-    // Inherited methods overriden:
-    void run() override;
-
-private:
-    CLDepthwiseIm2ColKernel          _im2col_kernel;
-    CLDepthwiseWeightsReshapeKernel  _weights_reshape_kernel;
-    CLGEMMMatrixVectorMultiplyKernel _v2mm_kernel;
-    CLDepthwiseVectorToTensorKernel  _vector_to_tensor_kernel;
-    CLFillBorderKernel               _v2mm_input_fill_border;
-    CLFillBorderKernel               _v2mm_weights_fill_border;
-    CLTensor                         _input_reshaped;
-    CLTensor                         _weights_reshaped;
-    CLTensor                         _v2mm_output;
-};
-}
-#endif /*__ARM_COMPUTE_CLDEPTHWISECONVOLUTION_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h
new file mode 100644
index 0000000000..f7899415d2
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h
@@ -0,0 +1,110 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLDEPTHWISECONVOLUTION_H__
+#define __ARM_COMPUTE_CLDEPTHWISECONVOLUTION_H__
+
+#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3Kernel.h"
+#include "arm_compute/core/CL/kernels/CLDepthwiseIm2ColKernel.h"
+#include "arm_compute/core/CL/kernels/CLDepthwiseVectorToTensorKernel.h"
+#include "arm_compute/core/CL/kernels/CLDepthwiseWeightsReshapeKernel.h"
+#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
+#include "arm_compute/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/IFunction.h"
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Basic function to execute a depthwise convolution for kernel size 3x3xC. This function calls the following OpenCL kernels:
+ *
+ * -# @ref CLDepthwiseConvolutionLayer3x3Kernel
+ * -# @ref CLFillBorderKernel (if pad_x or pad_y > 0)
+ *
+ */
+class CLDepthwiseConvolutionLayer3x3 : public IFunction
+{
+public:
+    /** Default constructor */
+    CLDepthwiseConvolutionLayer3x3();
+    /** Initialize the function's source, destination, conv and border_size.
+     *
+     * @param[in, out] input     Source tensor. Data type supported: QASYMM8/F32. (Written to only for border filling).
+     * @param[in]      weights   Weights tensor. A 3D tensor with shape [3, 3, IFM]. Data type supported: Same as @p input.
+     * @param[in]      biases    (Optional) Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
+     *                           Data type supported: Same as @p input.
+     * @param[out]     output    Destination tensor. Data type supported: same as @p input.
+     * @param[in]      conv_info Padding and stride information to use for the convolution.
+     */
+    void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info);
+
+    // Inherited methods overriden:
+    void run() override;
+
+private:
+    CLDepthwiseConvolutionLayer3x3Kernel _kernel;
+    CLFillBorderKernel                   _border_handler;
+};
+
+/** Basic function to execute a generic depthwise convolution. This function calls the following OpenCL kernels:
+ *
+ * -# @ref CLDepthwiseIm2ColKernel
+ * -# @ref CLGEMMMatrixVectorMultiplyKernel
+ * -# @ref CLDepthwiseWeightsReshapeKernel
+ * -# @ref CLFillBorderKernel (if pad_x or pad_y > 0)
+ *
+ */
+class CLDepthwiseConvolutionLayer : public IFunction
+{
+public:
+    /** Default constructor */
+    CLDepthwiseConvolutionLayer();
+    /** Initialize the function's source, destination, weights and convolution information.
+     *
+     * @param[in, out] input     Source tensor. Data type supported: F32. (Written to only for border filling).
+     * @param[in]      weights   Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input.
+     * @param[in]      biases    (Optional) Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed.
+     *                           Data type supported: Same as @p input.
+     * @param[out]     output    Destination tensor. Data type supported: same as @p input.
+     * @param[in]      conv_info Padding and stride information to use for the convolution.
+     */
+    void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info);
+
+    // Inherited methods overriden:
+    void run() override;
+
+private:
+    CLDepthwiseIm2ColKernel          _im2col_kernel;
+    CLDepthwiseWeightsReshapeKernel  _weights_reshape_kernel;
+    CLGEMMMatrixVectorMultiplyKernel _v2mm_kernel;
+    CLDepthwiseVectorToTensorKernel  _vector_to_tensor_kernel;
+    CLFillBorderKernel               _v2mm_input_fill_border;
+    CLFillBorderKernel               _v2mm_weights_fill_border;
+    CLTensor                         _input_reshaped;
+    CLTensor                         _weights_reshaped;
+    CLTensor                         _v2mm_output;
+};
+}
+#endif /*__ARM_COMPUTE_CLDEPTHWISECONVOLUTION_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLDepthwiseSeparableConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLDepthwiseSeparableConvolutionLayer.h
index a38446293b..27cee5ed3b 100644
--- a/arm_compute/runtime/CL/functions/CLDepthwiseSeparableConvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLDepthwiseSeparableConvolutionLayer.h
@@ -27,7 +27,7 @@
 #include "arm_compute/core/Types.h"
 #include "arm_compute/runtime/CL/CLTensor.h"
 #include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-#include "arm_compute/runtime/CL/functions/CLDepthwiseConvolution.h"
+#include "arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h"
 #include "arm_compute/runtime/CL/functions/CLDirectConvolutionLayer.h"
 #include "arm_compute/runtime/IFunction.h"
 
@@ -39,7 +39,7 @@ class ICLTensor;
 
 /** Basic function to execute depthwise convolution. This function calls the following OpenCL kernels and function:
  *
- * -# @ref CLDepthwiseConvolution
+ * -# @ref CLDepthwiseConvolutionLayer
  * -# @ref CLDirectConvolutionLayer
  *
  */
@@ -72,8 +72,8 @@ public:
     void run() override;
 
 private:
-    CLDepthwiseConvolution   _depthwise_conv;
-    CLDirectConvolutionLayer _pointwise_conv;
+    CLDepthwiseConvolutionLayer _depthwise_conv;
+    CLDirectConvolutionLayer    _pointwise_conv;
 };
 }
 #endif /*__ARM_COMPUTE_CL_DEPTHWISE_SEPARABLE_CONVOLUTION_H__ */
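[Usage sketch, not part of the patch: the renamed function-level 3x3 fast path. Tensor names are illustrative and pre-allocated; passing nullptr for the biases skips them, per the documentation above.]

    CLDepthwiseConvolutionLayer3x3 dwc;
    dwc.configure(&input, &weights, /* biases */ nullptr, &output, PadStrideInfo(1, 1, 1, 1));
    dwc.run();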
diff --git a/arm_compute/runtime/CL/functions/CLL2Normalize.h b/arm_compute/runtime/CL/functions/CLL2Normalize.h
deleted file mode 100644
index 20af54eda2..0000000000
--- a/arm_compute/runtime/CL/functions/CLL2Normalize.h
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * Copyright (c) 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef __ARM_COMPUTE_CLL2NORMALIZE_H__
-#define __ARM_COMPUTE_CLL2NORMALIZE_H__
-
-#include "arm_compute/core/CL/kernels/CLL2NormalizeKernel.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/CLMemoryGroup.h"
-#include "arm_compute/runtime/CL/CLTensor.h"
-#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-#include "arm_compute/runtime/CL/functions/CLReductionOperation.h"
-#include "arm_compute/runtime/IMemoryManager.h"
-
-#include <cstdint>
-#include <memory>
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Perform reduction operation.
- */
-class CLL2Normalize : public IFunction
-{
-public:
-    /** Constructor */
-    CLL2Normalize(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
-
-    /** Set the input and output tensors.
-     *
-     * @param[in]  input   Source tensor. Data types supported: QS8, QS16, F32.
-     * @param[out] output  Destination tensor. Data types supported: Same as @p input.
-     * @param[in]  axis    Axis along which to reduce. Supported reduction axis : 0
-     * @param[in]  epsilon Lower bound value for the normalization.
-     */
-    void configure(ICLTensor *input, ICLTensor *output, unsigned int axis, float epsilon = 1e-12);
-
-    // Inherited methods overridden:
-    void run() override;
-
-private:
-    CLMemoryGroup        _memory_group;
-    CLReductionOperation _reduce_func;
-    CLL2NormalizeKernel  _normalize_kernel;
-    CLTensor             _sumsq;
-};
-}
-#endif /*__ARM_COMPUTE_CLL2NORMALIZE_H__ */
diff --git a/arm_compute/runtime/CL/functions/CLL2NormalizeLayer.h b/arm_compute/runtime/CL/functions/CLL2NormalizeLayer.h
new file mode 100644
index 0000000000..8aea7a641b
--- /dev/null
+++ b/arm_compute/runtime/CL/functions/CLL2NormalizeLayer.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_CLL2NORMALIZE_H__
+#define __ARM_COMPUTE_CLL2NORMALIZE_H__
+
+#include "arm_compute/core/CL/kernels/CLL2NormalizeLayerKernel.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLMemoryGroup.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+#include "arm_compute/runtime/CL/functions/CLReductionOperation.h"
+#include "arm_compute/runtime/IMemoryManager.h"
+
+#include <cstdint>
+#include <memory>
+
+namespace arm_compute
+{
+class ICLTensor;
+
+/** Perform reduction operation.
+ */
+class CLL2NormalizeLayer : public IFunction
+{
+public:
+    /** Constructor */
+    CLL2NormalizeLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+
+    /** Set the input and output tensors.
+     *
+     * @param[in]  input   Source tensor. Data types supported: QS8, QS16, F32.
+     * @param[out] output  Destination tensor. Data types supported: Same as @p input.
+     * @param[in]  axis    Axis along which to reduce. Supported reduction axis : 0
+     * @param[in]  epsilon Lower bound value for the normalization.
+     */
+    void configure(ICLTensor *input, ICLTensor *output, unsigned int axis, float epsilon = 1e-12);
+
+    // Inherited methods overridden:
+    void run() override;
+
+private:
+    CLMemoryGroup            _memory_group;
+    CLReductionOperation     _reduce_func;
+    CLL2NormalizeLayerKernel _normalize_kernel;
+    CLTensor                 _sumsq;
+};
+}
+#endif /*__ARM_COMPUTE_CLL2NORMALIZE_H__ */
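[Usage sketch, not part of the patch: the renamed CL function, optionally wired to a memory manager so the internal square-sum tensor can share memory. `mm` is an illustrative std::shared_ptr<IMemoryManager>; tensors are pre-allocated.]

    CLL2NormalizeLayer l2_norm(mm);
    l2_norm.configure(&input, &output, /* axis */ 0); // epsilon defaults to 1e-12
    l2_norm.run();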
* @@ -85,7 +85,7 @@ private: CLPyramid _tmp_pyr; std::unique_ptr _addf; std::unique_ptr _scalef; - CLDepthConvert _depthf; + CLDepthConvertLayer _depthf; }; } #endif /*__ARM_COMPUTE_CLLAPLACIANRECONSTRUCT_H__ */ diff --git a/arm_compute/runtime/GLES_COMPUTE/GCFunctions.h b/arm_compute/runtime/GLES_COMPUTE/GCFunctions.h index 8a345c5fab..e76d4efb27 100644 --- a/arm_compute/runtime/GLES_COMPUTE/GCFunctions.h +++ b/arm_compute/runtime/GLES_COMPUTE/GCFunctions.h @@ -28,7 +28,7 @@ #include "arm_compute/runtime/GLES_COMPUTE/functions/GCAbsoluteDifference.h" #include "arm_compute/runtime/GLES_COMPUTE/functions/GCActivationLayer.h" #include "arm_compute/runtime/GLES_COMPUTE/functions/GCBatchNormalizationLayer.h" -#include "arm_compute/runtime/GLES_COMPUTE/functions/GCDepthConcatenate.h" +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCDepthConcatenateLayer.h" #include "arm_compute/runtime/GLES_COMPUTE/functions/GCDirectConvolutionLayer.h" #include "arm_compute/runtime/GLES_COMPUTE/functions/GCDropoutLayer.h" #include "arm_compute/runtime/GLES_COMPUTE/functions/GCFillBorder.h" diff --git a/arm_compute/runtime/GLES_COMPUTE/functions/GCDepthConcatenate.h b/arm_compute/runtime/GLES_COMPUTE/functions/GCDepthConcatenate.h deleted file mode 100644 index 801dc0e111..0000000000 --- a/arm_compute/runtime/GLES_COMPUTE/functions/GCDepthConcatenate.h +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright (c) 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef __ARM_COMPUTE_GCDEPTHCONCATENATE_H__ -#define __ARM_COMPUTE_GCDEPTHCONCATENATE_H__ - -#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h" -#include "arm_compute/core/GLES_COMPUTE/kernels/GCDepthConcatenateKernel.h" -#include "arm_compute/core/GLES_COMPUTE/kernels/GCFillBorderKernel.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/IFunction.h" - -#include -#include - -namespace arm_compute -{ -class IGCTensor; - -/** Basic function to execute concatenate tensors along z axis. This function calls the following kernels: - * - * -# @ref GCFillBorderKernel (executed if input's lowest two dimensions are smaller than respective output's dimensions) - * -# @ref GCDepthConcatenateKernel - * - */ -class GCDepthConcatenate : public IFunction -{ -public: - /** Default constructor */ - GCDepthConcatenate(); - /** Initialise the kernel's inputs vector and output. - * - * @param[in,out] inputs_vector The vectors containing all the tensors to concatenate. 
Data types supported: F16/F32. - * @param[out] output Output tensor. Data types supported: Same as @p input. - */ - void configure(std::vector<IGCTensor *> inputs_vector, IGCTensor *output); - - // Inherited methods overridden: - void run() override; - -private: - std::unique_ptr<GCDepthConcatenateKernel[]> _concat_kernels_vector; - std::unique_ptr<GCFillBorderKernel[]> _border_handlers_vector; - unsigned int _num_inputs; -}; -} -#endif /* __ARM_COMPUTE_GCDEPTHCONCATENATE_H__ */ diff --git a/arm_compute/runtime/GLES_COMPUTE/functions/GCDepthConcatenateLayer.h b/arm_compute/runtime/GLES_COMPUTE/functions/GCDepthConcatenateLayer.h new file mode 100644 index 0000000000..1151399f92 --- /dev/null +++ b/arm_compute/runtime/GLES_COMPUTE/functions/GCDepthConcatenateLayer.h @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_GCDEPTHCONCATENATE_H__ +#define __ARM_COMPUTE_GCDEPTHCONCATENATE_H__ + +#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h" +#include "arm_compute/core/GLES_COMPUTE/kernels/GCDepthConcatenateLayerKernel.h" +#include "arm_compute/core/GLES_COMPUTE/kernels/GCFillBorderKernel.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/IFunction.h" + +#include <memory> +#include <vector> + +namespace arm_compute +{ +class IGCTensor; + +/** Basic function to execute concatenate tensors along z axis. This function calls the following kernels: + * + * -# @ref GCFillBorderKernel (executed if input's lowest two dimensions are smaller than respective output's dimensions) + * -# @ref GCDepthConcatenateLayerKernel + * + */ +class GCDepthConcatenateLayer : public IFunction +{ +public: + /** Default constructor */ + GCDepthConcatenateLayer(); + /** Initialise the kernel's inputs vector and output. + * + * @param[in,out] inputs_vector The vectors containing all the tensors to concatenate. Data types supported: F16/F32. + * @param[out] output Output tensor. Data types supported: Same as @p input.
+ */ + void configure(std::vector<IGCTensor *> inputs_vector, IGCTensor *output); + + // Inherited methods overridden: + void run() override; + +private: + std::unique_ptr<GCDepthConcatenateLayerKernel[]> _concat_kernels_vector; + std::unique_ptr<GCFillBorderKernel[]> _border_handlers_vector; + unsigned int _num_inputs; +}; +} +#endif /* __ARM_COMPUTE_GCDEPTHCONCATENATE_H__ */ diff --git a/arm_compute/runtime/NEON/NEFunctions.h b/arm_compute/runtime/NEON/NEFunctions.h index 2e8c084371..08852cf368 100644 --- a/arm_compute/runtime/NEON/NEFunctions.h +++ b/arm_compute/runtime/NEON/NEFunctions.h @@ -45,9 +45,9 @@ #include "arm_compute/runtime/NEON/functions/NEConvolutionLayer.h" #include "arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h" #include "arm_compute/runtime/NEON/functions/NEDeconvolutionLayerUpsample.h" -#include "arm_compute/runtime/NEON/functions/NEDepthConcatenate.h" -#include "arm_compute/runtime/NEON/functions/NEDepthConvert.h" -#include "arm_compute/runtime/NEON/functions/NEDepthwiseConvolution.h" +#include "arm_compute/runtime/NEON/functions/NEDepthConcatenateLayer.h" +#include "arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h" +#include "arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h" #include "arm_compute/runtime/NEON/functions/NEDepthwiseSeparableConvolutionLayer.h" #include "arm_compute/runtime/NEON/functions/NEDequantizationLayer.h" #include "arm_compute/runtime/NEON/functions/NEDerivative.h" @@ -77,7 +77,7 @@ #include "arm_compute/runtime/NEON/functions/NEHistogram.h" #include "arm_compute/runtime/NEON/functions/NEIm2Col.h" #include "arm_compute/runtime/NEON/functions/NEIntegralImage.h" -#include "arm_compute/runtime/NEON/functions/NEL2Normalize.h" +#include "arm_compute/runtime/NEON/functions/NEL2NormalizeLayer.h" #include "arm_compute/runtime/NEON/functions/NELaplacianPyramid.h" #include "arm_compute/runtime/NEON/functions/NELaplacianReconstruct.h" #include "arm_compute/runtime/NEON/functions/NELocallyConnectedLayer.h" diff --git a/arm_compute/runtime/NEON/functions/NEDepthConcatenate.h b/arm_compute/runtime/NEON/functions/NEDepthConcatenate.h deleted file mode 100644 index cc65099575..0000000000 --- a/arm_compute/runtime/NEON/functions/NEDepthConcatenate.h +++ /dev/null @@ -1,67 +0,0 @@ -/* - * Copyright (c) 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE.
- */ -#ifndef __ARM_COMPUTE_NEDEPTHCONCATENATE_H__ -#define __ARM_COMPUTE_NEDEPTHCONCATENATE_H__ - -#include "arm_compute/runtime/IFunction.h" - -#include "arm_compute/core/NEON/kernels/NEDepthConcatenateKernel.h" -#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" - -#include <memory> -#include <vector> - -namespace arm_compute -{ -class ITensor; - -/** Basic function to execute concatenate tensors along z axis. This function calls the following kernels: - * - * -# @ref NEFillBorderKernel (executed if input's lowest two dimensions are smaller than respective output's dimensions) - * -# @ref NEDepthConcatenateKernel - * - */ -class NEDepthConcatenate : public IFunction -{ -public: - /** Default constructor */ - NEDepthConcatenate(); - /** Initialise the kernel's inputs vector and output. - * - * @param[in,out] inputs_vector The vectors containing all the tensors to concatenate. Data types supported: QS8/QS16/F16/F32. - * @param[out] output Output tensor. Data types supported: Same as @p inputs_vector. - */ - void configure(std::vector<ITensor *> inputs_vector, ITensor *output); - - // Inherited methods overridden: - void run() override; - -private: - std::vector<ITensor *> _inputs_vector; - std::unique_ptr<NEDepthConcatenateKernel[]> _concat_kernels_vector; - std::unique_ptr<NEFillBorderKernel[]> _border_handlers_vector; - unsigned int _num_inputs; -}; -} -#endif /* __ARM_COMPUTE_NEDEPTHCONCATENATE_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEDepthConcatenateLayer.h b/arm_compute/runtime/NEON/functions/NEDepthConcatenateLayer.h new file mode 100644 index 0000000000..5b63b70634 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEDepthConcatenateLayer.h @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEDEPTHCONCATENATE_H__ +#define __ARM_COMPUTE_NEDEPTHCONCATENATE_H__ + +#include "arm_compute/runtime/IFunction.h" + +#include "arm_compute/core/NEON/kernels/NEDepthConcatenateLayerKernel.h" +#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" + +#include <memory> +#include <vector> + +namespace arm_compute +{ +class ITensor; + +/** Basic function to execute concatenate tensors along z axis.
This function calls the following kernels: + * + * -# @ref NEFillBorderKernel (executed if input's lowest two dimensions are smaller than respective output's dimensions) + * -# @ref NEDepthConcatenateLayerKernel + * + */ +class NEDepthConcatenateLayer : public IFunction +{ +public: + /** Default constructor */ + NEDepthConcatenateLayer(); + /** Initialise the kernel's inputs vector and output. + * + * @param[in,out] inputs_vector The vectors containing all the tensors to concatenate. Data types supported: QS8/QS16/F16/F32. + * @param[out] output Output tensor. Data types supported: Same as @p inputs_vector. + */ + void configure(std::vector<ITensor *> inputs_vector, ITensor *output); + + // Inherited methods overridden: + void run() override; + +private: + std::vector<ITensor *> _inputs_vector; + std::unique_ptr<NEDepthConcatenateLayerKernel[]> _concat_kernels_vector; + std::unique_ptr<NEFillBorderKernel[]> _border_handlers_vector; + unsigned int _num_inputs; +}; +} +#endif /* __ARM_COMPUTE_NEDEPTHCONCATENATE_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEDepthConvert.h b/arm_compute/runtime/NEON/functions/NEDepthConvert.h deleted file mode 100644 index 37f7293fb3..0000000000 --- a/arm_compute/runtime/NEON/functions/NEDepthConvert.h +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Copyright (c) 2016, 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef __ARM_COMPUTE_NEDEPTHCONVERT_H__ -#define __ARM_COMPUTE_NEDEPTHCONVERT_H__ - -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/INESimpleFunction.h" - -#include <cstdint> - -namespace arm_compute -{ -class ITensor; - -/**Basic function to run @ref NEDepthConvertKernel */ -class NEDepthConvert : public INESimpleFunction -{ -public: - /* Contructor */ - NEDepthConvert() = default; - /** Prevent instances of this class from being copied (As this class contains pointers)*/ - NEDepthConvert(const NEDepthConvert &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers)*/ - const NEDepthConvert &operator=(const NEDepthConvert &) = delete; - /** Initialize the function's source, destination - * - * Valid conversions Input -> Output : - * QS8 -> QS8, F32 - * U8 -> U16, S16, S32 - * U16 -> U8, U32 - * S16 -> U8, S32 - * QS16 -> QS16, F32 - * F32 -> QS8, QS16 - * - * @warning In case of in-place fixed point position conversion make sure that configure has been called - * before the updated tensor is used in other functions, as the TensorInfo of the tensor will be - * altered.
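A usage sketch for NEDepthConcatenateLayer as declared above; the shapes are illustrative, the output depth equals the sum of the input depths, and any x/y gap between an input and the output must be even so the fill-border kernel can pad symmetrically:

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/runtime/NEON/functions/NEDepthConcatenateLayer.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        // Two 16x16 inputs with 8 and 24 channels; the output holds 8 + 24 = 32 channels
        Tensor in0, in1, out;
        in0.allocator()->init(TensorInfo(TensorShape(16U, 16U, 8U), 1, DataType::F32));
        in1.allocator()->init(TensorInfo(TensorShape(16U, 16U, 24U), 1, DataType::F32));
        out.allocator()->init(TensorInfo(TensorShape(16U, 16U, 32U), 1, DataType::F32));

        NEDepthConcatenateLayer concat;
        concat.configure({ &in0, &in1 }, &out);

        in0.allocator()->allocate();
        in1.allocator()->allocate();
        out.allocator()->allocate();
        // ... fill in0 and in1 ...

        concat.run();
        return 0;
    }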
In-place is only supported for QS8 -> QS8, QS16 -> QS16. - * - * @param[in, out] input The input tensor to convert (Written in case of in-place computation). Data types supported: U8/QS8/U16/S16/F32. - * @param[out] output The output tensor. Can be null in case of in-place computation. Data types supported: U8/QS8/U16/S16/U32/S32/F32. - * @param[in] policy Conversion policy. - * @param[in] shift (Optional) Value for down/up conversions. Must be 0 <= shift < 8. - * In case of fixed point position conversion, it specifies the new fixed point position, if operation is in-place. - */ - void configure(ITensor *input, ITensor *output, ConvertPolicy policy, uint32_t shift = 0); -}; -} -#endif /*__ARM_COMPUTE_NEDEPTHCONVERT_H__*/ diff --git a/arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h b/arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h new file mode 100644 index 0000000000..b235e87b4a --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEDEPTHCONVERT_H__ +#define __ARM_COMPUTE_NEDEPTHCONVERT_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/INESimpleFunction.h" + +#include <cstdint> + +namespace arm_compute +{ +class ITensor; + +/** Basic function to run @ref NEDepthConvertLayerKernel */ +class NEDepthConvertLayer : public INESimpleFunction +{ +public: + /* Constructor */ + NEDepthConvertLayer() = default; + /** Prevent instances of this class from being copied (As this class contains pointers)*/ + NEDepthConvertLayer(const NEDepthConvertLayer &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers)*/ + const NEDepthConvertLayer &operator=(const NEDepthConvertLayer &) = delete; + /** Initialize the function's source, destination + * + * Valid conversions Input -> Output : + * QS8 -> QS8, F32 + * U8 -> U16, S16, S32 + * U16 -> U8, U32 + * S16 -> U8, S32 + * QS16 -> QS16, F32 + * F32 -> QS8, QS16 + * + * @warning In case of in-place fixed point position conversion make sure that configure has been called + * before the updated tensor is used in other functions, as the TensorInfo of the tensor will be + * altered. In-place is only supported for QS8 -> QS8, QS16 -> QS16. + * + * @param[in, out] input The input tensor to convert (Written in case of in-place computation).
Data types supported: U8/QS8/U16/S16/F32. + * @param[out] output The output tensor. Can be null in case of in-place computation. Data types supported: U8/QS8/U16/S16/U32/S32/F32. + * @param[in] policy Conversion policy. + * @param[in] shift (Optional) Value for down/up conversions. Must be 0 <= shift < 8. + * In case of fixed point position conversion, it specifies the new fixed point position, if operation is in-place. + */ + void configure(ITensor *input, ITensor *output, ConvertPolicy policy, uint32_t shift = 0); +}; +} +#endif /*__ARM_COMPUTE_NEDEPTHCONVERT_H__*/ diff --git a/arm_compute/runtime/NEON/functions/NEDepthwiseConvolution.h b/arm_compute/runtime/NEON/functions/NEDepthwiseConvolution.h deleted file mode 100644 index f2c209cd80..0000000000 --- a/arm_compute/runtime/NEON/functions/NEDepthwiseConvolution.h +++ /dev/null @@ -1,113 +0,0 @@ -/* - * Copyright (c) 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef __ARM_COMPUTE_NEDEPTHWISECONVOLUTION_H__ -#define __ARM_COMPUTE_NEDEPTHWISECONVOLUTION_H__ - -#include "arm_compute/core/NEON/kernels/NEDepthwiseConvolution3x3Kernel.h" -#include "arm_compute/core/NEON/kernels/NEDepthwiseIm2ColKernel.h" -#include "arm_compute/core/NEON/kernels/NEDepthwiseVectorToTensorKernel.h" -#include "arm_compute/core/NEON/kernels/NEDepthwiseWeightsReshapeKernel.h" -#include "arm_compute/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.h" -#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" -#include "arm_compute/core/NEON/kernels/NEGEMMMatrixVectorMultiplyKernel.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/IFunction.h" -#include "arm_compute/runtime/IMemoryManager.h" -#include "arm_compute/runtime/MemoryGroup.h" -#include "arm_compute/runtime/Tensor.h" - -namespace arm_compute -{ -class ITensor; - -/** Basic function to execute a depthwise convolution for kernel size 3x3xC. This function calls the following NEON kernels: - * - * -# @ref NEDepthwiseConvolution3x3 - * -# @ref NEFillBorderKernel (if pad_x or pad_y > 0) - * - */ -class NEDepthwiseConvolution3x3 : public IFunction -{ -public: - /** Default constructor */ - NEDepthwiseConvolution3x3(); - /** Initialize the function's source, destination, kernels and border_size. - * - * @param[in, out] input Source tensor. Data type supported: F32. (Written to only for border filling). - * @param[in] weights Weights tensor. These are 3D tensors with shape [3, 3, IFM]. 
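A usage sketch for NEDepthConvertLayer as declared above; the U8 -> S16 up-conversion and the shift value are illustrative picks from the documented table:

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        Tensor u8_in, s16_out;
        u8_in.allocator()->init(TensorInfo(TensorShape(640U, 480U), 1, DataType::U8));
        s16_out.allocator()->init(TensorInfo(TensorShape(640U, 480U), 1, DataType::S16));

        // U8 -> S16 is an up-conversion; shift = 0 leaves the values unscaled
        NEDepthConvertLayer convert;
        convert.configure(&u8_in, &s16_out, ConvertPolicy::SATURATE, 0);

        u8_in.allocator()->allocate();
        s16_out.allocator()->allocate();
        // ... fill u8_in ...

        convert.run();
        return 0;
    }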
Data type supported: Same as @p input. - * @param[in] biases (Optional) Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed. - * Data type supported: Same as @p input. - * @param[out] output Destination tensor. Data type supported: same as @p input. - * @param[in] conv_info Padding and stride information to use for the convolution. - */ - void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info); - - // Inherited methods overriden: - void run() override; - -private: - NEDepthwiseConvolution3x3Kernel _kernel; - NEDirectConvolutionLayerBiasAccumulateKernel _bias_kernel; - NEFillBorderKernel _border_handler; - bool _has_bias; -}; - -/** Basic function to execute a generic depthwise convolution. This function calls the following OpenCL kernels: - * - * -# @ref NEDepthwiseIm2ColKernel - * -# @ref NEDepthwiseWeightsReshapeKernel - * -# @ref NEGEMMMatrixVectorMultiplyKernel - * -# @ref NEFillBorderKernel (if pad_x or pad_y > 0) - * - */ -class NEDepthwiseConvolution : public IFunction -{ -public: - /** Default constructor */ - NEDepthwiseConvolution(); - /** Initialize the function's source, destination, weights and convolution information. - * - * @param[in, out] input Source tensor. Data type supported: F32. (Written to only for border filling). - * @param[out] output Destination tensor. Data type supported: same as @p input. - * @param[in] weights Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input. - * @param[in] biases (Optional) Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed. - * Data type supported: Same as @p input. - * @param[in] conv_info Padding and stride information to use for the convolution. - */ - void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info); - - // Inherited methods overriden: - void run() override; - -private: - NEDepthwiseIm2ColKernel _im2col_kernel; - NEDepthwiseWeightsReshapeKernel _weights_reshape_kernel; - NEGEMMMatrixVectorMultiplyKernel _v2mm_kernel; - NEDepthwiseVectorToTensorKernel _vector_to_tensor_kernel; - Tensor _input_reshaped; - Tensor _weights_reshaped; - Tensor _v2mm_output; -}; -} -#endif /* __ARM_COMPUTE_NEDEPTHWISECONVOLUTION_H__ */ \ No newline at end of file diff --git a/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h new file mode 100644 index 0000000000..0da16ab2a9 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEDEPTHWISECONVOLUTION_H__ +#define __ARM_COMPUTE_NEDEPTHWISECONVOLUTION_H__ + +#include "arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.h" +#include "arm_compute/core/NEON/kernels/NEDepthwiseIm2ColKernel.h" +#include "arm_compute/core/NEON/kernels/NEDepthwiseVectorToTensorKernel.h" +#include "arm_compute/core/NEON/kernels/NEDepthwiseWeightsReshapeKernel.h" +#include "arm_compute/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.h" +#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" +#include "arm_compute/core/NEON/kernels/NEGEMMMatrixVectorMultiplyKernel.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/IFunction.h" +#include "arm_compute/runtime/IMemoryManager.h" +#include "arm_compute/runtime/MemoryGroup.h" +#include "arm_compute/runtime/Tensor.h" + +namespace arm_compute +{ +class ITensor; + +/** Basic function to execute a depthwise convolution for kernel size 3x3xC. This function calls the following NEON kernels: + * + * -# @ref NEDepthwiseConvolutionLayer3x3Kernel + * -# @ref NEFillBorderKernel (if pad_x or pad_y > 0) + * + */ +class NEDepthwiseConvolutionLayer3x3 : public IFunction +{ +public: + /** Default constructor */ + NEDepthwiseConvolutionLayer3x3(); + /** Initialize the function's source, destination, kernels and border_size. + * + * @param[in, out] input Source tensor. Data type supported: F32. (Written to only for border filling). + * @param[in] weights Weights tensor. These are 3D tensors with shape [3, 3, IFM]. Data type supported: Same as @p input. + * @param[in] biases (Optional) Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed. + * Data type supported: Same as @p input. + * @param[out] output Destination tensor. Data type supported: same as @p input. + * @param[in] conv_info Padding and stride information to use for the convolution. + */ + void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info); + + // Inherited methods overridden: + void run() override; + +private: + NEDepthwiseConvolutionLayer3x3Kernel _kernel; + NEDirectConvolutionLayerBiasAccumulateKernel _bias_kernel; + NEFillBorderKernel _border_handler; + bool _has_bias; +}; + +/** Basic function to execute a generic depthwise convolution. This function calls the following NEON kernels: + * + * -# @ref NEDepthwiseIm2ColKernel + * -# @ref NEDepthwiseWeightsReshapeKernel + * -# @ref NEGEMMMatrixVectorMultiplyKernel + * -# @ref NEFillBorderKernel (if pad_x or pad_y > 0) + * + */ +class NEDepthwiseConvolutionLayer : public IFunction +{ +public: + /** Default constructor */ + NEDepthwiseConvolutionLayer(); + /** Initialize the function's source, destination, weights and convolution information. + * + * @param[in, out] input Source tensor. Data type supported: F32. (Written to only for border filling). + * @param[out] output Destination tensor. Data type supported: same as @p input. + * @param[in] weights Weights tensor.
These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input. + * @param[in] biases (Optional) Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed. + * Data type supported: Same as @p input. + * @param[in] conv_info Padding and stride information to use for the convolution. + */ + void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info); + + // Inherited methods overridden: + void run() override; + +private: + NEDepthwiseIm2ColKernel _im2col_kernel; + NEDepthwiseWeightsReshapeKernel _weights_reshape_kernel; + NEGEMMMatrixVectorMultiplyKernel _v2mm_kernel; + NEDepthwiseVectorToTensorKernel _vector_to_tensor_kernel; + Tensor _input_reshaped; + Tensor _weights_reshaped; + Tensor _v2mm_output; +}; +} +#endif /* __ARM_COMPUTE_NEDEPTHWISECONVOLUTION_H__ */ \ No newline at end of file diff --git a/arm_compute/runtime/NEON/functions/NEDepthwiseSeparableConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDepthwiseSeparableConvolutionLayer.h index 3f4c1389f0..0562c07515 100644 --- a/arm_compute/runtime/NEON/functions/NEDepthwiseSeparableConvolutionLayer.h +++ b/arm_compute/runtime/NEON/functions/NEDepthwiseSeparableConvolutionLayer.h @@ -27,7 +27,7 @@ #include "arm_compute/core/Types.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/NEON/INESimpleFunction.h" -#include "arm_compute/runtime/NEON/functions/NEDepthwiseConvolution.h" +#include "arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h" #include "arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h" #include "arm_compute/runtime/Tensor.h" @@ -39,7 +39,7 @@ class ITensor; /** Basic function to execute depthwise convolution. This function calls the following NEON kernels and function: * - * -# @ref NEDepthwiseConvolution + * -# @ref NEDepthwiseConvolutionLayer * -# @ref NEDirectConvolutionLayer * */ @@ -72,8 +72,8 @@ public: void run() override; private: - NEDepthwiseConvolution _depthwise_conv; - NEDirectConvolutionLayer _pointwise_conv; + NEDepthwiseConvolutionLayer _depthwise_conv; + NEDirectConvolutionLayer _pointwise_conv; }; } #endif /*__ARM_COMPUTE_NEON_DEPTHWISE_SEPARABLE_CONVOLUTION_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEL2Normalize.h b/arm_compute/runtime/NEON/functions/NEL2Normalize.h deleted file mode 100644 index 95d5186c13..0000000000 --- a/arm_compute/runtime/NEON/functions/NEL2Normalize.h +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Copyright (c) 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
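A usage sketch for the 3x3 variant declared above (the generic NEDepthwiseConvolutionLayer shares the same configure() signature); the dimensions, stride and padding are illustrative:

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        // 32x32 input with 16 channels, 3x3 depthwise kernel, stride 1, pad 1 (same-size output)
        Tensor src, weights, bias, dst;
        src.allocator()->init(TensorInfo(TensorShape(32U, 32U, 16U), 1, DataType::F32));
        weights.allocator()->init(TensorInfo(TensorShape(3U, 3U, 16U), 1, DataType::F32));
        bias.allocator()->init(TensorInfo(TensorShape(16U), 1, DataType::F32));
        dst.allocator()->init(TensorInfo(TensorShape(32U, 32U, 16U), 1, DataType::F32));

        NEDepthwiseConvolutionLayer3x3 dwc;
        dwc.configure(&src, &weights, &bias, &dst, PadStrideInfo(1, 1, 1, 1));

        for(auto *t : { &src, &weights, &bias, &dst })
        {
            t->allocator()->allocate();
        }
        // ... fill src, weights and bias ...

        dwc.run();
        return 0;
    }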
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef __ARM_COMPUTE_NEL2NORMALIZE_H__ -#define __ARM_COMPUTE_NEL2NORMALIZE_H__ - -#include "arm_compute/core/NEON/kernels/NEL2NormalizeKernel.h" -#include "arm_compute/runtime/IFunction.h" -#include "arm_compute/runtime/IMemoryManager.h" -#include "arm_compute/runtime/MemoryGroup.h" -#include "arm_compute/runtime/NEON/functions/NEReductionOperation.h" -#include "arm_compute/runtime/Tensor.h" - -#include - -namespace arm_compute -{ -class ITensor; - -/** Basic function to perform a L2 normalization on a given axis. - * - * This function runs the following kernels: - * -# @ref NEReductionOperation - * -# @ref NEL2NormalizeKernel - */ -class NEL2Normalize : public IFunction -{ -public: - /** Constructor */ - NEL2Normalize(std::shared_ptr memory_manager = nullptr); - /** Set the input and output tensors. - * - * @param[in, out] input Source tensor. Data types supported: F32. (Written to only for border_size != 0) - * @param[out] output Destination tensor. Data types supported: same as @p input. - * @param[in] axis Dimension along which to reduce. Supported reduction axis : 0 - * @param[in] epsilon Lower bound value for the normalization. - */ - void configure(ITensor *input, ITensor *output, unsigned int axis, float epsilon = 1e-12); - - // Inherited methods overridden: - void run() override; - -private: - MemoryGroup _memory_group; - NEReductionOperation _reduce_func; - NEL2NormalizeKernel _normalize_kernel; - Tensor _sumsq; -}; -} -#endif /* __ARM_COMPUTE_NEL2NORMALIZE_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEL2NormalizeLayer.h b/arm_compute/runtime/NEON/functions/NEL2NormalizeLayer.h new file mode 100644 index 0000000000..100e239406 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/NEL2NormalizeLayer.h @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_NEL2NORMALIZE_H__ +#define __ARM_COMPUTE_NEL2NORMALIZE_H__ + +#include "arm_compute/core/NEON/kernels/NEL2NormalizeLayerKernel.h" +#include "arm_compute/runtime/IFunction.h" +#include "arm_compute/runtime/IMemoryManager.h" +#include "arm_compute/runtime/MemoryGroup.h" +#include "arm_compute/runtime/NEON/functions/NEReductionOperation.h" +#include "arm_compute/runtime/Tensor.h" + +#include <memory> + +namespace arm_compute +{ +class ITensor; + +/** Basic function to perform a L2 normalization on a given axis. + * + * This function runs the following kernels: + * -# @ref NEReductionOperation + * -# @ref NEL2NormalizeLayerKernel + */ +class NEL2NormalizeLayer : public IFunction +{ +public: + /** Constructor */ + NEL2NormalizeLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr); + /** Set the input and output tensors. + * + * @param[in, out] input Source tensor. Data types supported: F32. (Written to only for border_size != 0) + * @param[out] output Destination tensor. Data types supported: same as @p input. + * @param[in] axis Dimension along which to reduce. Supported reduction axis: 0 + * @param[in] epsilon Lower bound value for the normalization. + */ + void configure(ITensor *input, ITensor *output, unsigned int axis, float epsilon = 1e-12); + + // Inherited methods overridden: + void run() override; + +private: + MemoryGroup _memory_group; + NEReductionOperation _reduce_func; + NEL2NormalizeLayerKernel _normalize_kernel; + Tensor _sumsq; +}; +} +#endif /* __ARM_COMPUTE_NEL2NORMALIZE_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NELaplacianPyramid.h b/arm_compute/runtime/NEON/functions/NELaplacianPyramid.h index 991ae7c293..baa4b7b1a5 100644 --- a/arm_compute/runtime/NEON/functions/NELaplacianPyramid.h +++ b/arm_compute/runtime/NEON/functions/NELaplacianPyramid.h @@ -27,7 +27,7 @@ #include "arm_compute/core/Types.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h" -#include "arm_compute/runtime/NEON/functions/NEDepthConvert.h" +#include "arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h" #include "arm_compute/runtime/NEON/functions/NEGaussian5x5.h" #include "arm_compute/runtime/NEON/functions/NEGaussianPyramid.h" #include "arm_compute/runtime/Pyramid.h" @@ -79,7 +79,7 @@ private: std::unique_ptr<NEArithmeticSubtraction[]> _subf; Pyramid _gauss_pyr; Pyramid _conv_pyr; - NEDepthConvert _depth_function; + NEDepthConvertLayer _depth_function; }; } #endif /*__ARM_COMPUTE_NELAPLACIANPYRAMID_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NELaplacianReconstruct.h b/arm_compute/runtime/NEON/functions/NELaplacianReconstruct.h index 4139733499..3d423607a3 100644 --- a/arm_compute/runtime/NEON/functions/NELaplacianReconstruct.h +++ b/arm_compute/runtime/NEON/functions/NELaplacianReconstruct.h @@ -27,7 +27,7 @@ #include "arm_compute/core/Types.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h" -#include "arm_compute/runtime/NEON/functions/NEDepthConvert.h" +#include "arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h" #include "arm_compute/runtime/NEON/functions/NEScale.h" #include "arm_compute/runtime/Pyramid.h" @@ -43,7 +43,7 @@ using IImage = ITensor; * * -# @ref NEArithmeticAddition * -# @ref NEScale - * -# @ref NEDepthConvert + * -# @ref NEDepthConvertLayer * * This function reconstructs the original image from a Laplacian Image Pyramid.
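The NEON L2-normalize function mirrors the CL sketch shown earlier; since the optional memory manager defaults to nullptr, a bare construction is enough (shape, axis and epsilon again illustrative):

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/runtime/NEON/functions/NEL2NormalizeLayer.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        Tensor src, dst;
        src.allocator()->init(TensorInfo(TensorShape(128U, 32U), 1, DataType::F32));
        dst.allocator()->init(TensorInfo(TensorShape(128U, 32U), 1, DataType::F32));

        NEL2NormalizeLayer l2_norm; // No memory manager: internal tensors use their own allocations
        l2_norm.configure(&src, &dst, 0, 1e-6f);

        src.allocator()->allocate();
        dst.allocator()->allocate();
        // ... fill src ...

        l2_norm.run(); // Work is dispatched through the NEON scheduler internally
        return 0;
    }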
* @@ -85,7 +85,7 @@ private: Pyramid _tmp_pyr; std::unique_ptr _addf; std::unique_ptr _scalef; - NEDepthConvert _depthf; + NEDepthConvertLayer _depthf; }; } #endif /*__ARM_COMPUTE_NELAPLACIANRECONSTRUCT_H__ */ diff --git a/docs/00_introduction.dox b/docs/00_introduction.dox index cc12897278..9d478e051a 100644 --- a/docs/00_introduction.dox +++ b/docs/00_introduction.dox @@ -175,21 +175,21 @@ v17.09 Public major release - @ref arm_compute::NEGEMMAssemblyBaseKernel @ref arm_compute::NEGEMMAArch64Kernel - @ref arm_compute::NEDequantizationLayerKernel / @ref arm_compute::NEDequantizationLayer - @ref arm_compute::NEFloorKernel / @ref arm_compute::NEFloor - - @ref arm_compute::NEL2NormalizeKernel / @ref arm_compute::NEL2Normalize + - @ref arm_compute::NEL2NormalizeLayerKernel / @ref arm_compute::NEL2NormalizeLayer - @ref arm_compute::NEQuantizationLayerKernel @ref arm_compute::NEMinMaxLayerKernel / @ref arm_compute::NEQuantizationLayer - @ref arm_compute::NEROIPoolingLayerKernel / @ref arm_compute::NEROIPoolingLayer - @ref arm_compute::NEReductionOperationKernel / @ref arm_compute::NEReductionOperation - @ref arm_compute::NEReshapeLayerKernel / @ref arm_compute::NEReshapeLayer - New OpenCL kernels / functions: - - @ref arm_compute::CLDepthwiseConvolution3x3Kernel @ref arm_compute::CLDepthwiseIm2ColKernel @ref arm_compute::CLDepthwiseVectorToTensorKernel @ref arm_compute::CLDepthwiseWeightsReshapeKernel / @ref arm_compute::CLDepthwiseConvolution3x3 @ref arm_compute::CLDepthwiseConvolution @ref arm_compute::CLDepthwiseSeparableConvolutionLayer + - @ref arm_compute::CLDepthwiseConvolutionLayer3x3Kernel @ref arm_compute::CLDepthwiseIm2ColKernel @ref arm_compute::CLDepthwiseVectorToTensorKernel @ref arm_compute::CLDepthwiseWeightsReshapeKernel / @ref arm_compute::CLDepthwiseConvolutionLayer3x3 @ref arm_compute::CLDepthwiseConvolutionLayer @ref arm_compute::CLDepthwiseSeparableConvolutionLayer - @ref arm_compute::CLDequantizationLayerKernel / @ref arm_compute::CLDequantizationLayer - @ref arm_compute::CLDirectConvolutionLayerKernel / @ref arm_compute::CLDirectConvolutionLayer - @ref arm_compute::CLFlattenLayer - @ref arm_compute::CLFloorKernel / @ref arm_compute::CLFloor - @ref arm_compute::CLGEMMTranspose1xW - @ref arm_compute::CLGEMMMatrixVectorMultiplyKernel - - @ref arm_compute::CLL2NormalizeKernel / @ref arm_compute::CLL2Normalize + - @ref arm_compute::CLL2NormalizeLayerKernel / @ref arm_compute::CLL2NormalizeLayer - @ref arm_compute::CLQuantizationLayerKernel @ref arm_compute::CLMinMaxLayerKernel / @ref arm_compute::CLQuantizationLayer - @ref arm_compute::CLROIPoolingLayerKernel / @ref arm_compute::CLROIPoolingLayer - @ref arm_compute::CLReductionOperationKernel / @ref arm_compute::CLReductionOperation @@ -206,7 +206,7 @@ v17.06 Public major release - User can specify his own scheduler by implementing the @ref arm_compute::IScheduler interface. 
- New OpenCL kernels / functions: - @ref arm_compute::CLBatchNormalizationLayerKernel / @ref arm_compute::CLBatchNormalizationLayer - - @ref arm_compute::CLDepthConcatenateKernel / @ref arm_compute::CLDepthConcatenate + - @ref arm_compute::CLDepthConcatenateLayerKernel / @ref arm_compute::CLDepthConcatenateLayer - @ref arm_compute::CLHOGOrientationBinningKernel @ref arm_compute::CLHOGBlockNormalizationKernel, @ref arm_compute::CLHOGDetectorKernel / @ref arm_compute::CLHOGDescriptor @ref arm_compute::CLHOGDetector @ref arm_compute::CLHOGGradient @ref arm_compute::CLHOGMultiDetection - @ref arm_compute::CLLocallyConnectedMatrixMultiplyKernel / @ref arm_compute::CLLocallyConnectedLayer - @ref arm_compute::CLWeightsReshapeKernel / @ref arm_compute::CLConvolutionLayerReshapeWeights @@ -214,7 +214,7 @@ v17.06 Public major release - @ref arm_compute::CPPDetectionWindowNonMaximaSuppressionKernel - New NEON kernels / functions: - @ref arm_compute::NEBatchNormalizationLayerKernel / @ref arm_compute::NEBatchNormalizationLayer - - @ref arm_compute::NEDepthConcatenateKernel / @ref arm_compute::NEDepthConcatenate + - @ref arm_compute::NEDepthConcatenateLayerKernel / @ref arm_compute::NEDepthConcatenateLayer - @ref arm_compute::NEDirectConvolutionLayerKernel / @ref arm_compute::NEDirectConvolutionLayer - @ref arm_compute::NELocallyConnectedMatrixMultiplyKernel / @ref arm_compute::NELocallyConnectedLayer - @ref arm_compute::NEWeightsReshapeKernel / @ref arm_compute::NEConvolutionLayerReshapeWeights diff --git a/src/core/CL/kernels/CLDepthConcatenateKernel.cpp b/src/core/CL/kernels/CLDepthConcatenateKernel.cpp deleted file mode 100644 index edfbf829ed..0000000000 --- a/src/core/CL/kernels/CLDepthConcatenateKernel.cpp +++ /dev/null @@ -1,139 +0,0 @@ -/* - * Copyright (c) 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include "arm_compute/core/CL/kernels/CLDepthConcatenateKernel.h" - -#include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/CL/OpenCL.h" -#include "arm_compute/core/Error.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/IAccessWindow.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Utils.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/core/Window.h" - -#include "support/ToolchainSupport.h" - -#include - -using namespace arm_compute; - -CLDepthConcatenateKernel::CLDepthConcatenateKernel() - : _input(nullptr), _output(nullptr), _top_bottom(0), _left_right(0), _depth_offset(0) -{ -} - -BorderSize CLDepthConcatenateKernel::border_size() const -{ - return BorderSize(_top_bottom, _left_right); -} - -void CLDepthConcatenateKernel::configure(const ICLTensor *input, unsigned int depth_offset, ICLTensor *output) -{ - static std::map> configs_map = - { - { 1, { "uchar", 16 } }, - { 2, { "ushort", 8 } }, - { 4, { "uint", 4 } }, - { 8, { "ulong", 2 } }, - }; - - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32); - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); - ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT_POSITION(input, output); - ARM_COMPUTE_ERROR_ON(input->info()->dimension(2) + depth_offset > output->info()->dimension(2)); - ARM_COMPUTE_ERROR_ON(input->info()->dimension(0) > output->info()->dimension(0)); - ARM_COMPUTE_ERROR_ON(input->info()->dimension(1) > output->info()->dimension(1)); - ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(3, input, output); - ARM_COMPUTE_ERROR_ON(configs_map.find(input->info()->element_size()) == configs_map.end()); - - // The gaps between the two lowest dimensions of input and output need to be divisible by 2 - // Otherwise it is not clear how the padding should be added onto the input tensor - ARM_COMPUTE_ERROR_ON((output->info()->dimension(0) - input->info()->dimension(0)) % 2); - ARM_COMPUTE_ERROR_ON((output->info()->dimension(1) - input->info()->dimension(1)) % 2); - - _input = input; - _output = output; - _depth_offset = depth_offset; - - // Add build options - auto config = configs_map.find(static_cast(input->info()->element_size())); - std::set build_opts; - build_opts.emplace(("-DDATA_TYPE=" + config->second.first)); - build_opts.emplace(("-DVEC_SIZE=" + support::cpp11::to_string(config->second.second))); - - // Create kernel - _kernel = static_cast(CLKernelLibrary::get().create_kernel("concatenate_depth", build_opts)); - - // Configure kernel window - _left_right = (output->info()->dimension(0) - input->info()->dimension(0)) / 2; - _top_bottom = (output->info()->dimension(1) - input->info()->dimension(1)) / 2; - - const unsigned int num_elems_processed_per_iteration = 16 / input->info()->element_size(); - const unsigned int num_elems_read_per_iteration = 16 / input->info()->element_size(); - const unsigned int num_rows_read_per_iteration = 1; - - // The window needs to be based on input as we copy all the depths of input - Window win = calculate_max_window(*output->info(), Steps(num_elems_processed_per_iteration)); - win.set(Window::DimZ, Window::Dimension(0, input->info()->tensor_shape().z(), 1)); - - AccessWindowRectangle input_access(input->info(), -_left_right, -_top_bottom, num_elems_read_per_iteration, num_rows_read_per_iteration); - AccessWindowHorizontal output_access(output->info(), 0, 
num_elems_processed_per_iteration); - update_window_and_padding(win, input_access, output_access); - output_access.set_valid_region(win, ValidRegion(Coordinates(0, 0), output->info()->tensor_shape())); - - ICLKernel::configure(win); -} - -void CLDepthConcatenateKernel::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); - - Window slice = window.first_slice_window_3D(); - - const int offset_to_first_elements_in_bytes = _depth_offset * _output->info()->strides_in_bytes()[2]; - - unsigned int idx = 2 * num_arguments_per_3D_tensor(); // Skip the input and output parameters - const cl_int3 offsets = - { - { - static_cast(_left_right), - static_cast(_top_bottom), - static_cast(offset_to_first_elements_in_bytes), - } - }; - _kernel.setArg(idx, offsets); - - do - { - unsigned int idx = 0; - add_3D_tensor_argument(idx, _input, slice); - add_3D_tensor_argument(idx, _output, slice); - enqueue(queue, *this, slice); - } - while(window.slide_window_slice_3D(slice)); -} diff --git a/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp b/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp new file mode 100644 index 0000000000..0275d4fd83 --- /dev/null +++ b/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp @@ -0,0 +1,139 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/core/CL/kernels/CLDepthConcatenateLayerKernel.h" + +#include "arm_compute/core/CL/CLHelpers.h" +#include "arm_compute/core/CL/CLKernelLibrary.h" +#include "arm_compute/core/CL/ICLTensor.h" +#include "arm_compute/core/CL/OpenCL.h" +#include "arm_compute/core/Error.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/IAccessWindow.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Utils.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/core/Window.h" + +#include "support/ToolchainSupport.h" + +#include + +using namespace arm_compute; + +CLDepthConcatenateLayerKernel::CLDepthConcatenateLayerKernel() + : _input(nullptr), _output(nullptr), _top_bottom(0), _left_right(0), _depth_offset(0) +{ +} + +BorderSize CLDepthConcatenateLayerKernel::border_size() const +{ + return BorderSize(_top_bottom, _left_right); +} + +void CLDepthConcatenateLayerKernel::configure(const ICLTensor *input, unsigned int depth_offset, ICLTensor *output) +{ + static std::map> configs_map = + { + { 1, { "uchar", 16 } }, + { 2, { "ushort", 8 } }, + { 4, { "uint", 4 } }, + { 8, { "ulong", 2 } }, + }; + + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32); + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); + ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT_POSITION(input, output); + ARM_COMPUTE_ERROR_ON(input->info()->dimension(2) + depth_offset > output->info()->dimension(2)); + ARM_COMPUTE_ERROR_ON(input->info()->dimension(0) > output->info()->dimension(0)); + ARM_COMPUTE_ERROR_ON(input->info()->dimension(1) > output->info()->dimension(1)); + ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(3, input, output); + ARM_COMPUTE_ERROR_ON(configs_map.find(input->info()->element_size()) == configs_map.end()); + + // The gaps between the two lowest dimensions of input and output need to be divisible by 2 + // Otherwise it is not clear how the padding should be added onto the input tensor + ARM_COMPUTE_ERROR_ON((output->info()->dimension(0) - input->info()->dimension(0)) % 2); + ARM_COMPUTE_ERROR_ON((output->info()->dimension(1) - input->info()->dimension(1)) % 2); + + _input = input; + _output = output; + _depth_offset = depth_offset; + + // Add build options + auto config = configs_map.find(static_cast(input->info()->element_size())); + std::set build_opts; + build_opts.emplace(("-DDATA_TYPE=" + config->second.first)); + build_opts.emplace(("-DVEC_SIZE=" + support::cpp11::to_string(config->second.second))); + + // Create kernel + _kernel = static_cast(CLKernelLibrary::get().create_kernel("concatenate_depth", build_opts)); + + // Configure kernel window + _left_right = (output->info()->dimension(0) - input->info()->dimension(0)) / 2; + _top_bottom = (output->info()->dimension(1) - input->info()->dimension(1)) / 2; + + const unsigned int num_elems_processed_per_iteration = 16 / input->info()->element_size(); + const unsigned int num_elems_read_per_iteration = 16 / input->info()->element_size(); + const unsigned int num_rows_read_per_iteration = 1; + + // The window needs to be based on input as we copy all the depths of input + Window win = calculate_max_window(*output->info(), Steps(num_elems_processed_per_iteration)); + win.set(Window::DimZ, Window::Dimension(0, input->info()->tensor_shape().z(), 1)); + + AccessWindowRectangle input_access(input->info(), -_left_right, -_top_bottom, num_elems_read_per_iteration, num_rows_read_per_iteration); + AccessWindowHorizontal 
output_access(output->info(), 0, num_elems_processed_per_iteration); + update_window_and_padding(win, input_access, output_access); + output_access.set_valid_region(win, ValidRegion(Coordinates(0, 0), output->info()->tensor_shape())); + + ICLKernel::configure(win); +} + +void CLDepthConcatenateLayerKernel::run(const Window &window, cl::CommandQueue &queue) +{ + ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); + ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); + + Window slice = window.first_slice_window_3D(); + + const int offset_to_first_elements_in_bytes = _depth_offset * _output->info()->strides_in_bytes()[2]; + + unsigned int idx = 2 * num_arguments_per_3D_tensor(); // Skip the input and output parameters + const cl_int3 offsets = + { + { + static_cast(_left_right), + static_cast(_top_bottom), + static_cast(offset_to_first_elements_in_bytes), + } + }; + _kernel.setArg(idx, offsets); + + do + { + unsigned int idx = 0; + add_3D_tensor_argument(idx, _input, slice); + add_3D_tensor_argument(idx, _output, slice); + enqueue(queue, *this, slice); + } + while(window.slide_window_slice_3D(slice)); +} diff --git a/src/core/CL/kernels/CLDepthConvertKernel.cpp b/src/core/CL/kernels/CLDepthConvertKernel.cpp deleted file mode 100644 index b2132073d5..0000000000 --- a/src/core/CL/kernels/CLDepthConvertKernel.cpp +++ /dev/null @@ -1,117 +0,0 @@ -/* - * Copyright (c) 2016, 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include "arm_compute/core/CL/kernels/CLDepthConvertKernel.h" - -#include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/CL/OpenCL.h" -#include "arm_compute/core/Error.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Utils.h" -#include "arm_compute/core/Validate.h" - -#include -#include -#include - -using namespace arm_compute; - -void CLDepthConvertKernel::configure(const ICLTensor *input, ICLTensor *output, ConvertPolicy policy, uint32_t shift) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::U8, DataType::S16, DataType::QS16, - DataType::U16, DataType::U32, DataType::S32, DataType::F32); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::QS8, DataType::U8, DataType::S16, DataType::QS16, - DataType::U16, DataType::U32, DataType::S32, DataType::F32); - ARM_COMPUTE_ERROR_ON(input == output); - ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == output->info()->data_type(), "Input and output data types must be different"); - ARM_COMPUTE_ERROR_ON(shift >= 8); - - // Check if convertion is supported - ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::QS8 && output->info()->data_type() != DataType::F32, - "Only data types supported [in] QS8 -> [out] F32"); - ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::QS16 && (output->info()->data_type() != DataType::F32), - "Only data types supported [in] QS16 -> [out] F32"); - ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::F32 && ((output->info()->data_type() != DataType::QS8) && output->info()->data_type() != DataType::QS16), - "Only data types supported [in] F32 -> [out] QS8, QS16"); - ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::U8 && (output->info()->data_type() != DataType::U16 && output->info()->data_type() != DataType::S16 - && output->info()->data_type() != DataType::U32 && output->info()->data_type() != DataType::S32), - "Only data types supported [in] U8 -> [out] U16, S16, U32, S32"); - - ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::U16 && (output->info()->data_type() != DataType::U8 && output->info()->data_type() != DataType::U32 - && output->info()->data_type() != DataType::S32), - "Only data types supported [in] U16 -> [out] U8, U32, S32"); - - ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::S16 && (output->info()->data_type() != DataType::U8 && output->info()->data_type() != DataType::U32 - && output->info()->data_type() != DataType::S32), - "Only data types supported [in] S16 -> [out] U8, U32, S32"); - - ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::U32 && (output->info()->data_type() != DataType::U8 && output->info()->data_type() != DataType::U16 - && output->info()->data_type() != DataType::S16), - "Only data types supported [in] U32 -> [out] U8, U16, S16"); - - ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::S32 && (output->info()->data_type() != DataType::U8 && output->info()->data_type() != DataType::U16 - && output->info()->data_type() != DataType::S16), - "Only data types supported [in] S32 -> [out] U8, U16, S16"); - - // Auto initialize output shape if not initialized (We can only auto-configure the shape, datatype must be given) - set_shape_if_empty(*output->info(), input->info()->tensor_shape()); - - ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, output); - - // Get data sizes - const size_t 
input_size = data_size_from_type(input->info()->data_type()); - const size_t output_size = data_size_from_type(output->info()->data_type()); - - // Construct kernel name and build options - std::string kernel_name = "convert_depth"; - std::set build_opts; - if(input_size > output_size) - { - kernel_name += "_down"; - // Down conversions from float always SATURATE as out-of-bounds conversion from float->integer is implementation defined - build_opts.insert(((policy == ConvertPolicy::WRAP) && !is_data_type_float(input->info()->data_type())) ? "-DWRAP" : "-DSATURATE"); - } - else - { - kernel_name += "_up"; - } - build_opts.emplace("-DDATA_TYPE_IN=" + get_cl_type_from_data_type(input->info()->data_type())); - build_opts.emplace("-DDATA_TYPE_OUT=" + get_cl_type_from_data_type(output->info()->data_type())); - if(is_data_type_fixed_point(input->info()->data_type()) || is_data_type_fixed_point(output->info()->data_type())) - { - build_opts.emplace("-DFIXED_POINT_POSITION=" + support::cpp11::to_string(input->info()->fixed_point_position())); - } - - // Create kernel - _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name, build_opts)); - - // Set shift arg - unsigned int idx = 2 * num_arguments_per_2D_tensor(); //Skip the input and output parameters - _kernel.setArg(idx++, shift); - - // Configure kernel - constexpr unsigned int num_elems_processed_per_iteration = 16; - ICLSimple2DKernel::configure(input, output, num_elems_processed_per_iteration); -} diff --git a/src/core/CL/kernels/CLDepthConvertLayerKernel.cpp b/src/core/CL/kernels/CLDepthConvertLayerKernel.cpp new file mode 100644 index 0000000000..83908a1469 --- /dev/null +++ b/src/core/CL/kernels/CLDepthConvertLayerKernel.cpp @@ -0,0 +1,117 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
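
The chain of ARM_COMPUTE_ERROR_ON_MSG checks in configure() encodes a fixed table of legal conversions. The same table expressed as a standalone predicate (the enum and helper name below are illustrative, not library API):

    #include <set>
    #include <utility>

    enum class DT { U8, U16, S16, U32, S32, QS8, QS16, F32 };

    // QS8/QS16 only widen to F32, F32 only narrows to QS8/QS16, and the
    // integer types convert between the widths listed in the checks above.
    bool conversion_supported(DT in, DT out)
    {
        static const std::set<std::pair<DT, DT>> valid =
        {
            { DT::QS8, DT::F32 }, { DT::QS16, DT::F32 },
            { DT::F32, DT::QS8 }, { DT::F32, DT::QS16 },
            { DT::U8, DT::U16 }, { DT::U8, DT::S16 }, { DT::U8, DT::U32 }, { DT::U8, DT::S32 },
            { DT::U16, DT::U8 }, { DT::U16, DT::U32 }, { DT::U16, DT::S32 },
            { DT::S16, DT::U8 }, { DT::S16, DT::U32 }, { DT::S16, DT::S32 },
            { DT::U32, DT::U8 }, { DT::U32, DT::U16 }, { DT::U32, DT::S16 },
            { DT::S32, DT::U8 }, { DT::S32, DT::U16 }, { DT::S32, DT::S16 },
        };
        return valid.count({ in, out }) != 0;
    }
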
+ */ +#include "arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h" + +#include "arm_compute/core/CL/CLHelpers.h" +#include "arm_compute/core/CL/CLKernelLibrary.h" +#include "arm_compute/core/CL/ICLTensor.h" +#include "arm_compute/core/CL/OpenCL.h" +#include "arm_compute/core/Error.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Utils.h" +#include "arm_compute/core/Validate.h" + +#include <cstddef> +#include <set> +#include <string> + +using namespace arm_compute; + +void CLDepthConvertLayerKernel::configure(const ICLTensor *input, ICLTensor *output, ConvertPolicy policy, uint32_t shift) +{ + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::U8, DataType::S16, DataType::QS16, + DataType::U16, DataType::U32, DataType::S32, DataType::F32); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::QS8, DataType::U8, DataType::S16, DataType::QS16, + DataType::U16, DataType::U32, DataType::S32, DataType::F32); + ARM_COMPUTE_ERROR_ON(input == output); + ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == output->info()->data_type(), "Input and output data types must be different"); + ARM_COMPUTE_ERROR_ON(shift >= 8); + + // Check if conversion is supported + ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::QS8 && output->info()->data_type() != DataType::F32, + "Only data types supported [in] QS8 -> [out] F32"); + ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::QS16 && (output->info()->data_type() != DataType::F32), + "Only data types supported [in] QS16 -> [out] F32"); + ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::F32 && ((output->info()->data_type() != DataType::QS8) && output->info()->data_type() != DataType::QS16), + "Only data types supported [in] F32 -> [out] QS8, QS16"); + ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::U8 && (output->info()->data_type() != DataType::U16 && output->info()->data_type() != DataType::S16 + && output->info()->data_type() != DataType::U32 && output->info()->data_type() != DataType::S32), + "Only data types supported [in] U8 -> [out] U16, S16, U32, S32"); + + ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::U16 && (output->info()->data_type() != DataType::U8 && output->info()->data_type() != DataType::U32 + && output->info()->data_type() != DataType::S32), + "Only data types supported [in] U16 -> [out] U8, U32, S32"); + + ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::S16 && (output->info()->data_type() != DataType::U8 && output->info()->data_type() != DataType::U32 + && output->info()->data_type() != DataType::S32), + "Only data types supported [in] S16 -> [out] U8, U32, S32"); + + ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::U32 && (output->info()->data_type() != DataType::U8 && output->info()->data_type() != DataType::U16 + && output->info()->data_type() != DataType::S16), + "Only data types supported [in] U32 -> [out] U8, U16, S16"); + + ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::S32 && (output->info()->data_type() != DataType::U8 && output->info()->data_type() != DataType::U16 + && output->info()->data_type() != DataType::S16), + "Only data types supported [in] S32 -> [out] U8, U16, S16"); + + // Auto initialize output shape if not initialized (We can only auto-configure the shape, datatype must be given) + set_shape_if_empty(*output->info(), input->info()->tensor_shape()); + + ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, output); + + // Get data sizes + const
size_t input_size = data_size_from_type(input->info()->data_type()); + const size_t output_size = data_size_from_type(output->info()->data_type()); + + // Construct kernel name and build options + std::string kernel_name = "convert_depth"; + std::set<std::string> build_opts; + if(input_size > output_size) + { + kernel_name += "_down"; + // Down conversions from float always SATURATE as out-of-bounds conversion from float->integer is implementation defined + build_opts.insert(((policy == ConvertPolicy::WRAP) && !is_data_type_float(input->info()->data_type())) ? "-DWRAP" : "-DSATURATE"); + } + else + { + kernel_name += "_up"; + } + build_opts.emplace("-DDATA_TYPE_IN=" + get_cl_type_from_data_type(input->info()->data_type())); + build_opts.emplace("-DDATA_TYPE_OUT=" + get_cl_type_from_data_type(output->info()->data_type())); + if(is_data_type_fixed_point(input->info()->data_type()) || is_data_type_fixed_point(output->info()->data_type())) + { + build_opts.emplace("-DFIXED_POINT_POSITION=" + support::cpp11::to_string(input->info()->fixed_point_position())); + } + + // Create kernel + _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts)); + + // Set shift arg + unsigned int idx = 2 * num_arguments_per_2D_tensor(); //Skip the input and output parameters + _kernel.setArg(idx++, shift); + + // Configure kernel + constexpr unsigned int num_elems_processed_per_iteration = 16; + ICLSimple2DKernel::configure(input, output, num_elems_processed_per_iteration); +} diff --git a/src/core/CL/kernels/CLDepthwiseConvolution3x3Kernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolution3x3Kernel.cpp deleted file mode 100644 index e86c55fbc0..0000000000 --- a/src/core/CL/kernels/CLDepthwiseConvolution3x3Kernel.cpp +++ /dev/null @@ -1,219 +0,0 @@ -/* - * Copyright (c) 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE.
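
The kernel-name selection above is driven purely by element sizes: widening picks convert_depth_up, narrowing picks convert_depth_down and forces saturation when the source is a float type, since float-to-integer overflow is implementation defined in OpenCL. A standalone sketch of that branch, with hypothetical parameter names:

    #include <cstddef>
    #include <set>
    #include <string>

    std::string select_convert_kernel(std::size_t input_size, std::size_t output_size,
                                      bool wrap_policy, bool input_is_float,
                                      std::set<std::string> &build_opts)
    {
        std::string kernel_name = "convert_depth";
        if(input_size > output_size)
        {
            kernel_name += "_down";
            // WRAP is only honoured for integer sources; float sources must saturate
            build_opts.insert((wrap_policy && !input_is_float) ? "-DWRAP" : "-DSATURATE");
        }
        else
        {
            kernel_name += "_up";
        }
        return kernel_name;
    }
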
- */ -#include "arm_compute/core/CL/kernels/CLDepthwiseConvolution3x3Kernel.h" - -#include "arm_compute/core/AccessWindowStatic.h" -#include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/ICLKernel.h" -#include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/Error.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/core/Utils.h" -#include "arm_compute/core/utils/quantization/AsymmHelpers.h" - -using namespace arm_compute; - -namespace -{ -/** Calculates expected output shape dimension - * - * @param[in] Input shape - * - * @return Expected output shape - */ -TensorShape get_output_shape(TensorShape input_shape, TensorShape weights_shape, PadStrideInfo conv_info) -{ - unsigned int output_width = 0; - unsigned int output_height = 0; - - std::tie(output_width, output_height) = scaled_dimensions(input_shape.x(), input_shape.y(), weights_shape.x(), weights_shape.y(), conv_info); - - TensorShape output_shape = input_shape; - output_shape.set(0, output_width); - output_shape.set(1, output_height); - - return output_shape; -} -} // namespace - -CLDepthwiseConvolution3x3Kernel::CLDepthwiseConvolution3x3Kernel() - : _border_size(0), _input(), _output(), _weights(), _biases(), _conv_stride_x(0), _conv_stride_y(0), _conv_pad_left(0), _conv_pad_top(0) -{ -} - -BorderSize CLDepthwiseConvolution3x3Kernel::border_size() const -{ - return _border_size; -} - -void CLDepthwiseConvolution3x3Kernel::configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F32); - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights); - ARM_COMPUTE_ERROR_ON(weights->info()->dimension(0) != 3 || weights->info()->dimension(1) != 3); - - if(biases != nullptr) - { - if(is_data_type_quantized_asymmetric(weights->info()->data_type())) - { - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(biases, 1, DataType::S32); - } - else - { - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(weights, biases); - } - ARM_COMPUTE_ERROR_ON(biases->info()->dimension(0) != weights->info()->dimension(2)); - ARM_COMPUTE_ERROR_ON(biases->info()->num_dimensions() > 1); - } - - // Get convolved dimensions - TensorShape output_shape = get_output_shape(input->info()->tensor_shape(), weights->info()->tensor_shape(), conv_info); - - // Output auto inizialitation if not yet initialized - auto_init_if_empty(*output->info(), - output_shape, - 1, - input->info()->data_type(), - input->info()->fixed_point_position(), - input->info()->quantization_info()); - - ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(output->info()->tensor_shape(), output_shape); - - _input = input; - _output = output; - _weights = weights; - _biases = biases; - _conv_stride_x = conv_info.stride().first; - _conv_stride_y = conv_info.stride().second; - _conv_pad_left = conv_info.pad_left(); - _conv_pad_top = conv_info.pad_top(); - _border_size = BorderSize(_conv_pad_top, conv_info.pad_right(), conv_info.pad_bottom(), _conv_pad_left); - - // Set build options - ARM_COMPUTE_ERROR_ON(_conv_stride_x < 1 || _conv_stride_x > 3); - CLBuildOptions build_opts; - build_opts.add_option("-DCONV_STRIDE_X=" + support::cpp11::to_string(_conv_stride_x)); - build_opts.add_option_if(_biases != nullptr, "-DHAS_BIAS"); - - // Create kernel - std::string kernel_name = 
is_data_type_quantized_asymmetric(_input->info()->data_type()) ? "depthwise_convolution_3x3_quantized" : "depthwise_convolution_3x3"; - _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options())); - - // Set static arguments - if(is_data_type_quantized_asymmetric(_input->info()->data_type())) - { - float multiplier = _input->info()->quantization_info().scale * _weights->info()->quantization_info().scale / _output->info()->quantization_info().scale; - int output_multiplier = 0; - int output_shift = 0; - quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift); - - unsigned int idx = 3 * num_arguments_per_3D_tensor() + ((_biases != nullptr) ? num_arguments_per_1D_tensor() : 0); - - _kernel.setArg(idx++, -_input->info()->quantization_info().offset); - _kernel.setArg(idx++, -_weights->info()->quantization_info().offset); - _kernel.setArg(idx++, _output->info()->quantization_info().offset); - _kernel.setArg(idx++, output_multiplier); - _kernel.setArg(idx++, output_shift); - } - - // Configure the local work size for Bifrost with a value obtained - // via exhaustive autotuning for the MobileNets tensor shapes. - const GPUTarget gpu_target = get_arch_from_target(get_target()); - if(gpu_target == GPUTarget::BIFROST) - { - const size_t width = input->info()->dimension(0); - if(width >= 56) // 56 or 112 - { - _lws_hint = cl::NDRange(8, 5, 2); - } - else if(width >= 14) // 14 or 28 - { - _lws_hint = cl::NDRange(1, 5, 2); - } - else // 7 - { - _lws_hint = cl::NDRange(1, 1, 2); - } - } - - // Configure kernel window - const unsigned int num_elems_processed_per_iteration = 2; - const unsigned int num_elems_written_per_iteration = 2; - const unsigned int num_elems_read_per_iteration = 3 + _conv_stride_x; - const unsigned int num_rows_read_per_iteration = 3; - - Window win = calculate_max_window(*output->info(), Steps(num_elems_processed_per_iteration)); - - AccessWindowRectangle input_access(input->info(), -border_size().left, -border_size().top, num_elems_read_per_iteration, num_rows_read_per_iteration, _conv_stride_x, _conv_stride_y); - AccessWindowHorizontal output_access(output->info(), 0, num_elems_written_per_iteration); - AccessWindowStatic weights_access(weights->info(), 0, 0, weights->info()->dimension(0), weights->info()->dimension(1)); - - update_window_and_padding(win, input_access, weights_access, output_access); - - output_access.set_valid_region(win, ValidRegion(Coordinates(), output->info()->tensor_shape())); - - ICLKernel::configure(win); -} - -void CLDepthwiseConvolution3x3Kernel::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); - - // Create input window and adjust - Window win_in = window; - win_in.adjust(Window::DimX, -_conv_pad_left, true); - win_in.adjust(Window::DimY, -_conv_pad_top, true); - win_in.set_dimension_step(Window::DimX, window.x().step() * _conv_stride_x); - win_in.set_dimension_step(Window::DimY, window.y().step() * _conv_stride_y); - - Window slice_in = win_in.first_slice_window_3D(); - Window slice_out = window.first_slice_window_3D(); - Window slice_weights = window.first_slice_window_3D(); - slice_weights.set_dimension_step(Window::DimX, 0); - slice_weights.set_dimension_step(Window::DimY, 0); - - // Set biases - if(_biases != nullptr) - { - unsigned int idx = 3 * num_arguments_per_3D_tensor(); - Window slice_biases; - 
slice_biases.use_tensor_dimensions(_biases->info()->tensor_shape()); - add_1D_tensor_argument(idx, _biases, slice_biases); - } - - do - { - unsigned int idx = 0; - add_3D_tensor_argument(idx, _input, slice_in); - add_3D_tensor_argument(idx, _output, slice_out); - add_3D_tensor_argument(idx, _weights, slice_weights); - - enqueue(queue, *this, slice_out, _lws_hint); - } - while(window.slide_window_slice_3D(slice_out) && win_in.slide_window_slice_3D(slice_in)); -} diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3Kernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3Kernel.cpp new file mode 100644 index 0000000000..003f1f8330 --- /dev/null +++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3Kernel.cpp @@ -0,0 +1,219 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
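
get_output_shape() delegates the spatial arithmetic to scaled_dimensions(). For the plain floor-rounded case that reduces to the textbook convolution formula, sketched standalone below (an assumption about the rounding mode; it also assumes the kernel fits, i.e. k_w <= in_w + 2 * pad_x):

    #include <utility>

    std::pair<unsigned int, unsigned int> convolved_size(unsigned int in_w, unsigned int in_h,
                                                         unsigned int k_w, unsigned int k_h,
                                                         unsigned int stride_x, unsigned int stride_y,
                                                         unsigned int pad_x, unsigned int pad_y)
    {
        const unsigned int out_w = (in_w + 2 * pad_x - k_w) / stride_x + 1; // integer division floors
        const unsigned int out_h = (in_h + 2 * pad_y - k_h) / stride_y + 1;
        return std::make_pair(out_w, out_h);
    }

For example, a 112x112 input with a 3x3 kernel, stride 2 and padding 1 yields 56x56, matching the MobileNets shapes the Bifrost local work sizes are tuned for.
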
+ */ +#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3Kernel.h" + +#include "arm_compute/core/AccessWindowStatic.h" +#include "arm_compute/core/CL/CLHelpers.h" +#include "arm_compute/core/CL/CLKernelLibrary.h" +#include "arm_compute/core/CL/ICLKernel.h" +#include "arm_compute/core/CL/ICLTensor.h" +#include "arm_compute/core/Error.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/core/Utils.h" +#include "arm_compute/core/utils/quantization/AsymmHelpers.h" + +using namespace arm_compute; + +namespace +{ +/** Calculates the expected output shape dimensions + * + * @param[in] input_shape Input tensor shape + * @param[in] weights_shape Weights tensor shape + * @param[in] conv_info Convolution pad and stride information + * + * @return Expected output shape + */ +TensorShape get_output_shape(TensorShape input_shape, TensorShape weights_shape, PadStrideInfo conv_info) +{ + unsigned int output_width = 0; + unsigned int output_height = 0; + + std::tie(output_width, output_height) = scaled_dimensions(input_shape.x(), input_shape.y(), weights_shape.x(), weights_shape.y(), conv_info); + + TensorShape output_shape = input_shape; + output_shape.set(0, output_width); + output_shape.set(1, output_height); + + return output_shape; +} +} // namespace + +CLDepthwiseConvolutionLayer3x3Kernel::CLDepthwiseConvolutionLayer3x3Kernel() + : _border_size(0), _input(), _output(), _weights(), _biases(), _conv_stride_x(0), _conv_stride_y(0), _conv_pad_left(0), _conv_pad_top(0) +{ +} + +BorderSize CLDepthwiseConvolutionLayer3x3Kernel::border_size() const +{ + return _border_size; +} + +void CLDepthwiseConvolutionLayer3x3Kernel::configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info) +{ + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F32); + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights); + ARM_COMPUTE_ERROR_ON(weights->info()->dimension(0) != 3 || weights->info()->dimension(1) != 3); + + if(biases != nullptr) + { + if(is_data_type_quantized_asymmetric(weights->info()->data_type())) + { + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(biases, 1, DataType::S32); + } + else + { + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(weights, biases); + } + ARM_COMPUTE_ERROR_ON(biases->info()->dimension(0) != weights->info()->dimension(2)); + ARM_COMPUTE_ERROR_ON(biases->info()->num_dimensions() > 1); + } + + // Get convolved dimensions + TensorShape output_shape = get_output_shape(input->info()->tensor_shape(), weights->info()->tensor_shape(), conv_info); + + // Output auto initialization if not yet initialized + auto_init_if_empty(*output->info(), + output_shape, + 1, + input->info()->data_type(), + input->info()->fixed_point_position(), + input->info()->quantization_info()); + + ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(output->info()->tensor_shape(), output_shape); + + _input = input; + _output = output; + _weights = weights; + _biases = biases; + _conv_stride_x = conv_info.stride().first; + _conv_stride_y = conv_info.stride().second; + _conv_pad_left = conv_info.pad_left(); + _conv_pad_top = conv_info.pad_top(); + _border_size = BorderSize(_conv_pad_top, conv_info.pad_right(), conv_info.pad_bottom(), _conv_pad_left); + + // Set build options + ARM_COMPUTE_ERROR_ON(_conv_stride_x < 1 || _conv_stride_x > 3); + CLBuildOptions build_opts; + build_opts.add_option("-DCONV_STRIDE_X=" + support::cpp11::to_string(_conv_stride_x)); + build_opts.add_option_if(_biases != nullptr, "-DHAS_BIAS"); + + // Create kernel + std::string kernel_name = is_data_type_quantized_asymmetric(_input->info()->data_type()) ? "depthwise_convolution_3x3_quantized" : "depthwise_convolution_3x3"; + _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options())); + + // Set static arguments + if(is_data_type_quantized_asymmetric(_input->info()->data_type())) + { + float multiplier = _input->info()->quantization_info().scale * _weights->info()->quantization_info().scale / _output->info()->quantization_info().scale; + int output_multiplier = 0; + int output_shift = 0; + quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift); + + unsigned int idx = 3 * num_arguments_per_3D_tensor() + ((_biases != nullptr) ? num_arguments_per_1D_tensor() : 0); + + _kernel.setArg(idx++, -_input->info()->quantization_info().offset); + _kernel.setArg(idx++, -_weights->info()->quantization_info().offset); + _kernel.setArg(idx++, _output->info()->quantization_info().offset); + _kernel.setArg(idx++, output_multiplier); + _kernel.setArg(idx++, output_shift); + } + + // Configure the local work size for Bifrost with a value obtained + // via exhaustive autotuning for the MobileNets tensor shapes. + const GPUTarget gpu_target = get_arch_from_target(get_target()); + if(gpu_target == GPUTarget::BIFROST) + { + const size_t width = input->info()->dimension(0); + if(width >= 56) // 56 or 112 + { + _lws_hint = cl::NDRange(8, 5, 2); + } + else if(width >= 14) // 14 or 28 + { + _lws_hint = cl::NDRange(1, 5, 2); + } + else // 7 + { + _lws_hint = cl::NDRange(1, 1, 2); + } + } + + // Configure kernel window + const unsigned int num_elems_processed_per_iteration = 2; + const unsigned int num_elems_written_per_iteration = 2; + const unsigned int num_elems_read_per_iteration = 3 + _conv_stride_x; + const unsigned int num_rows_read_per_iteration = 3; + + Window win = calculate_max_window(*output->info(), Steps(num_elems_processed_per_iteration)); + + AccessWindowRectangle input_access(input->info(), -border_size().left, -border_size().top, num_elems_read_per_iteration, num_rows_read_per_iteration, _conv_stride_x, _conv_stride_y); + AccessWindowHorizontal output_access(output->info(), 0, num_elems_written_per_iteration); + AccessWindowStatic weights_access(weights->info(), 0, 0, weights->info()->dimension(0), weights->info()->dimension(1)); + + update_window_and_padding(win, input_access, weights_access, output_access); + + output_access.set_valid_region(win, ValidRegion(Coordinates(), output->info()->tensor_shape())); + + ICLKernel::configure(win); +} + +void CLDepthwiseConvolutionLayer3x3Kernel::run(const Window &window, cl::CommandQueue &queue) +{ + ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); + ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); + + // Create input window and adjust + Window win_in = window; + win_in.adjust(Window::DimX, -_conv_pad_left, true); + win_in.adjust(Window::DimY, -_conv_pad_top, true); + win_in.set_dimension_step(Window::DimX, window.x().step() * _conv_stride_x); + win_in.set_dimension_step(Window::DimY, window.y().step() * _conv_stride_y); + + Window slice_in = win_in.first_slice_window_3D(); + Window slice_out = window.first_slice_window_3D(); + Window slice_weights = window.first_slice_window_3D(); + slice_weights.set_dimension_step(Window::DimX, 0); + slice_weights.set_dimension_step(Window::DimY, 0); + + // Set biases + if(_biases != nullptr) + { + unsigned int idx = 3 * num_arguments_per_3D_tensor(); + Window slice_biases; +
slice_biases.use_tensor_dimensions(_biases->info()->tensor_shape()); + add_1D_tensor_argument(idx, _biases, slice_biases); + } + + do + { + unsigned int idx = 0; + add_3D_tensor_argument(idx, _input, slice_in); + add_3D_tensor_argument(idx, _output, slice_out); + add_3D_tensor_argument(idx, _weights, slice_weights); + + enqueue(queue, *this, slice_out, _lws_hint); + } + while(window.slide_window_slice_3D(slice_out) && win_in.slide_window_slice_3D(slice_in)); +} diff --git a/src/core/CL/kernels/CLL2NormalizeKernel.cpp b/src/core/CL/kernels/CLL2NormalizeKernel.cpp deleted file mode 100644 index 3e0758c980..0000000000 --- a/src/core/CL/kernels/CLL2NormalizeKernel.cpp +++ /dev/null @@ -1,110 +0,0 @@ -/* - * Copyright (c) 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
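
For the quantized path configured above, calculate_quantized_multiplier_less_than_one() turns the real-valued rescale factor into an integer multiplier plus a right shift. A standalone version of the usual decomposition (this follows the well-known gemmlowp scheme and is an assumption about the library's internals):

    #include <cmath>
    #include <cstdint>

    void quantize_multiplier(double multiplier, int32_t *quantized_multiplier, int *right_shift)
    {
        // multiplier = significand * 2^exponent, with significand in [0.5, 1)
        int exponent = 0;
        const double significand = std::frexp(multiplier, &exponent);
        *right_shift = -exponent; // >= 0 whenever multiplier < 1
        int64_t q = static_cast<int64_t>(std::round(significand * (1LL << 31)));
        if(q == (1LL << 31)) // rounding can push the significand up to exactly 1.0
        {
            q /= 2;
            --*right_shift;
        }
        *quantized_multiplier = static_cast<int32_t>(q);
    }
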
- */ -#include "arm_compute/core/CL/kernels/CLL2NormalizeKernel.h" - -#include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/CL/CLKernelLibrary.h" -#include "arm_compute/core/CL/ICLTensor.h" -#include "arm_compute/core/FixedPoint.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Utils.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/core/Window.h" - -#include "support/ToolchainSupport.h" - -using namespace arm_compute; - -CLL2NormalizeKernel::CLL2NormalizeKernel() - : _input(nullptr), _sum(nullptr), _output(nullptr), _axis(0), _epsilon(1e-12) -{ -} - -void CLL2NormalizeKernel::configure(const ICLTensor *input, const ICLTensor *sum, ICLTensor *output, unsigned int axis, float epsilon) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32); - ARM_COMPUTE_ERROR_ON_NULLPTR(output); - - // Sum and output tensor auto initialization if not yet initialized - auto_init_if_empty(*output->info(), input->info()->tensor_shape(), 1, input->info()->data_type(), input->info()->fixed_point_position()); - - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); - ARM_COMPUTE_ERROR_ON_MSG(axis >= TensorShape::num_max_dimensions, "Reduction axis greater than max number of dimensions"); - ARM_COMPUTE_ERROR_ON_MSG(axis > 0, "Unsupported reduction axis, Supported axis is 0"); - ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, output); - - _input = input; - _sum = sum; - _output = output; - _axis = axis; - _epsilon = epsilon; - - const unsigned int num_elems_processed_per_iteration = 16; - - // Set build options - std::set build_opts; - build_opts.emplace(("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()))); - build_opts.emplace(("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration))); - - // Create kernel - _kernel = static_cast(CLKernelLibrary::get().create_kernel("l2_normalize", build_opts)); - - // Set epsilon argument - unsigned int idx = num_arguments_per_1D_tensor() * 3; - _kernel.setArg(idx, _epsilon); - - // Configure kernel window - Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration)); - - AccessWindowHorizontal input_access(input->info(), 0, num_elems_processed_per_iteration); - AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration); - - update_window_and_padding(win, input_access, output_access); - output_access.set_valid_region(win, input->info()->valid_region()); - - ICLKernel::configure(win); -} - -void CLL2NormalizeKernel::run(const Window &window, cl::CommandQueue &queue) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); - - Window window_sum(window); - window_sum.set(Window::DimX, Window::Dimension(0, 0, 0)); - - Window in_slice = window.first_slice_window_1D(); - Window sum_slice = window_sum.first_slice_window_1D(); - - do - { - unsigned int idx = 0; - add_1D_tensor_argument(idx, _input, in_slice); - add_1D_tensor_argument(idx, _sum, sum_slice); - add_1D_tensor_argument(idx, _output, in_slice); - enqueue(queue, *this, in_slice); - } - while(window.slide_window_slice_1D(in_slice) && window.slide_window_slice_1D(sum_slice)); -} diff --git a/src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp b/src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp new file mode 100644 index 0000000000..36e351e048 --- /dev/null +++ b/src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp @@ -0,0 +1,110 @@ +/* + * Copyright 
(c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/core/CL/kernels/CLL2NormalizeLayerKernel.h" + +#include "arm_compute/core/CL/CLHelpers.h" +#include "arm_compute/core/CL/CLKernelLibrary.h" +#include "arm_compute/core/CL/ICLTensor.h" +#include "arm_compute/core/FixedPoint.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Utils.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/core/Window.h" + +#include "support/ToolchainSupport.h" + +using namespace arm_compute; + +CLL2NormalizeLayerKernel::CLL2NormalizeLayerKernel() + : _input(nullptr), _sum(nullptr), _output(nullptr), _axis(0), _epsilon(1e-12) +{ +} + +void CLL2NormalizeLayerKernel::configure(const ICLTensor *input, const ICLTensor *sum, ICLTensor *output, unsigned int axis, float epsilon) +{ + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32); + ARM_COMPUTE_ERROR_ON_NULLPTR(output); + + // Sum and output tensor auto initialization if not yet initialized + auto_init_if_empty(*output->info(), input->info()->tensor_shape(), 1, input->info()->data_type(), input->info()->fixed_point_position()); + + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); + ARM_COMPUTE_ERROR_ON_MSG(axis >= TensorShape::num_max_dimensions, "Reduction axis greater than max number of dimensions"); + ARM_COMPUTE_ERROR_ON_MSG(axis > 0, "Unsupported reduction axis, Supported axis is 0"); + ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, output); + + _input = input; + _sum = sum; + _output = output; + _axis = axis; + _epsilon = epsilon; + + const unsigned int num_elems_processed_per_iteration = 16; + + // Set build options + std::set<std::string> build_opts; + build_opts.emplace(("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()))); + build_opts.emplace(("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration))); + + // Create kernel + _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("l2_normalize", build_opts)); + + // Set epsilon argument + unsigned int idx = num_arguments_per_1D_tensor() * 3; + _kernel.setArg(idx, _epsilon); + + // Configure kernel window + Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration)); + + AccessWindowHorizontal input_access(input->info(), 0, num_elems_processed_per_iteration); + AccessWindowHorizontal output_access(output->info(), 0,
num_elems_processed_per_iteration); + + update_window_and_padding(win, input_access, output_access); + output_access.set_valid_region(win, input->info()->valid_region()); + + ICLKernel::configure(win); +} + +void CLL2NormalizeLayerKernel::run(const Window &window, cl::CommandQueue &queue) +{ + ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); + ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); + + Window window_sum(window); + window_sum.set(Window::DimX, Window::Dimension(0, 0, 0)); + + Window in_slice = window.first_slice_window_1D(); + Window sum_slice = window_sum.first_slice_window_1D(); + + do + { + unsigned int idx = 0; + add_1D_tensor_argument(idx, _input, in_slice); + add_1D_tensor_argument(idx, _sum, sum_slice); + add_1D_tensor_argument(idx, _output, in_slice); + enqueue(queue, *this, in_slice); + } + while(window.slide_window_slice_1D(in_slice) && window.slide_window_slice_1D(sum_slice)); +} diff --git a/src/core/GLES_COMPUTE/kernels/GCDepthConcatenateKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCDepthConcatenateKernel.cpp deleted file mode 100644 index b90a8e7b89..0000000000 --- a/src/core/GLES_COMPUTE/kernels/GCDepthConcatenateKernel.cpp +++ /dev/null @@ -1,143 +0,0 @@ -/* - * Copyright (c) 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
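
What the l2_normalize kernel computes per X row, in scalar form: each element is divided by the square root of the (clamped) sum of squares that the preceding reduction stage provides through the sum tensor. A minimal sketch under that assumption:

    #include <algorithm>
    #include <cmath>
    #include <cstddef>
    #include <vector>

    void l2_normalize_row(const std::vector<float> &in, float sum_of_squares, float epsilon,
                          std::vector<float> &out)
    {
        // epsilon keeps the norm finite for all-zero rows
        const float inv_norm = 1.0f / std::sqrt(std::max(sum_of_squares, epsilon));
        out.resize(in.size());
        for(std::size_t i = 0; i < in.size(); ++i)
        {
            out[i] = in[i] * inv_norm;
        }
    }
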
- */ -#include "arm_compute/core/GLES_COMPUTE/kernels/GCDepthConcatenateKernel.h" - -#include "arm_compute/core/Error.h" -#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h" -#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h" -#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h" -#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/Utils.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/core/Window.h" - -#include "support/ToolchainSupport.h" - -using namespace arm_compute; - -GCDepthConcatenateKernel::GCDepthConcatenateKernel() - : _input(nullptr), _output(nullptr), _top_bottom(0), _left_right(0) -{ -} - -BorderSize GCDepthConcatenateKernel::border_size() const -{ - return BorderSize(_top_bottom, _left_right); -} - -void GCDepthConcatenateKernel::configure(const IGCTensor *input, unsigned int depth_offset, IGCTensor *output) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); - ARM_COMPUTE_ERROR_ON(input->info()->dimension(2) + depth_offset > output->info()->dimension(2)); - ARM_COMPUTE_ERROR_ON(input->info()->dimension(0) > output->info()->dimension(0)); - ARM_COMPUTE_ERROR_ON(input->info()->dimension(1) > output->info()->dimension(1)); - ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(3, input, output); - - // The gaps between the two lowest dimensions of input and output need to be divisible by 2 - // Otherwise it is not clear how the padding should be added onto the input tensor - ARM_COMPUTE_ERROR_ON((output->info()->dimension(0) - input->info()->dimension(0)) % 2); - ARM_COMPUTE_ERROR_ON((output->info()->dimension(1) - input->info()->dimension(1)) % 2); - - _input = input; - _output = output; - - // Add build options - std::set build_opts; - std::string dt_name = (input->info()->data_type() == DataType::F32) ? 
"DATA_TYPE_FP32" : "DATA_TYPE_FP16"; - build_opts.emplace(("#define " + dt_name)); - build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(1)); - build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1)); - build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1)); - - // Configure kernel window - _left_right = (output->info()->dimension(0) - input->info()->dimension(0)) / 2; - _top_bottom = (output->info()->dimension(1) - input->info()->dimension(1)) / 2; - - const int offset_to_first_elements_in_bytes = depth_offset * output->info()->strides_in_bytes()[2]; - - build_opts.emplace("#define OFFSETS_X " + support::cpp11::to_string(_left_right)); - build_opts.emplace("#define OFFSETS_Y " + support::cpp11::to_string(_top_bottom)); - build_opts.emplace("#define OFFSETS_Z " + support::cpp11::to_string(offset_to_first_elements_in_bytes)); - - // Create kernel - _kernel = static_cast(GCKernelLibrary::get().create_kernel("concatenate_depth", build_opts)); - - unsigned int num_elems_processed_per_iteration = 1; - unsigned int num_elems_read_per_iteration = 1; - if(input->info()->data_type() == DataType::F32) - { - num_elems_processed_per_iteration = 1; - num_elems_read_per_iteration = 1; - } - else if(input->info()->data_type() == DataType::F16) - { - num_elems_processed_per_iteration = 4; - num_elems_read_per_iteration = 4; - } - const unsigned int num_rows_read_per_iteration = 1; - - // The window needs to be based on input as we copy all the depths of input - Window win = calculate_max_window(*output->info(), Steps(num_elems_processed_per_iteration)); - win.set(Window::DimZ, Window::Dimension(0, input->info()->tensor_shape().z(), 1)); - - AccessWindowRectangle input_access(input->info(), -_left_right, -_top_bottom, num_elems_read_per_iteration, num_rows_read_per_iteration); - AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration); - update_window_and_padding(win, input_access, output_access); - output_access.set_valid_region(win, ValidRegion(Coordinates(0, 0), output->info()->tensor_shape())); - - IGCKernel::configure(win); -} - -void GCDepthConcatenateKernel::run(const Window &window) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IGCKernel::window(), window); - - _kernel.use(); - - Window slice = window.first_slice_window_3D(); - - do - { - if(_input->info()->data_type() == DataType::F32) - { - unsigned int idx = 0; - add_3D_tensor_argument(idx, _input, 1, slice); - add_3D_tensor_argument(idx, _output, 2, slice); - } - else if(_input->info()->data_type() == DataType::F16) - { - unsigned int idx = 0; - add_3D_tensor_argument(idx, _input, BufferParam(1, 3), slice); - add_3D_tensor_argument(idx, _output, BufferParam(2, 3), slice); - } - - _kernel.update_shader_params(); - - enqueue(*this, slice); - } - while(window.slide_window_slice_3D(slice)); -} diff --git a/src/core/GLES_COMPUTE/kernels/GCDepthConcatenateLayerKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCDepthConcatenateLayerKernel.cpp new file mode 100644 index 0000000000..a6111782fd --- /dev/null +++ b/src/core/GLES_COMPUTE/kernels/GCDepthConcatenateLayerKernel.cpp @@ -0,0 +1,143 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/core/GLES_COMPUTE/kernels/GCDepthConcatenateLayerKernel.h" + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h" +#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h" +#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h" +#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Utils.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/core/Window.h" + +#include "support/ToolchainSupport.h" + +using namespace arm_compute; + +GCDepthConcatenateLayerKernel::GCDepthConcatenateLayerKernel() + : _input(nullptr), _output(nullptr), _top_bottom(0), _left_right(0) +{ +} + +BorderSize GCDepthConcatenateLayerKernel::border_size() const +{ + return BorderSize(_top_bottom, _left_right); +} + +void GCDepthConcatenateLayerKernel::configure(const IGCTensor *input, unsigned int depth_offset, IGCTensor *output) +{ + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); + ARM_COMPUTE_ERROR_ON(input->info()->dimension(2) + depth_offset > output->info()->dimension(2)); + ARM_COMPUTE_ERROR_ON(input->info()->dimension(0) > output->info()->dimension(0)); + ARM_COMPUTE_ERROR_ON(input->info()->dimension(1) > output->info()->dimension(1)); + ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(3, input, output); + + // The gaps between the two lowest dimensions of input and output need to be divisible by 2 + // Otherwise it is not clear how the padding should be added onto the input tensor + ARM_COMPUTE_ERROR_ON((output->info()->dimension(0) - input->info()->dimension(0)) % 2); + ARM_COMPUTE_ERROR_ON((output->info()->dimension(1) - input->info()->dimension(1)) % 2); + + _input = input; + _output = output; + + // Add build options + std::set<std::string> build_opts; + std::string dt_name = (input->info()->data_type() == DataType::F32) ? "DATA_TYPE_FP32" : "DATA_TYPE_FP16"; + build_opts.emplace(("#define " + dt_name)); + build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(1)); + build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1)); + build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1)); + + // Configure kernel window + _left_right = (output->info()->dimension(0) - input->info()->dimension(0)) / 2; + _top_bottom = (output->info()->dimension(1) - input->info()->dimension(1)) / 2; + + const int offset_to_first_elements_in_bytes = depth_offset * output->info()->strides_in_bytes()[2]; + + build_opts.emplace("#define OFFSETS_X " + support::cpp11::to_string(_left_right)); + build_opts.emplace("#define OFFSETS_Y " + support::cpp11::to_string(_top_bottom)); + build_opts.emplace("#define OFFSETS_Z " + support::cpp11::to_string(offset_to_first_elements_in_bytes)); + + // Create kernel + _kernel = static_cast<GCKernel>(GCKernelLibrary::get().create_kernel("concatenate_depth", build_opts)); + + unsigned int num_elems_processed_per_iteration = 1; + unsigned int num_elems_read_per_iteration = 1; + if(input->info()->data_type() == DataType::F32) + { + num_elems_processed_per_iteration = 1; + num_elems_read_per_iteration = 1; + } + else if(input->info()->data_type() == DataType::F16) + { + num_elems_processed_per_iteration = 4; + num_elems_read_per_iteration = 4; + } + const unsigned int num_rows_read_per_iteration = 1; + + // The window needs to be based on input as we copy all the depths of input + Window win = calculate_max_window(*output->info(), Steps(num_elems_processed_per_iteration)); + win.set(Window::DimZ, Window::Dimension(0, input->info()->tensor_shape().z(), 1)); + + AccessWindowRectangle input_access(input->info(), -_left_right, -_top_bottom, num_elems_read_per_iteration, num_rows_read_per_iteration); + AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration); + update_window_and_padding(win, input_access, output_access); + output_access.set_valid_region(win, ValidRegion(Coordinates(0, 0), output->info()->tensor_shape())); + + IGCKernel::configure(win); +} + +void GCDepthConcatenateLayerKernel::run(const Window &window) +{ + ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); + ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IGCKernel::window(), window); + + _kernel.use(); + + Window slice = window.first_slice_window_3D(); + + do + { + if(_input->info()->data_type() == DataType::F32) + { + unsigned int idx = 0; + add_3D_tensor_argument(idx, _input, 1, slice); + add_3D_tensor_argument(idx, _output, 2, slice); + } + else if(_input->info()->data_type() == DataType::F16) + { + unsigned int idx = 0; + add_3D_tensor_argument(idx, _input, BufferParam(1, 3), slice); + add_3D_tensor_argument(idx, _output, BufferParam(2, 3), slice); + } + + _kernel.update_shader_params(); + + enqueue(*this, slice); + } + while(window.slide_window_slice_3D(slice)); +} diff --git a/src/core/NEON/kernels/NEDepthConcatenateKernel.cpp b/src/core/NEON/kernels/NEDepthConcatenateKernel.cpp deleted file mode 100644 index 7a62b0cb03..0000000000 --- a/src/core/NEON/kernels/NEDepthConcatenateKernel.cpp +++ /dev/null @@ -1,170 +0,0 @@ -/* - * Copyright (c) 2017 ARM Limited.
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/core/NEON/kernels/NEDepthConcatenateKernel.h" - -#include "arm_compute/core/Error.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/IAccessWindow.h" -#include "arm_compute/core/ITensor.h" -#include "arm_compute/core/NEON/NEFixedPoint.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Utils.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/core/Window.h" - -#include -#include - -using namespace arm_compute; - -namespace -{ -// Overloads of 128-bit vector loads -uint8x16_t loadq(const uint8_t *ptr) -{ - return vld1q_u8(ptr); -} -uint16x8_t loadq(const uint16_t *ptr) -{ - return vld1q_u16(ptr); -} -uint32x4_t loadq(const uint32_t *ptr) -{ - return vld1q_u32(ptr); -} -// Overloads of 128-bit vector stores -void storeq(uint8_t *ptr, uint8x16_t val) -{ - return vst1q_u8(ptr, val); -} -void storeq(uint16_t *ptr, uint16x8_t val) -{ - return vst1q_u16(ptr, val); -} -void storeq(uint32_t *ptr, uint32x4_t val) -{ - return vst1q_u32(ptr, val); -} - -template -void depth_concat(const ITensor *in, ITensor *out, std::pair start_xy, int depth_offset, const Window &window) -{ - const int start_x = start_xy.first; - const int start_y = start_xy.second; - - // Offset input - const int input_offset_to_first_elements_in_bytes = in->info()->offset_first_element_in_bytes() - start_x * in->info()->strides_in_bytes()[0] - start_y * in->info()->strides_in_bytes()[1]; - uint8_t *input_ptr = in->buffer() + input_offset_to_first_elements_in_bytes; - - // Offset output - const unsigned int output_offset_to_first_elements_in_bytes = out->info()->offset_first_element_in_bytes() + depth_offset * out->info()->strides_in_bytes()[2]; - uint8_t *output_ptr = out->buffer() + output_offset_to_first_elements_in_bytes; - - Iterator input(in, window); - Iterator output(out, window); - - execute_window_loop(window, [&](const Coordinates & id) - { - const auto in_ptr = reinterpret_cast(input_ptr + input.offset()); - const auto out_ptr = reinterpret_cast(output_ptr + output.offset()); - - storeq(out_ptr, loadq(in_ptr)); - }, - input, output); -} -} // namespace - -NEDepthConcatenateKernel::NEDepthConcatenateKernel() - : _func(nullptr), _input(nullptr), _output(nullptr), _top_bottom(0), _left_right(0), _depth_offset(0) -{ -} - -BorderSize NEDepthConcatenateKernel::border_size() const -{ - return BorderSize(_top_bottom, _left_right); -} - 
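
The loadq/storeq overloads in the anonymous namespace let one templated depth_concat() move 128 bits per step regardless of element width. Distilled to its simplest concrete form for uint8_t (NEON-only; n assumed to be a multiple of 16):

    #include <arm_neon.h>
    #include <cstdint>

    void copy_row_u8(const uint8_t *src, uint8_t *dst, int n)
    {
        for(int i = 0; i < n; i += 16)
        {
            vst1q_u8(dst + i, vld1q_u8(src + i)); // one 128-bit load and store per iteration
        }
    }
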
-void NEDepthConcatenateKernel::configure(const ITensor *input, unsigned int depth_offset, ITensor *output) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32); - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); - ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT_POSITION(input, output); - ARM_COMPUTE_ERROR_ON(input->info()->dimension(2) + depth_offset > output->info()->dimension(2)); - ARM_COMPUTE_ERROR_ON(input->info()->dimension(0) > output->info()->dimension(0)); - ARM_COMPUTE_ERROR_ON(input->info()->dimension(1) > output->info()->dimension(1)); - ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(3, input, output); - - // The gaps between the two lowest dimensions of input and output need to be divisible by 2 - // Otherwise it is not clear how the padding should be added onto the input tensor - ARM_COMPUTE_ERROR_ON((output->info()->dimension(0) - input->info()->dimension(0)) % 2); - ARM_COMPUTE_ERROR_ON((output->info()->dimension(1) - input->info()->dimension(1)) % 2); - - _func = nullptr; - _input = input; - _output = output; - _depth_offset = depth_offset; - _left_right = (output->info()->dimension(0) - input->info()->dimension(0)) / 2; - _top_bottom = (output->info()->dimension(1) - input->info()->dimension(1)) / 2; - - switch(input->info()->data_type()) - { - case DataType::QS8: - _func = &depth_concat; - break; - case DataType::QS16: - case DataType::F16: - _func = &depth_concat; - break; - case DataType::F32: - _func = &depth_concat; - break; - default: - ARM_COMPUTE_ERROR("Unsupported data type."); - } - - const unsigned int num_elems_processed_per_iteration = 16 / input->info()->element_size(); - const unsigned int num_elems_read_per_iteration = 16 / input->info()->element_size(); - const unsigned int num_rows_read_per_iteration = 1; - - // The window needs to be based on input as we copy all the depths of input - Window win = calculate_max_window(*output->info(), Steps(num_elems_processed_per_iteration)); - win.set(Window::DimZ, Window::Dimension(0, input->info()->tensor_shape().z(), 1)); - - AccessWindowRectangle input_access(input->info(), -_left_right, -_top_bottom, num_elems_read_per_iteration, num_rows_read_per_iteration); - AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration); - update_window_and_padding(win, input_access, output_access); - output_access.set_valid_region(win, ValidRegion(Coordinates(0, 0), output->info()->tensor_shape())); - - INEKernel::configure(win); -} - -void NEDepthConcatenateKernel::run(const Window &window, const ThreadInfo &info) -{ - ARM_COMPUTE_UNUSED(info); - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); - ARM_COMPUTE_ERROR_ON(_func == nullptr); - - (*_func)(_input, _output, std::make_pair(_left_right, _top_bottom), _depth_offset, window); -} diff --git a/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp b/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp new file mode 100644 index 0000000000..01b0f10f70 --- /dev/null +++ b/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp @@ -0,0 +1,170 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/core/NEON/kernels/NEDepthConcatenateLayerKernel.h" + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/IAccessWindow.h" +#include "arm_compute/core/ITensor.h" +#include "arm_compute/core/NEON/NEFixedPoint.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Utils.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/core/Window.h" + +#include +#include + +using namespace arm_compute; + +namespace +{ +// Overloads of 128-bit vector loads +uint8x16_t loadq(const uint8_t *ptr) +{ + return vld1q_u8(ptr); +} +uint16x8_t loadq(const uint16_t *ptr) +{ + return vld1q_u16(ptr); +} +uint32x4_t loadq(const uint32_t *ptr) +{ + return vld1q_u32(ptr); +} +// Overloads of 128-bit vector stores +void storeq(uint8_t *ptr, uint8x16_t val) +{ + return vst1q_u8(ptr, val); +} +void storeq(uint16_t *ptr, uint16x8_t val) +{ + return vst1q_u16(ptr, val); +} +void storeq(uint32_t *ptr, uint32x4_t val) +{ + return vst1q_u32(ptr, val); +} + +template +void depth_concat(const ITensor *in, ITensor *out, std::pair start_xy, int depth_offset, const Window &window) +{ + const int start_x = start_xy.first; + const int start_y = start_xy.second; + + // Offset input + const int input_offset_to_first_elements_in_bytes = in->info()->offset_first_element_in_bytes() - start_x * in->info()->strides_in_bytes()[0] - start_y * in->info()->strides_in_bytes()[1]; + uint8_t *input_ptr = in->buffer() + input_offset_to_first_elements_in_bytes; + + // Offset output + const unsigned int output_offset_to_first_elements_in_bytes = out->info()->offset_first_element_in_bytes() + depth_offset * out->info()->strides_in_bytes()[2]; + uint8_t *output_ptr = out->buffer() + output_offset_to_first_elements_in_bytes; + + Iterator input(in, window); + Iterator output(out, window); + + execute_window_loop(window, [&](const Coordinates & id) + { + const auto in_ptr = reinterpret_cast(input_ptr + input.offset()); + const auto out_ptr = reinterpret_cast(output_ptr + output.offset()); + + storeq(out_ptr, loadq(in_ptr)); + }, + input, output); +} +} // namespace + +NEDepthConcatenateLayerKernel::NEDepthConcatenateLayerKernel() + : _func(nullptr), _input(nullptr), _output(nullptr), _top_bottom(0), _left_right(0), _depth_offset(0) +{ +} + +BorderSize NEDepthConcatenateLayerKernel::border_size() const +{ + return BorderSize(_top_bottom, 
+
+NEDepthConcatenateLayerKernel::NEDepthConcatenateLayerKernel()
+    : _func(nullptr), _input(nullptr), _output(nullptr), _top_bottom(0), _left_right(0), _depth_offset(0)
+{
+}
+
+BorderSize NEDepthConcatenateLayerKernel::border_size() const
+{
+    return BorderSize(_top_bottom, _left_right);
+}
+
+void NEDepthConcatenateLayerKernel::configure(const ITensor *input, unsigned int depth_offset, ITensor *output)
+{
+    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32);
+    ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
+    ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT_POSITION(input, output);
+    ARM_COMPUTE_ERROR_ON(input->info()->dimension(2) + depth_offset > output->info()->dimension(2));
+    ARM_COMPUTE_ERROR_ON(input->info()->dimension(0) > output->info()->dimension(0));
+    ARM_COMPUTE_ERROR_ON(input->info()->dimension(1) > output->info()->dimension(1));
+    ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(3, input, output);
+
+    // The gaps between the two lowest dimensions of input and output need to be divisible by 2
+    // Otherwise it is not clear how the padding should be added onto the input tensor
+    ARM_COMPUTE_ERROR_ON((output->info()->dimension(0) - input->info()->dimension(0)) % 2);
+    ARM_COMPUTE_ERROR_ON((output->info()->dimension(1) - input->info()->dimension(1)) % 2);
+
+    _func         = nullptr;
+    _input        = input;
+    _output       = output;
+    _depth_offset = depth_offset;
+    _left_right   = (output->info()->dimension(0) - input->info()->dimension(0)) / 2;
+    _top_bottom   = (output->info()->dimension(1) - input->info()->dimension(1)) / 2;
+
+    switch(input->info()->data_type())
+    {
+        case DataType::QS8:
+            _func = &depth_concat<uint8_t>;
+            break;
+        case DataType::QS16:
+        case DataType::F16:
+            _func = &depth_concat<uint16_t>;
+            break;
+        case DataType::F32:
+            _func = &depth_concat<uint32_t>;
+            break;
+        default:
+            ARM_COMPUTE_ERROR("Unsupported data type.");
+    }
+
+    const unsigned int num_elems_processed_per_iteration = 16 / input->info()->element_size();
+    const unsigned int num_elems_read_per_iteration      = 16 / input->info()->element_size();
+    const unsigned int num_rows_read_per_iteration       = 1;
+
+    // The window needs to be based on input as we copy all the depths of input
+    Window win = calculate_max_window(*output->info(), Steps(num_elems_processed_per_iteration));
+    win.set(Window::DimZ, Window::Dimension(0, input->info()->tensor_shape().z(), 1));
+
+    AccessWindowRectangle  input_access(input->info(), -_left_right, -_top_bottom, num_elems_read_per_iteration, num_rows_read_per_iteration);
+    AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);
+    update_window_and_padding(win, input_access, output_access);
+    output_access.set_valid_region(win, ValidRegion(Coordinates(0, 0), output->info()->tensor_shape()));
+
+    INEKernel::configure(win);
+}
+
+void NEDepthConcatenateLayerKernel::run(const Window &window, const ThreadInfo &info)
+{
+    ARM_COMPUTE_UNUSED(info);
+    ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
+    ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
+    ARM_COMPUTE_ERROR_ON(_func == nullptr);
+
+    (*_func)(_input, _output, std::make_pair(_left_right, _top_bottom), _depth_offset, window);
+}
diff --git a/src/core/NEON/kernels/NEDepthConvertKernel.cpp b/src/core/NEON/kernels/NEDepthConvertKernel.cpp
deleted file mode 100644
index d97a20be65..0000000000
--- a/src/core/NEON/kernels/NEDepthConvertKernel.cpp
+++ /dev/null
@@ -1,524 +0,0 @@
-/*
- * Copyright (c) 2016, 2017 ARM Limited.
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/core/NEON/kernels/NEDepthConvertKernel.h" - -#include "arm_compute/core/Error.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/ITensor.h" -#include "arm_compute/core/NEON/NEFixedPoint.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Validate.h" - -#include - -using namespace arm_compute; - -namespace arm_compute -{ -class Coordinates; -} // namespace arm_compute - -NEDepthConvertKernel::NEDepthConvertKernel() - : _input(nullptr), _output(nullptr), _policy(), _shift(0), _fixed_point_position_input(0), _fixed_point_position_output(0) -{ -} - -void NEDepthConvertKernel::configure(ITensor *input, ITensor *output, ConvertPolicy policy, uint32_t shift) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::QS8, DataType::S16, DataType::U16, DataType::QS16, DataType::F32); - - _input = input; - _output = input; - _policy = policy; - _shift = shift; - - if(output != nullptr) - { - // Auto initialize output shape if not initialized (We can only auto-configure the shape, datatype must be given) - set_shape_if_empty(*output->info(), input->info()->tensor_shape()); - - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::QS8, DataType::S16, DataType::U16, DataType::QS16, DataType::U32, DataType::S32, DataType::F32); - ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, output); - - // Set output - _output = output; - } - - // Set initial fixed point position of input and output - _fixed_point_position_input = input->info()->fixed_point_position(); - _fixed_point_position_output = _output->info()->fixed_point_position(); - - // Set the fixed point position to the output tensor if needed - if(is_data_type_fixed_point(input->info()->data_type()) && is_data_type_fixed_point(_output->info()->data_type())) - { - // If in-place set the fixed point position of the output tensor to be equal to shift - _fixed_point_position_output = (_input == _output) ? 
static_cast(_shift) : _fixed_point_position_output; - // Set fixed point position to output tensor - _output->info()->set_fixed_point_position(_fixed_point_position_output); - } - - ARM_COMPUTE_ERROR_ON(shift >= 8 && (!is_data_type_fixed_point(input->info()->data_type()) && !is_data_type_fixed_point(output->info()->data_type()))); - ARM_COMPUTE_ERROR_ON(input == output && (data_size_from_type(input->info()->data_type()) != data_size_from_type(output->info()->data_type()))); - - ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::U8 && (output->info()->data_type() != DataType::S16 && output->info()->data_type() != DataType::U16 - && output->info()->data_type() != DataType::S32), - "Only data_types supported [in] U8 -> [out] U16, S16, S32"); - - ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::QS8 && (output->info()->data_type() != DataType::QS8 && output->info()->data_type() != DataType::F32), - "Only data_types supported [in] QS8 -> [out] QS8, F32"); - - ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::U16 && (output->info()->data_type() != DataType::U8 && output->info()->data_type() != DataType::U32), - "Only data_types supported [in] U16 -> [out] U8, U32"); - - ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::S16 && (output->info()->data_type() != DataType::U8 && output->info()->data_type() != DataType::S32), - "Only data_types supported [in] S16 -> [out] U8, S32"); - - ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::QS16 && (output->info()->data_type() != DataType::QS16 && output->info()->data_type() != DataType::F32), - "Only data_types supported [in] QS16 -> [out] QS16, F32"); - - ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::F32 && (output->info()->data_type() != DataType::QS8 && output->info()->data_type() != DataType::QS16), - "Only data_types supported [in] F32 -> [out] QS8, QS16"); - - constexpr unsigned int num_elems_processed_per_iteration = 16; - - // Configure kernel window - Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration)); - - AccessWindowHorizontal input_access(input->info(), 0, num_elems_processed_per_iteration); - if(output != nullptr) - { - AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration); - update_window_and_padding(win, input_access, output_access); - output_access.set_valid_region(win, input->info()->valid_region()); - } - else - { - // In-place computation - update_window_and_padding(win, input_access); - } - ICPPKernel::configure(win); -} - -void NEDepthConvertKernel::run(const Window &window, const ThreadInfo &info) -{ - ARM_COMPUTE_UNUSED(info); - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); - ARM_COMPUTE_ERROR_ON(nullptr == _input); - ARM_COMPUTE_ERROR_ON(nullptr == _output); - ARM_COMPUTE_ERROR_ON(_input == _output); - - Iterator input(_input, window); - Iterator output(_output, window); - - bool in_place = (_input == _output); - - switch(_input->info()->data_type()) - { - case DataType::U8: - { - const int16x8_t b = vdupq_n_s16(_shift); - - switch(_output->info()->data_type()) - { - case DataType::S16: - { - /* Up-conversion U8 -> S16 */ - execute_window_loop(window, [&](const Coordinates & id) - { - const uint8x16_t texels_u8 = vld1q_u8(input.ptr()); - - const int16x8x2_t texels = - { - { - vshlq_s16(vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(texels_u8))), b), - 
vshlq_s16(vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(texels_u8))), b) - } - }; - - vst1q_s16(reinterpret_cast(output.ptr()), texels.val[0]); - vst1q_s16(reinterpret_cast(output.ptr()) + 8, texels.val[1]); - }, - input, output); - break; - } - case DataType::S32: - { - /* Up-conversion U8 -> S32 */ - execute_window_loop(window, [&](const Coordinates & id) - { - const uint8x16_t texels_u8 = vld1q_u8(input.ptr()); - - const int16x8x2_t texels = - { - { - vshlq_s16(vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(texels_u8))), b), - vshlq_s16(vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(texels_u8))), b) - } - }; - - vst1q_s32(reinterpret_cast(output.ptr()), vmovl_s16(vget_low_s16(texels.val[0]))); - vst1q_s32(reinterpret_cast(output.ptr()) + 4, vmovl_s16(vget_high_s16(texels.val[0]))); - vst1q_s32(reinterpret_cast(output.ptr()) + 8, vmovl_s16(vget_low_s16(texels.val[1]))); - vst1q_s32(reinterpret_cast(output.ptr()) + 12, vmovl_s16(vget_high_s16(texels.val[1]))); - }, - input, output); - break; - } - case DataType::U16: - { - /* Up-conversion U8 -> U16 */ - execute_window_loop(window, [&](const Coordinates & id) - { - const uint8x16_t texels_u8 = vld1q_u8(input.ptr()); - - const uint16x8x2_t texels = - { - { - vshlq_u16(vmovl_u8(vget_low_u8(texels_u8)), b), - vshlq_u16(vmovl_u8(vget_high_u8(texels_u8)), b) - } - }; - - vst1q_u16(reinterpret_cast(output.ptr()), texels.val[0]); - vst1q_u16(reinterpret_cast(output.ptr()) + 8, texels.val[1]); - }, - input, output); - break; - } - default: - ARM_COMPUTE_ERROR("Output data type not supported"); - } - break; - } - case DataType::QS8: - { - switch(_output->info()->data_type()) - { - case DataType::QS8: - { - const int relative_shift = _fixed_point_position_output - _fixed_point_position_input; - /* Fixed point position conversion QS8 -> QS8 */ - if(relative_shift != 0 || !in_place) - { - const auto relative_shift_vec = vdupq_n_qs8(relative_shift); - execute_window_loop(window, [&](const Coordinates & id) - { - const qint8x16_t texels_qs8 = vld1q_qs8(reinterpret_cast(input.ptr())); - vst1q_qs8(reinterpret_cast(output.ptr()), vqrshlq_s8(texels_qs8, relative_shift_vec)); - }, - input, output); - } - break; - } - case DataType::F32: - { - /* Up-conversion QS8 -> F32 */ - execute_window_loop(window, [&](const Coordinates & id) - { - const qint8x16_t texels_qs8 = vld1q_qs8(reinterpret_cast(input.ptr())); - - float32x4x2_t texels_low = vcvt_f32_qs8(vget_low_s8(texels_qs8), _fixed_point_position_input); - float32x4x2_t texels_high = vcvt_f32_qs8(vget_high_s8(texels_qs8), _fixed_point_position_input); - - vst1q_f32(reinterpret_cast(output.ptr()), texels_low.val[0]); - vst1q_f32(reinterpret_cast(output.ptr()) + 4, texels_low.val[1]); - vst1q_f32(reinterpret_cast(output.ptr()) + 8, texels_high.val[0]); - vst1q_f32(reinterpret_cast(output.ptr()) + 12, texels_high.val[1]); - }, - input, output); - break; - } - default: - ARM_COMPUTE_ERROR("Output data type not supported"); - } - break; - } - case DataType::S16: - { - switch(_output->info()->data_type()) - { - case DataType::U8: - { - const int16x8_t b = vdupq_n_s16(-static_cast(_shift)); - - /* Down-conversion S16 -> U8 */ - if(ConvertPolicy::SATURATE == _policy) - { - execute_window_loop(window, [&](const Coordinates & id) - { - const int16x8x2_t texels = - { - { - vqshlq_s16(vld1q_s16(reinterpret_cast(input.ptr())), b), - vqshlq_s16(vld1q_s16(reinterpret_cast(input.ptr()) + 8), b) - } - }; - - vst1q_u8(output.ptr(), vcombine_u8(vqmovun_s16(texels.val[0]), vqmovun_s16(texels.val[1]))); - }, - input, output); - } - else 
- { - execute_window_loop(window, [&](const Coordinates & id) - { - const int16x8x2_t texels = - { - { - vshlq_s16(vld1q_s16(reinterpret_cast(input.ptr())), b), - vshlq_s16(vld1q_s16(reinterpret_cast(input.ptr()) + 8), b) - } - }; - - vst1q_u8(output.ptr(), vcombine_u8(vmovn_u16(vreinterpretq_u16_s16(texels.val[0])), - vmovn_u16(vreinterpretq_u16_s16(texels.val[1])))); - }, - input, output); - } - break; - } - case DataType::S32: - { - const int32x4_t b = vdupq_n_s32(_shift); - - /* Up-conversion S16 -> S32 */ - execute_window_loop(window, [&](const Coordinates & id) - { - const int16x8x2_t texels = - { - { - vld1q_s16(reinterpret_cast(input.ptr())), - vld1q_s16(reinterpret_cast(input.ptr()) + 8) - } - }; - - const int32x4x4_t texels_s32 = - { - { - vshlq_s32(vmovl_s16(vget_low_s16(texels.val[0])), b), - vshlq_s32(vmovl_s16(vget_high_s16(texels.val[0])), b), - vshlq_s32(vmovl_s16(vget_low_s16(texels.val[1])), b), - vshlq_s32(vmovl_s16(vget_high_s16(texels.val[1])), b) - } - }; - - vst1q_s32(reinterpret_cast(output.ptr()), texels_s32.val[0]); - vst1q_s32(reinterpret_cast(output.ptr()) + 4, texels_s32.val[1]); - vst1q_s32(reinterpret_cast(output.ptr()) + 8, texels_s32.val[2]); - vst1q_s32(reinterpret_cast(output.ptr()) + 12, texels_s32.val[3]); - }, - input, output); - break; - } - default: - ARM_COMPUTE_ERROR("Output data type not supported"); - } - break; - } - case DataType::U16: - { - switch(_output->info()->data_type()) - { - case DataType::U8: - { - const int16x8_t b = vdupq_n_s16(-static_cast(_shift)); - - /* Down-conversion U16 -> U8 */ - if(ConvertPolicy::SATURATE == _policy) - { - execute_window_loop(window, [&](const Coordinates & id) - { - const uint16x8x2_t texels = - { - { - vqshlq_u16(vld1q_u16(reinterpret_cast(input.ptr())), b), - vqshlq_u16(vld1q_u16(reinterpret_cast(input.ptr()) + 8), b) - } - }; - - vst1q_u8(output.ptr(), vcombine_u8(vqmovn_u16(texels.val[0]), vqmovn_u16(texels.val[1]))); - }, - input, output); - } - else - { - execute_window_loop(window, [&](const Coordinates & id) - { - const uint16x8x2_t texels = - { - { - vshlq_u16(vld1q_u16(reinterpret_cast(input.ptr())), b), - vshlq_u16(vld1q_u16(reinterpret_cast(input.ptr()) + 8), b) - } - }; - - vst1q_u8(output.ptr(), vcombine_u8(vmovn_u16(texels.val[0]), vmovn_u16(texels.val[1]))); - }, - input, output); - } - break; - } - case DataType::U32: - { - const int32x4_t b = vdupq_n_s32(_shift); - - /* Up-conversion U16 -> U32 */ - execute_window_loop(window, [&](const Coordinates & id) - { - const uint16x8x2_t texels = - { - { - vld1q_u16(reinterpret_cast(input.ptr())), - vld1q_u16(reinterpret_cast(input.ptr()) + 8) - } - }; - - vst1q_u32(reinterpret_cast(output.ptr()), vshlq_u32(vmovl_u16(vget_low_u16(texels.val[0])), b)); - vst1q_u32(reinterpret_cast(output.ptr()) + 4, vshlq_u32(vmovl_u16(vget_high_u16(texels.val[0])), b)); - vst1q_u32(reinterpret_cast(output.ptr()) + 8, vshlq_u32(vmovl_u16(vget_low_u16(texels.val[1])), b)); - vst1q_u32(reinterpret_cast(output.ptr()) + 12, vshlq_u32(vmovl_u16(vget_high_u16(texels.val[1])), b)); - }, - input, output); - break; - } - default: - ARM_COMPUTE_ERROR("Output data type not supported"); - } - break; - } - case DataType::QS16: - { - switch(_output->info()->data_type()) - { - case DataType::QS16: - { - const int relative_shift = _fixed_point_position_output - _fixed_point_position_input; - /* Fixed point position conversion QS16 -> QS16 */ - if(relative_shift != 0 || !in_place) - { - const auto relative_shift_vec = vdupq_n_qs16(relative_shift); - execute_window_loop(window, 
[&](const Coordinates & id) - { - const qint16x8x2_t texels_qs16 = - { - { - vld1q_qs16(reinterpret_cast(input.ptr())), - vld1q_qs16(reinterpret_cast(input.ptr()) + 8) - } - }; - vst1q_qs16(reinterpret_cast(output.ptr()), vqrshlq_s16(texels_qs16.val[0], relative_shift_vec)); - vst1q_qs16(reinterpret_cast(output.ptr()) + 8, vqrshlq_s16(texels_qs16.val[1], relative_shift_vec)); - }, - input, output); - } - break; - } - case DataType::F32: - { - /* Up-conversion QS16 -> F32 */ - execute_window_loop(window, [&](const Coordinates & id) - { - const int16x8x2_t texels_qs16 = - { - { - vld1q_s16(reinterpret_cast(input.ptr())), - vld1q_s16(reinterpret_cast(input.ptr()) + 8) - } - }; - - vst1q_f32(reinterpret_cast(output.ptr()), vcvt_f32_qs16(vget_low_s16(texels_qs16.val[0]), _fixed_point_position_input)); - vst1q_f32(reinterpret_cast(output.ptr()) + 4, vcvt_f32_qs16(vget_high_s16(texels_qs16.val[0]), _fixed_point_position_input)); - vst1q_f32(reinterpret_cast(output.ptr()) + 8, vcvt_f32_qs16(vget_low_s16(texels_qs16.val[1]), _fixed_point_position_input)); - vst1q_f32(reinterpret_cast(output.ptr()) + 12, vcvt_f32_qs16(vget_high_s16(texels_qs16.val[1]), _fixed_point_position_input)); - }, - input, output); - break; - } - default: - ARM_COMPUTE_ERROR("Output data type not supported"); - } - break; - } - case DataType::F32: - { - switch(_output->info()->data_type()) - { - case DataType::QS8: - { - /* Down-conversion F32 -> QS8 */ - execute_window_loop(window, [&](const Coordinates & id) - { - const float32x4x4_t texels_f32 = - { - { - vld1q_f32(reinterpret_cast(input.ptr())), - vld1q_f32(reinterpret_cast(input.ptr()) + 4), - vld1q_f32(reinterpret_cast(input.ptr()) + 8), - vld1q_f32(reinterpret_cast(input.ptr()) + 12) - } - }; - - const qint8x16_t texels_s8 = vqcvtq_qs8_f32(texels_f32, _fixed_point_position_output); - - vst1q_s8(reinterpret_cast(output.ptr()), texels_s8); - }, - input, output); - break; - } - case DataType::QS16: - { - /* Down-conversion F32 -> QS16 */ - execute_window_loop(window, [&](const Coordinates & id) - { - const float32x4x2_t texels_f32_1 = - { - { - vld1q_f32(reinterpret_cast(input.ptr())), - vld1q_f32(reinterpret_cast(input.ptr()) + 4), - } - }; - const float32x4x2_t texels_f32_2 = - { - { - vld1q_f32(reinterpret_cast(input.ptr()) + 8), - vld1q_f32(reinterpret_cast(input.ptr()) + 12) - } - }; - - vst1q_s16(reinterpret_cast(output.ptr()), vqcvtq_qs16_f32(texels_f32_1, _fixed_point_position_output)); - vst1q_s16(reinterpret_cast(output.ptr()) + 8, vqcvtq_qs16_f32(texels_f32_2, _fixed_point_position_output)); - }, - input, output); - break; - } - default: - ARM_COMPUTE_ERROR("Output data type not supported"); - } - break; - } - default: - ARM_COMPUTE_ERROR("Not supported"); - } -} diff --git a/src/core/NEON/kernels/NEDepthConvertLayerKernel.cpp b/src/core/NEON/kernels/NEDepthConvertLayerKernel.cpp new file mode 100644 index 0000000000..c29cb57513 --- /dev/null +++ b/src/core/NEON/kernels/NEDepthConvertLayerKernel.cpp @@ -0,0 +1,524 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
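For reference while reading the shift-based conversion paths deleted above and re-added below, their scalar equivalents, with the ConvertPolicy made explicit (plain C++ sketch; function names are illustrative, not library API):

#include <algorithm>
#include <cstdint>

// Up-conversion U8 -> S16 with left shift (vmovl_u8 + vshlq_s16 in the kernel).
inline int16_t u8_to_s16(uint8_t v, unsigned int shift)
{
    return static_cast<int16_t>(static_cast<int16_t>(v) << shift);
}

// Down-conversion S16 -> U8 with right shift: SATURATE clamps to [0, 255]
// (vqshlq_s16 + vqmovun_s16), WRAP keeps the low byte (vshlq_s16 + vmovn_u16).
inline uint8_t s16_to_u8(int16_t v, unsigned int shift, bool saturate)
{
    const int32_t shifted = static_cast<int32_t>(v) >> shift; // arithmetic shift, as vshlq with a negative count
    if(saturate)
    {
        return static_cast<uint8_t>(std::min<int32_t>(std::max<int32_t>(shifted, 0), 255));
    }
    return static_cast<uint8_t>(shifted); // wrap-around truncation
}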
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/core/NEON/kernels/NEDepthConvertLayerKernel.h" + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/ITensor.h" +#include "arm_compute/core/NEON/NEFixedPoint.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Validate.h" + +#include + +using namespace arm_compute; + +namespace arm_compute +{ +class Coordinates; +} // namespace arm_compute + +NEDepthConvertLayerKernel::NEDepthConvertLayerKernel() + : _input(nullptr), _output(nullptr), _policy(), _shift(0), _fixed_point_position_input(0), _fixed_point_position_output(0) +{ +} + +void NEDepthConvertLayerKernel::configure(ITensor *input, ITensor *output, ConvertPolicy policy, uint32_t shift) +{ + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::QS8, DataType::S16, DataType::U16, DataType::QS16, DataType::F32); + + _input = input; + _output = input; + _policy = policy; + _shift = shift; + + if(output != nullptr) + { + // Auto initialize output shape if not initialized (We can only auto-configure the shape, datatype must be given) + set_shape_if_empty(*output->info(), input->info()->tensor_shape()); + + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::QS8, DataType::S16, DataType::U16, DataType::QS16, DataType::U32, DataType::S32, DataType::F32); + ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, output); + + // Set output + _output = output; + } + + // Set initial fixed point position of input and output + _fixed_point_position_input = input->info()->fixed_point_position(); + _fixed_point_position_output = _output->info()->fixed_point_position(); + + // Set the fixed point position to the output tensor if needed + if(is_data_type_fixed_point(input->info()->data_type()) && is_data_type_fixed_point(_output->info()->data_type())) + { + // If in-place set the fixed point position of the output tensor to be equal to shift + _fixed_point_position_output = (_input == _output) ? 
static_cast(_shift) : _fixed_point_position_output; + // Set fixed point position to output tensor + _output->info()->set_fixed_point_position(_fixed_point_position_output); + } + + ARM_COMPUTE_ERROR_ON(shift >= 8 && (!is_data_type_fixed_point(input->info()->data_type()) && !is_data_type_fixed_point(output->info()->data_type()))); + ARM_COMPUTE_ERROR_ON(input == output && (data_size_from_type(input->info()->data_type()) != data_size_from_type(output->info()->data_type()))); + + ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::U8 && (output->info()->data_type() != DataType::S16 && output->info()->data_type() != DataType::U16 + && output->info()->data_type() != DataType::S32), + "Only data_types supported [in] U8 -> [out] U16, S16, S32"); + + ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::QS8 && (output->info()->data_type() != DataType::QS8 && output->info()->data_type() != DataType::F32), + "Only data_types supported [in] QS8 -> [out] QS8, F32"); + + ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::U16 && (output->info()->data_type() != DataType::U8 && output->info()->data_type() != DataType::U32), + "Only data_types supported [in] U16 -> [out] U8, U32"); + + ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::S16 && (output->info()->data_type() != DataType::U8 && output->info()->data_type() != DataType::S32), + "Only data_types supported [in] S16 -> [out] U8, S32"); + + ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::QS16 && (output->info()->data_type() != DataType::QS16 && output->info()->data_type() != DataType::F32), + "Only data_types supported [in] QS16 -> [out] QS16, F32"); + + ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::F32 && (output->info()->data_type() != DataType::QS8 && output->info()->data_type() != DataType::QS16), + "Only data_types supported [in] F32 -> [out] QS8, QS16"); + + constexpr unsigned int num_elems_processed_per_iteration = 16; + + // Configure kernel window + Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration)); + + AccessWindowHorizontal input_access(input->info(), 0, num_elems_processed_per_iteration); + if(output != nullptr) + { + AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration); + update_window_and_padding(win, input_access, output_access); + output_access.set_valid_region(win, input->info()->valid_region()); + } + else + { + // In-place computation + update_window_and_padding(win, input_access); + } + ICPPKernel::configure(win); +} + +void NEDepthConvertLayerKernel::run(const Window &window, const ThreadInfo &info) +{ + ARM_COMPUTE_UNUSED(info); + ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); + ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); + ARM_COMPUTE_ERROR_ON(nullptr == _input); + ARM_COMPUTE_ERROR_ON(nullptr == _output); + ARM_COMPUTE_ERROR_ON(_input == _output); + + Iterator input(_input, window); + Iterator output(_output, window); + + bool in_place = (_input == _output); + + switch(_input->info()->data_type()) + { + case DataType::U8: + { + const int16x8_t b = vdupq_n_s16(_shift); + + switch(_output->info()->data_type()) + { + case DataType::S16: + { + /* Up-conversion U8 -> S16 */ + execute_window_loop(window, [&](const Coordinates & id) + { + const uint8x16_t texels_u8 = vld1q_u8(input.ptr()); + + const int16x8x2_t texels = + { + { + vshlq_s16(vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(texels_u8))), b), + 
vshlq_s16(vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(texels_u8))), b) + } + }; + + vst1q_s16(reinterpret_cast(output.ptr()), texels.val[0]); + vst1q_s16(reinterpret_cast(output.ptr()) + 8, texels.val[1]); + }, + input, output); + break; + } + case DataType::S32: + { + /* Up-conversion U8 -> S32 */ + execute_window_loop(window, [&](const Coordinates & id) + { + const uint8x16_t texels_u8 = vld1q_u8(input.ptr()); + + const int16x8x2_t texels = + { + { + vshlq_s16(vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(texels_u8))), b), + vshlq_s16(vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(texels_u8))), b) + } + }; + + vst1q_s32(reinterpret_cast(output.ptr()), vmovl_s16(vget_low_s16(texels.val[0]))); + vst1q_s32(reinterpret_cast(output.ptr()) + 4, vmovl_s16(vget_high_s16(texels.val[0]))); + vst1q_s32(reinterpret_cast(output.ptr()) + 8, vmovl_s16(vget_low_s16(texels.val[1]))); + vst1q_s32(reinterpret_cast(output.ptr()) + 12, vmovl_s16(vget_high_s16(texels.val[1]))); + }, + input, output); + break; + } + case DataType::U16: + { + /* Up-conversion U8 -> U16 */ + execute_window_loop(window, [&](const Coordinates & id) + { + const uint8x16_t texels_u8 = vld1q_u8(input.ptr()); + + const uint16x8x2_t texels = + { + { + vshlq_u16(vmovl_u8(vget_low_u8(texels_u8)), b), + vshlq_u16(vmovl_u8(vget_high_u8(texels_u8)), b) + } + }; + + vst1q_u16(reinterpret_cast(output.ptr()), texels.val[0]); + vst1q_u16(reinterpret_cast(output.ptr()) + 8, texels.val[1]); + }, + input, output); + break; + } + default: + ARM_COMPUTE_ERROR("Output data type not supported"); + } + break; + } + case DataType::QS8: + { + switch(_output->info()->data_type()) + { + case DataType::QS8: + { + const int relative_shift = _fixed_point_position_output - _fixed_point_position_input; + /* Fixed point position conversion QS8 -> QS8 */ + if(relative_shift != 0 || !in_place) + { + const auto relative_shift_vec = vdupq_n_qs8(relative_shift); + execute_window_loop(window, [&](const Coordinates & id) + { + const qint8x16_t texels_qs8 = vld1q_qs8(reinterpret_cast(input.ptr())); + vst1q_qs8(reinterpret_cast(output.ptr()), vqrshlq_s8(texels_qs8, relative_shift_vec)); + }, + input, output); + } + break; + } + case DataType::F32: + { + /* Up-conversion QS8 -> F32 */ + execute_window_loop(window, [&](const Coordinates & id) + { + const qint8x16_t texels_qs8 = vld1q_qs8(reinterpret_cast(input.ptr())); + + float32x4x2_t texels_low = vcvt_f32_qs8(vget_low_s8(texels_qs8), _fixed_point_position_input); + float32x4x2_t texels_high = vcvt_f32_qs8(vget_high_s8(texels_qs8), _fixed_point_position_input); + + vst1q_f32(reinterpret_cast(output.ptr()), texels_low.val[0]); + vst1q_f32(reinterpret_cast(output.ptr()) + 4, texels_low.val[1]); + vst1q_f32(reinterpret_cast(output.ptr()) + 8, texels_high.val[0]); + vst1q_f32(reinterpret_cast(output.ptr()) + 12, texels_high.val[1]); + }, + input, output); + break; + } + default: + ARM_COMPUTE_ERROR("Output data type not supported"); + } + break; + } + case DataType::S16: + { + switch(_output->info()->data_type()) + { + case DataType::U8: + { + const int16x8_t b = vdupq_n_s16(-static_cast(_shift)); + + /* Down-conversion S16 -> U8 */ + if(ConvertPolicy::SATURATE == _policy) + { + execute_window_loop(window, [&](const Coordinates & id) + { + const int16x8x2_t texels = + { + { + vqshlq_s16(vld1q_s16(reinterpret_cast(input.ptr())), b), + vqshlq_s16(vld1q_s16(reinterpret_cast(input.ptr()) + 8), b) + } + }; + + vst1q_u8(output.ptr(), vcombine_u8(vqmovun_s16(texels.val[0]), vqmovun_s16(texels.val[1]))); + }, + input, output); + } + else 
+ { + execute_window_loop(window, [&](const Coordinates & id) + { + const int16x8x2_t texels = + { + { + vshlq_s16(vld1q_s16(reinterpret_cast(input.ptr())), b), + vshlq_s16(vld1q_s16(reinterpret_cast(input.ptr()) + 8), b) + } + }; + + vst1q_u8(output.ptr(), vcombine_u8(vmovn_u16(vreinterpretq_u16_s16(texels.val[0])), + vmovn_u16(vreinterpretq_u16_s16(texels.val[1])))); + }, + input, output); + } + break; + } + case DataType::S32: + { + const int32x4_t b = vdupq_n_s32(_shift); + + /* Up-conversion S16 -> S32 */ + execute_window_loop(window, [&](const Coordinates & id) + { + const int16x8x2_t texels = + { + { + vld1q_s16(reinterpret_cast(input.ptr())), + vld1q_s16(reinterpret_cast(input.ptr()) + 8) + } + }; + + const int32x4x4_t texels_s32 = + { + { + vshlq_s32(vmovl_s16(vget_low_s16(texels.val[0])), b), + vshlq_s32(vmovl_s16(vget_high_s16(texels.val[0])), b), + vshlq_s32(vmovl_s16(vget_low_s16(texels.val[1])), b), + vshlq_s32(vmovl_s16(vget_high_s16(texels.val[1])), b) + } + }; + + vst1q_s32(reinterpret_cast(output.ptr()), texels_s32.val[0]); + vst1q_s32(reinterpret_cast(output.ptr()) + 4, texels_s32.val[1]); + vst1q_s32(reinterpret_cast(output.ptr()) + 8, texels_s32.val[2]); + vst1q_s32(reinterpret_cast(output.ptr()) + 12, texels_s32.val[3]); + }, + input, output); + break; + } + default: + ARM_COMPUTE_ERROR("Output data type not supported"); + } + break; + } + case DataType::U16: + { + switch(_output->info()->data_type()) + { + case DataType::U8: + { + const int16x8_t b = vdupq_n_s16(-static_cast(_shift)); + + /* Down-conversion U16 -> U8 */ + if(ConvertPolicy::SATURATE == _policy) + { + execute_window_loop(window, [&](const Coordinates & id) + { + const uint16x8x2_t texels = + { + { + vqshlq_u16(vld1q_u16(reinterpret_cast(input.ptr())), b), + vqshlq_u16(vld1q_u16(reinterpret_cast(input.ptr()) + 8), b) + } + }; + + vst1q_u8(output.ptr(), vcombine_u8(vqmovn_u16(texels.val[0]), vqmovn_u16(texels.val[1]))); + }, + input, output); + } + else + { + execute_window_loop(window, [&](const Coordinates & id) + { + const uint16x8x2_t texels = + { + { + vshlq_u16(vld1q_u16(reinterpret_cast(input.ptr())), b), + vshlq_u16(vld1q_u16(reinterpret_cast(input.ptr()) + 8), b) + } + }; + + vst1q_u8(output.ptr(), vcombine_u8(vmovn_u16(texels.val[0]), vmovn_u16(texels.val[1]))); + }, + input, output); + } + break; + } + case DataType::U32: + { + const int32x4_t b = vdupq_n_s32(_shift); + + /* Up-conversion U16 -> U32 */ + execute_window_loop(window, [&](const Coordinates & id) + { + const uint16x8x2_t texels = + { + { + vld1q_u16(reinterpret_cast(input.ptr())), + vld1q_u16(reinterpret_cast(input.ptr()) + 8) + } + }; + + vst1q_u32(reinterpret_cast(output.ptr()), vshlq_u32(vmovl_u16(vget_low_u16(texels.val[0])), b)); + vst1q_u32(reinterpret_cast(output.ptr()) + 4, vshlq_u32(vmovl_u16(vget_high_u16(texels.val[0])), b)); + vst1q_u32(reinterpret_cast(output.ptr()) + 8, vshlq_u32(vmovl_u16(vget_low_u16(texels.val[1])), b)); + vst1q_u32(reinterpret_cast(output.ptr()) + 12, vshlq_u32(vmovl_u16(vget_high_u16(texels.val[1])), b)); + }, + input, output); + break; + } + default: + ARM_COMPUTE_ERROR("Output data type not supported"); + } + break; + } + case DataType::QS16: + { + switch(_output->info()->data_type()) + { + case DataType::QS16: + { + const int relative_shift = _fixed_point_position_output - _fixed_point_position_input; + /* Fixed point position conversion QS16 -> QS16 */ + if(relative_shift != 0 || !in_place) + { + const auto relative_shift_vec = vdupq_n_qs16(relative_shift); + execute_window_loop(window, 
[&](const Coordinates & id) + { + const qint16x8x2_t texels_qs16 = + { + { + vld1q_qs16(reinterpret_cast(input.ptr())), + vld1q_qs16(reinterpret_cast(input.ptr()) + 8) + } + }; + vst1q_qs16(reinterpret_cast(output.ptr()), vqrshlq_s16(texels_qs16.val[0], relative_shift_vec)); + vst1q_qs16(reinterpret_cast(output.ptr()) + 8, vqrshlq_s16(texels_qs16.val[1], relative_shift_vec)); + }, + input, output); + } + break; + } + case DataType::F32: + { + /* Up-conversion QS16 -> F32 */ + execute_window_loop(window, [&](const Coordinates & id) + { + const int16x8x2_t texels_qs16 = + { + { + vld1q_s16(reinterpret_cast(input.ptr())), + vld1q_s16(reinterpret_cast(input.ptr()) + 8) + } + }; + + vst1q_f32(reinterpret_cast(output.ptr()), vcvt_f32_qs16(vget_low_s16(texels_qs16.val[0]), _fixed_point_position_input)); + vst1q_f32(reinterpret_cast(output.ptr()) + 4, vcvt_f32_qs16(vget_high_s16(texels_qs16.val[0]), _fixed_point_position_input)); + vst1q_f32(reinterpret_cast(output.ptr()) + 8, vcvt_f32_qs16(vget_low_s16(texels_qs16.val[1]), _fixed_point_position_input)); + vst1q_f32(reinterpret_cast(output.ptr()) + 12, vcvt_f32_qs16(vget_high_s16(texels_qs16.val[1]), _fixed_point_position_input)); + }, + input, output); + break; + } + default: + ARM_COMPUTE_ERROR("Output data type not supported"); + } + break; + } + case DataType::F32: + { + switch(_output->info()->data_type()) + { + case DataType::QS8: + { + /* Down-conversion F32 -> QS8 */ + execute_window_loop(window, [&](const Coordinates & id) + { + const float32x4x4_t texels_f32 = + { + { + vld1q_f32(reinterpret_cast(input.ptr())), + vld1q_f32(reinterpret_cast(input.ptr()) + 4), + vld1q_f32(reinterpret_cast(input.ptr()) + 8), + vld1q_f32(reinterpret_cast(input.ptr()) + 12) + } + }; + + const qint8x16_t texels_s8 = vqcvtq_qs8_f32(texels_f32, _fixed_point_position_output); + + vst1q_s8(reinterpret_cast(output.ptr()), texels_s8); + }, + input, output); + break; + } + case DataType::QS16: + { + /* Down-conversion F32 -> QS16 */ + execute_window_loop(window, [&](const Coordinates & id) + { + const float32x4x2_t texels_f32_1 = + { + { + vld1q_f32(reinterpret_cast(input.ptr())), + vld1q_f32(reinterpret_cast(input.ptr()) + 4), + } + }; + const float32x4x2_t texels_f32_2 = + { + { + vld1q_f32(reinterpret_cast(input.ptr()) + 8), + vld1q_f32(reinterpret_cast(input.ptr()) + 12) + } + }; + + vst1q_s16(reinterpret_cast(output.ptr()), vqcvtq_qs16_f32(texels_f32_1, _fixed_point_position_output)); + vst1q_s16(reinterpret_cast(output.ptr()) + 8, vqcvtq_qs16_f32(texels_f32_2, _fixed_point_position_output)); + }, + input, output); + break; + } + default: + ARM_COMPUTE_ERROR("Output data type not supported"); + } + break; + } + default: + ARM_COMPUTE_ERROR("Not supported"); + } +} diff --git a/src/core/NEON/kernels/NEDepthwiseConvolution3x3Kernel.cpp b/src/core/NEON/kernels/NEDepthwiseConvolution3x3Kernel.cpp deleted file mode 100644 index 5c4bd34e05..0000000000 --- a/src/core/NEON/kernels/NEDepthwiseConvolution3x3Kernel.cpp +++ /dev/null @@ -1,186 +0,0 @@ -/* - * Copyright (c) 2017 ARM Limited. 
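The QS8/QS16 paths above are fixed-point: a raw value q with fixed-point position fp represents q / 2^fp. A scalar sketch of the two fixed-point operations the kernel uses (illustrative names, not library API):

#include <algorithm>
#include <cstdint>

// QS8 -> F32 (vcvt_f32_qs8 in the kernel): divide by 2^fp.
inline float qs8_to_f32(int8_t q, int fp)
{
    return static_cast<float>(q) / static_cast<float>(1 << fp);
}

// QS8 -> QS8 at a new fixed-point position (vqrshlq_s8 by fp_out - fp_in):
// rounding shift with saturation to the int8 range.
inline int8_t qs8_requantize(int8_t q, int fp_in, int fp_out)
{
    const int shift = fp_out - fp_in;
    int32_t   v     = q;
    if(shift >= 0)
    {
        v <<= shift;
    }
    else
    {
        v = (v + (1 << (-shift - 1))) >> -shift; // round to nearest, as VRSHL does
    }
    return static_cast<int8_t>(std::min(std::max(v, -128), 127)); // saturate
}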
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/core/NEON/kernels/NEDepthwiseConvolution3x3Kernel.h" -#include "arm_compute/core/NEON/kernels/convolution/NEDirectConvolutionDetail.h" - -#include "arm_compute/core/AccessWindowStatic.h" -#include "arm_compute/core/AccessWindowTranspose.h" -#include "arm_compute/core/Coordinates.h" -#include "arm_compute/core/Error.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/ITensor.h" -#include "arm_compute/core/NEON/INEKernel.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/TensorShape.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/core/Window.h" - -using namespace arm_compute; -using namespace arm_compute::detail; - -NEDepthwiseConvolution3x3Kernel::NEDepthwiseConvolution3x3Kernel() - : _border_size(0), _input(), _output(), _weights(), _conv_info() -{ -} - -BorderSize NEDepthwiseConvolution3x3Kernel::border_size() const -{ - return _border_size; -} - -void NEDepthwiseConvolution3x3Kernel::configure(const ITensor *input, const ITensor *weights, ITensor *output, const PadStrideInfo &conv_info) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32); - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output, weights); - ARM_COMPUTE_ERROR_ON(weights->info()->dimension(0) != 3 || weights->info()->dimension(1) != 3); - - std::pair expected_output = scaled_dimensions(input->info()->tensor_shape().x(), input->info()->tensor_shape().y(), - weights->info()->tensor_shape().x(), weights->info()->tensor_shape().y(), - conv_info); - - ARM_COMPUTE_UNUSED(expected_output); - ARM_COMPUTE_ERROR_ON(expected_output.first != output->info()->tensor_shape().x()); - ARM_COMPUTE_ERROR_ON(expected_output.second != output->info()->tensor_shape().y()); - - _input = input; - _output = output; - _weights = weights; - _conv_info = conv_info; - const unsigned int conv_stride_x = conv_info.stride().first; - const unsigned int conv_pad_x = conv_info.pad().first; - const unsigned int conv_pad_y = conv_info.pad().second; - - ARM_COMPUTE_ERROR_ON(conv_stride_x < 1 || conv_stride_x > 3); - - const unsigned int num_elems_written_per_iteration = 16 >> conv_stride_x; - _border_size = BorderSize(conv_pad_y, conv_pad_x); - - // Configure kernel window - Window win = calculate_max_window(*output->info(), Steps(num_elems_written_per_iteration)); - - AccessWindowStatic 
input_access(input->info(), -conv_pad_x, -conv_pad_y, input->info()->dimension(0) + _border_size.right, input->info()->dimension(1) + _border_size.bottom); - AccessWindowStatic weights_access(weights->info(), 0, 0, weights->info()->dimension(0), weights->info()->dimension(1)); - AccessWindowHorizontal output_access(output->info(), 0, num_elems_written_per_iteration); - - update_window_and_padding(win, input_access, weights_access, output_access); - output_access.set_valid_region(win, ValidRegion(Coordinates(), output->info()->tensor_shape())); - - INEKernel::configure(win); -} - -template -class convolver_3x3 -{ -public: - static void convolve(const Window &window, unsigned int num_elems_written_per_iteration, - const ITensor *input, const ITensor *weights, ITensor *output, const PadStrideInfo &conv_info) - { - const int input_stride_x = input->info()->strides_in_bytes().x(); - const int input_stride_y = input->info()->strides_in_bytes().y(); - const int output_stride_y = output->info()->strides_in_bytes().y(); - const int kernel_stride_y = weights->info()->strides_in_bytes().y(); - const int kernel_stride_z = weights->info()->strides_in_bytes().z(); - const int output_w = output->info()->dimension(0); - const int output_h = output->info()->dimension(1); - const int delta_input = get_input_num_elems_processed(num_elems_written_per_iteration); - const unsigned int conv_stride_y = std::get<1>(conv_info.stride()); - const unsigned int conv_pad_x = std::get<0>(conv_info.pad()); - const unsigned int conv_pad_y = std::get<1>(conv_info.pad()); - - // setup output window for the iterator - Window window_out = window; - window_out.set(Window::DimX, Window::Dimension(0, output->info()->dimension(Window::DimX), output->info()->dimension(Window::DimX))); - window_out.set(Window::DimY, Window::Dimension(0, output->info()->dimension(Window::DimY), output->info()->dimension(Window::DimY))); - - // setup input window for the iterator - Window window_in = window; - // we just want execute_window_loop to iterate over the dimensions > 2, so we set the first 2 dimensions to 0 - window_in.set(Window::DimX, Window::Dimension(0, 0, 0)); - window_in.set(Window::DimY, Window::Dimension(0, 0, 0)); - - Window window_k = calculate_max_window(*weights->info(), Steps(1u)); - - Iterator in(input, window_in); - Iterator out(output, window_out); - Iterator w(weights, window_k); - - const uint8_t *weights_ptr = w.ptr(); - - execute_window_loop(window_out, [&](const Coordinates & id) - { - const uint8_t *input_ptr = in.ptr() - conv_pad_x * input_stride_x - conv_pad_y * input_stride_y; - int ih = 0; - int oh = 0; - - const uint8_t *ptr_weights_base = weights_ptr + id.z() * kernel_stride_z; - const auto ptr_weights_r0 = reinterpret_cast(ptr_weights_base); - const auto ptr_weights_r1 = reinterpret_cast(ptr_weights_base + kernel_stride_y); - const auto ptr_weights_r2 = reinterpret_cast(ptr_weights_base + kernel_stride_y * 2); - const auto vw_r0 = load_matrix_row(ptr_weights_r0); - const auto vw_r1 = load_matrix_row(ptr_weights_r1); - const auto vw_r2 = load_matrix_row(ptr_weights_r2); - - for(ih = 0, oh = 0; oh < output_h; ++oh, ih += conv_stride_y) - { - auto in_top = reinterpret_cast(input_ptr + (ih + 0) * input_stride_y); - auto in_mid = reinterpret_cast(input_ptr + (ih + 1) * input_stride_y); - auto in_low = reinterpret_cast(input_ptr + (ih + 2) * input_stride_y); - auto p_out = reinterpret_cast(out.ptr() + oh * output_stride_y); - - for(int ow = 0; ow < output_w; ow += num_elems_written_per_iteration, - in_top += 
delta_input, in_mid += delta_input, in_low += delta_input, p_out += num_elems_written_per_iteration) - { - auto vres = convolve_3x3(in_top, in_mid, in_low, vw_r0, vw_r1, vw_r2, 0); - store_results(p_out, vres); - } - } - }, - in, out); - } -}; - -void NEDepthwiseConvolution3x3Kernel::run(const Window &window, const ThreadInfo &info) -{ - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_UNUSED(info); - - const unsigned int conv_stride_x = _conv_info.stride().first; - const unsigned int num_elems_written_per_iteration = 16 >> conv_stride_x; - - switch(conv_stride_x) - { - case 1: - convolver_3x3<1>::convolve(window, num_elems_written_per_iteration, _input, _weights, _output, _conv_info); - break; - case 2: - convolver_3x3<2>::convolve(window, num_elems_written_per_iteration, _input, _weights, _output, _conv_info); - break; - case 3: - convolver_3x3<3>::convolve(window, num_elems_written_per_iteration, _input, _weights, _output, _conv_info); - break; - default: - ARM_COMPUTE_ERROR("Not implemented"); - } -} diff --git a/src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp b/src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp new file mode 100644 index 0000000000..02962e0492 --- /dev/null +++ b/src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp @@ -0,0 +1,186 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
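As a reviewing aid for the kernel deleted above and re-added below: per channel it computes a 3x3 correlation over a strided window. The scalar equivalent of one output element (no border handling; names are illustrative):

// out(x, y) = sum over (i, j) in [0, 3) of in(x*stride + i, y*stride + j) * w(j, i),
// computed independently for every channel (depthwise).
inline float depthwise_3x3_at(const float *in, int in_w, const float w[3][3],
                              int x, int y, int stride)
{
    float acc = 0.f;
    for(int j = 0; j < 3; ++j) // the kernel's in_top / in_mid / in_low rows
    {
        for(int i = 0; i < 3; ++i)
        {
            acc += in[(y * stride + j) * in_w + (x * stride + i)] * w[j][i];
        }
    }
    return acc;
}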
+ */ +#include "arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.h" +#include "arm_compute/core/NEON/kernels/convolution/NEDirectConvolutionDetail.h" + +#include "arm_compute/core/AccessWindowStatic.h" +#include "arm_compute/core/AccessWindowTranspose.h" +#include "arm_compute/core/Coordinates.h" +#include "arm_compute/core/Error.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/ITensor.h" +#include "arm_compute/core/NEON/INEKernel.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/TensorShape.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/core/Window.h" + +using namespace arm_compute; +using namespace arm_compute::detail; + +NEDepthwiseConvolutionLayer3x3Kernel::NEDepthwiseConvolutionLayer3x3Kernel() + : _border_size(0), _input(), _output(), _weights(), _conv_info() +{ +} + +BorderSize NEDepthwiseConvolutionLayer3x3Kernel::border_size() const +{ + return _border_size; +} + +void NEDepthwiseConvolutionLayer3x3Kernel::configure(const ITensor *input, const ITensor *weights, ITensor *output, const PadStrideInfo &conv_info) +{ + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32); + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output, weights); + ARM_COMPUTE_ERROR_ON(weights->info()->dimension(0) != 3 || weights->info()->dimension(1) != 3); + + std::pair expected_output = scaled_dimensions(input->info()->tensor_shape().x(), input->info()->tensor_shape().y(), + weights->info()->tensor_shape().x(), weights->info()->tensor_shape().y(), + conv_info); + + ARM_COMPUTE_UNUSED(expected_output); + ARM_COMPUTE_ERROR_ON(expected_output.first != output->info()->tensor_shape().x()); + ARM_COMPUTE_ERROR_ON(expected_output.second != output->info()->tensor_shape().y()); + + _input = input; + _output = output; + _weights = weights; + _conv_info = conv_info; + const unsigned int conv_stride_x = conv_info.stride().first; + const unsigned int conv_pad_x = conv_info.pad().first; + const unsigned int conv_pad_y = conv_info.pad().second; + + ARM_COMPUTE_ERROR_ON(conv_stride_x < 1 || conv_stride_x > 3); + + const unsigned int num_elems_written_per_iteration = 16 >> conv_stride_x; + _border_size = BorderSize(conv_pad_y, conv_pad_x); + + // Configure kernel window + Window win = calculate_max_window(*output->info(), Steps(num_elems_written_per_iteration)); + + AccessWindowStatic input_access(input->info(), -conv_pad_x, -conv_pad_y, input->info()->dimension(0) + _border_size.right, input->info()->dimension(1) + _border_size.bottom); + AccessWindowStatic weights_access(weights->info(), 0, 0, weights->info()->dimension(0), weights->info()->dimension(1)); + AccessWindowHorizontal output_access(output->info(), 0, num_elems_written_per_iteration); + + update_window_and_padding(win, input_access, weights_access, output_access); + output_access.set_valid_region(win, ValidRegion(Coordinates(), output->info()->tensor_shape())); + + INEKernel::configure(win); +} + +template +class convolver_3x3 +{ +public: + static void convolve(const Window &window, unsigned int num_elems_written_per_iteration, + const ITensor *input, const ITensor *weights, ITensor *output, const PadStrideInfo &conv_info) + { + const int input_stride_x = input->info()->strides_in_bytes().x(); + const int input_stride_y = input->info()->strides_in_bytes().y(); + const int output_stride_y = output->info()->strides_in_bytes().y(); + const int kernel_stride_y = weights->info()->strides_in_bytes().y(); + const int 
kernel_stride_z = weights->info()->strides_in_bytes().z(); + const int output_w = output->info()->dimension(0); + const int output_h = output->info()->dimension(1); + const int delta_input = get_input_num_elems_processed(num_elems_written_per_iteration); + const unsigned int conv_stride_y = std::get<1>(conv_info.stride()); + const unsigned int conv_pad_x = std::get<0>(conv_info.pad()); + const unsigned int conv_pad_y = std::get<1>(conv_info.pad()); + + // setup output window for the iterator + Window window_out = window; + window_out.set(Window::DimX, Window::Dimension(0, output->info()->dimension(Window::DimX), output->info()->dimension(Window::DimX))); + window_out.set(Window::DimY, Window::Dimension(0, output->info()->dimension(Window::DimY), output->info()->dimension(Window::DimY))); + + // setup input window for the iterator + Window window_in = window; + // we just want execute_window_loop to iterate over the dimensions > 2, so we set the first 2 dimensions to 0 + window_in.set(Window::DimX, Window::Dimension(0, 0, 0)); + window_in.set(Window::DimY, Window::Dimension(0, 0, 0)); + + Window window_k = calculate_max_window(*weights->info(), Steps(1u)); + + Iterator in(input, window_in); + Iterator out(output, window_out); + Iterator w(weights, window_k); + + const uint8_t *weights_ptr = w.ptr(); + + execute_window_loop(window_out, [&](const Coordinates & id) + { + const uint8_t *input_ptr = in.ptr() - conv_pad_x * input_stride_x - conv_pad_y * input_stride_y; + int ih = 0; + int oh = 0; + + const uint8_t *ptr_weights_base = weights_ptr + id.z() * kernel_stride_z; + const auto ptr_weights_r0 = reinterpret_cast(ptr_weights_base); + const auto ptr_weights_r1 = reinterpret_cast(ptr_weights_base + kernel_stride_y); + const auto ptr_weights_r2 = reinterpret_cast(ptr_weights_base + kernel_stride_y * 2); + const auto vw_r0 = load_matrix_row(ptr_weights_r0); + const auto vw_r1 = load_matrix_row(ptr_weights_r1); + const auto vw_r2 = load_matrix_row(ptr_weights_r2); + + for(ih = 0, oh = 0; oh < output_h; ++oh, ih += conv_stride_y) + { + auto in_top = reinterpret_cast(input_ptr + (ih + 0) * input_stride_y); + auto in_mid = reinterpret_cast(input_ptr + (ih + 1) * input_stride_y); + auto in_low = reinterpret_cast(input_ptr + (ih + 2) * input_stride_y); + auto p_out = reinterpret_cast(out.ptr() + oh * output_stride_y); + + for(int ow = 0; ow < output_w; ow += num_elems_written_per_iteration, + in_top += delta_input, in_mid += delta_input, in_low += delta_input, p_out += num_elems_written_per_iteration) + { + auto vres = convolve_3x3(in_top, in_mid, in_low, vw_r0, vw_r1, vw_r2, 0); + store_results(p_out, vres); + } + } + }, + in, out); + } +}; + +void NEDepthwiseConvolutionLayer3x3Kernel::run(const Window &window, const ThreadInfo &info) +{ + ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); + ARM_COMPUTE_UNUSED(info); + + const unsigned int conv_stride_x = _conv_info.stride().first; + const unsigned int num_elems_written_per_iteration = 16 >> conv_stride_x; + + switch(conv_stride_x) + { + case 1: + convolver_3x3<1>::convolve(window, num_elems_written_per_iteration, _input, _weights, _output, _conv_info); + break; + case 2: + convolver_3x3<2>::convolve(window, num_elems_written_per_iteration, _input, _weights, _output, _conv_info); + break; + case 3: + convolver_3x3<3>::convolve(window, num_elems_written_per_iteration, _input, _weights, _output, _conv_info); + break; + default: + ARM_COMPUTE_ERROR("Not implemented"); + } +} diff --git a/src/core/NEON/kernels/NEL2NormalizeKernel.cpp 
b/src/core/NEON/kernels/NEL2NormalizeKernel.cpp
deleted file mode 100644
index 12c532afd5..0000000000
--- a/src/core/NEON/kernels/NEL2NormalizeKernel.cpp
+++ /dev/null
@@ -1,126 +0,0 @@
-/*
- * Copyright (c) 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/NEON/kernels/NEL2NormalizeKernel.h"
-
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/NEON/NEMath.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Utils.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/Window.h"
-
-#include <arm_neon.h>
-#include <cmath>
-
-using namespace arm_compute;
-
-namespace
-{
-void l2_normalize_X(const ITensor *in, const ITensor *sum, ITensor *out, float epsilon, const Window &window)
-{
-    Window window_sum(window);
-    window_sum.set(Window::DimX, Window::Dimension(0, 0, 0));
-
-    Window in_slice  = window.first_slice_window_1D();
-    Window sum_slice = window_sum.first_slice_window_1D();
-
-    do
-    {
-        Iterator input_it(in, in_slice);
-        Iterator sum_it(sum, sum_slice);
-        Iterator output_it(out, in_slice);
-
-        const float       sum_value           = *reinterpret_cast<const float *>(sum_it.ptr());
-        const float32x4_t vec_normalize_value = vdupq_n_f32(1.f / std::sqrt(std::max(sum_value, epsilon)));
-
-        execute_window_loop(in_slice, [&](const Coordinates & id)
-        {
-            const auto in_ptr  = reinterpret_cast<const float *>(input_it.ptr());
-            const auto out_ptr = reinterpret_cast<float *>(output_it.ptr());
-
-            vst1q_f32(out_ptr, vmulq_f32(vld1q_f32(in_ptr), vec_normalize_value));
-        },
-        input_it, output_it);
-    }
-    while(window.slide_window_slice_1D(in_slice) && window.slide_window_slice_1D(sum_slice));
-}
-} // namespace
-
-NEL2NormalizeKernel::NEL2NormalizeKernel()
-    : _input(nullptr), _sum(nullptr), _output(nullptr), _axis(0), _epsilon(1e-12)
-{
-}
-
-void NEL2NormalizeKernel::configure(const ITensor *input, const ITensor *sum, ITensor *output, unsigned int axis, float epsilon)
-{
-    ARM_COMPUTE_ERROR_ON_NULLPTR(input, sum, output);
-    ARM_COMPUTE_ERROR_ON_MSG(axis >= TensorShape::num_max_dimensions, "Normalization axis greater than max number of dimensions");
-    ARM_COMPUTE_ERROR_ON_MSG(axis > 0, "Unsupported normalization axis, Supported axis is 0");
-
-    // Output auto initialization if not yet initialized
-    auto_init_if_empty(*output->info(), input->info()->tensor_shape(), 1, input->info()->data_type(), input->info()->fixed_point_position());
-
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32);
-    ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output, sum);
-    ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, output);
-
-    unsigned int num_elems_processed_per_iteration     = 16 / data_size_from_type(input->info()->data_type());
-    unsigned int num_elems_processed_per_iteration_sum = (axis == 0) ? 1 : num_elems_processed_per_iteration;
-
-    _input   = input;
-    _sum     = sum;
-    _output  = output;
-    _axis    = axis;
-    _epsilon = epsilon;
-
-    // Configure kernel window
-    Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration));
-    AccessWindowHorizontal input_access(input->info(), 0, num_elems_processed_per_iteration);
-    AccessWindowHorizontal sum_access(sum->info(), 0, num_elems_processed_per_iteration_sum);
-    AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);
-
-    update_window_and_padding(win, input_access, sum_access, output_access);
-
-    output_access.set_valid_region(win, input->info()->valid_region());
-
-    INEKernel::configure(win);
-}
-
-void NEL2NormalizeKernel::run(const Window &window, const ThreadInfo &info)
-{
-    ARM_COMPUTE_UNUSED(info);
-    ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
-    ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
-
-    switch(_axis)
-    {
-        case 0:
-            l2_normalize_X(_input, _sum, _output, _epsilon, window);
-            break;
-        default:
-            ARM_COMPUTE_ERROR("Unsupported normalization axis");
-    }
-}
diff --git a/src/core/NEON/kernels/NEL2NormalizeLayerKernel.cpp b/src/core/NEON/kernels/NEL2NormalizeLayerKernel.cpp
new file mode 100644
index 0000000000..3bf1d9400e
--- /dev/null
+++ b/src/core/NEON/kernels/NEL2NormalizeLayerKernel.cpp
@@ -0,0 +1,126 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/NEON/kernels/NEL2NormalizeLayerKernel.h"
+
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/ITensor.h"
+#include "arm_compute/core/NEON/NEMath.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Utils.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/core/Window.h"
+
+#include <arm_neon.h>
+#include <cmath>
+
+using namespace arm_compute;
+
+namespace
+{
+void l2_normalize_X(const ITensor *in, const ITensor *sum, ITensor *out, float epsilon, const Window &window)
+{
+    Window window_sum(window);
+    window_sum.set(Window::DimX, Window::Dimension(0, 0, 0));
+
+    Window in_slice  = window.first_slice_window_1D();
+    Window sum_slice = window_sum.first_slice_window_1D();
+
+    do
+    {
+        Iterator input_it(in, in_slice);
+        Iterator sum_it(sum, sum_slice);
+        Iterator output_it(out, in_slice);
+
+        const float       sum_value           = *reinterpret_cast<const float *>(sum_it.ptr());
+        const float32x4_t vec_normalize_value = vdupq_n_f32(1.f / std::sqrt(std::max(sum_value, epsilon)));
+
+        execute_window_loop(in_slice, [&](const Coordinates & id)
+        {
+            const auto in_ptr  = reinterpret_cast<const float *>(input_it.ptr());
+            const auto out_ptr = reinterpret_cast<float *>(output_it.ptr());
+
+            vst1q_f32(out_ptr, vmulq_f32(vld1q_f32(in_ptr), vec_normalize_value));
+        },
+        input_it, output_it);
+    }
+    while(window.slide_window_slice_1D(in_slice) && window.slide_window_slice_1D(sum_slice));
+}
+} // namespace
+
+NEL2NormalizeLayerKernel::NEL2NormalizeLayerKernel()
+    : _input(nullptr), _sum(nullptr), _output(nullptr), _axis(0), _epsilon(1e-12)
+{
+}
+
+void NEL2NormalizeLayerKernel::configure(const ITensor *input, const ITensor *sum, ITensor *output, unsigned int axis, float epsilon)
+{
+    ARM_COMPUTE_ERROR_ON_NULLPTR(input, sum, output);
+    ARM_COMPUTE_ERROR_ON_MSG(axis >= TensorShape::num_max_dimensions, "Normalization axis greater than max number of dimensions");
+    ARM_COMPUTE_ERROR_ON_MSG(axis > 0, "Unsupported normalization axis, Supported axis is 0");
+
+    // Output auto initialization if not yet initialized
+    auto_init_if_empty(*output->info(), input->info()->tensor_shape(), 1, input->info()->data_type(), input->info()->fixed_point_position());
+
+    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32);
+    ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output, sum);
+    ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, output);
+
+    unsigned int num_elems_processed_per_iteration     = 16 / data_size_from_type(input->info()->data_type());
+    unsigned int num_elems_processed_per_iteration_sum = (axis == 0) ? 1 : num_elems_processed_per_iteration;
+
+    _input   = input;
+    _sum     = sum;
+    _output  = output;
+    _axis    = axis;
+    _epsilon = epsilon;
+
+    // Configure kernel window
+    Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration));
+    AccessWindowHorizontal input_access(input->info(), 0, num_elems_processed_per_iteration);
+    AccessWindowHorizontal sum_access(sum->info(), 0, num_elems_processed_per_iteration_sum);
+    AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);
+
+    update_window_and_padding(win, input_access, sum_access, output_access);
+
+    output_access.set_valid_region(win, input->info()->valid_region());
+
+    INEKernel::configure(win);
+}
+
+void NEL2NormalizeLayerKernel::run(const Window &window, const ThreadInfo &info)
+{
+    ARM_COMPUTE_UNUSED(info);
+    ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
+    ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
+
+    switch(_axis)
+    {
+        case 0:
+            l2_normalize_X(_input, _sum, _output, _epsilon, window);
+            break;
+        default:
+            ARM_COMPUTE_ERROR("Unsupported normalization axis");
+    }
+}
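
For readers tracing the rename: the kernel body is unchanged, and what l2_normalize_X computes per 1-D slice is simply out[i] = in[i] / sqrt(max(sum_sq, epsilon)), four lanes at a time. A scalar reference, for illustration only:

#include <algorithm>
#include <cmath>
#include <cstddef>

// Scalar equivalent of the vectorized loop above; "sum_sq" is the
// precomputed sum of squares for the slice being normalized.
void l2_normalize_x_reference(const float *in, float *out, std::size_t len, float sum_sq, float epsilon)
{
    const float norm_factor = 1.f / std::sqrt(std::max(sum_sq, epsilon));
    for(std::size_t i = 0; i < len; ++i)
    {
        out[i] = in[i] * norm_factor;
    }
}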
diff --git a/src/graph/operations/CLSimpleOperations.cpp b/src/graph/operations/CLSimpleOperations.cpp
index 647f88f0e2..8f2bf23ce3 100644
--- a/src/graph/operations/CLSimpleOperations.cpp
+++ b/src/graph/operations/CLSimpleOperations.cpp
@@ -106,7 +106,7 @@ REGISTER_SIMPLE_OPERATION(CLBatchNormalizationLayerOperation, OPENCL, OperationT
     return std::move(batch_norm);
 }
 
-/* DepthConvert Layer */
+/* DepthConvertLayer Layer */
 REGISTER_SIMPLE_OPERATION(CLDepthConvertLayerOperation, OPENCL, OperationType::DepthConvertLayer)
 {
     ARM_COMPUTE_ERROR_ON(ctx.num_inputs() != 1);
@@ -121,7 +121,7 @@ REGISTER_SIMPLE_OPERATION(CLDepthConvertLayerOperation, OPENCL, OperationType::D
     const auto shift = ctx.parameter<uint32_t>("shift");
 
     // Create and configure function
-    auto depthconvert = arm_compute::support::cpp14::make_unique<arm_compute::CLDepthConvert>();
+    auto depthconvert = arm_compute::support::cpp14::make_unique<arm_compute::CLDepthConvertLayer>();
     depthconvert->configure(in, out, conv_policy, shift);
 
     // Log info
@@ -156,13 +156,13 @@ REGISTER_SIMPLE_OPERATION(CLDepthwiseConvolutionOperation, OPENCL, OperationType
     bool run_3x3_opt = opt3x3 && weights->info()->dimension(0) == 3;
     if(run_3x3_opt)
     {
-        auto depwthwise_conv = arm_compute::support::cpp14::make_unique<arm_compute::CLDepthwiseConvolution3x3>();
+        auto depwthwise_conv = arm_compute::support::cpp14::make_unique<arm_compute::CLDepthwiseConvolutionLayer3x3>();
         depwthwise_conv->configure(in, weights, biases, out, conv_info);
         func = std::move(depwthwise_conv);
     }
     else
     {
-        auto depwthwise_conv = arm_compute::support::cpp14::make_unique<arm_compute::CLDepthwiseConvolution>();
+        auto depwthwise_conv = arm_compute::support::cpp14::make_unique<arm_compute::CLDepthwiseConvolutionLayer>();
         depwthwise_conv->configure(in, weights, biases, out, conv_info);
         func = std::move(depwthwise_conv);
     }
@@ -313,7 +313,7 @@ REGISTER_SIMPLE_OPERATION(CLL2NormalizeLayerOperation, OPENCL, OperationType::L2
     const auto epsilon = ctx.parameter<float>("epsilon");
 
     // Create and configure function
-    auto l2_norm = arm_compute::support::cpp14::make_unique<arm_compute::CLL2Normalize>();
+    auto l2_norm = arm_compute::support::cpp14::make_unique<arm_compute::CLL2NormalizeLayer>();
     l2_norm->configure(in, out, axis, epsilon);
 
     // Log info
diff --git a/src/graph/operations/NESimpleOperations.cpp b/src/graph/operations/NESimpleOperations.cpp
index f234341cec..bb99e8da4b 100644
--- a/src/graph/operations/NESimpleOperations.cpp
+++ b/src/graph/operations/NESimpleOperations.cpp
@@ -106,7 +106,7 @@ REGISTER_SIMPLE_OPERATION(NEBatchNormalizationLayerOperation, NEON, OperationTyp
     return std::move(batch_norm);
 }
 
-/* DepthConvert Layer */
+/* DepthConvertLayer Layer */
 REGISTER_SIMPLE_OPERATION(NEDepthConvertLayerOperation, NEON, OperationType::DepthConvertLayer)
 {
     ARM_COMPUTE_ERROR_ON(ctx.num_inputs() != 1);
@@ -121,7 +121,7 @@ REGISTER_SIMPLE_OPERATION(NEDepthConvertLayerOperation, NEON, OperationType::Dep
     const auto shift = ctx.parameter<uint32_t>("shift");
 
     // Create and configure function
-    auto depthconvert = arm_compute::support::cpp14::make_unique<arm_compute::NEDepthConvert>();
+    auto depthconvert = arm_compute::support::cpp14::make_unique<arm_compute::NEDepthConvertLayer>();
     depthconvert->configure(in, out, conv_policy, shift);
 
     // Log info
@@ -156,13 +156,13 @@ REGISTER_SIMPLE_OPERATION(NEDepthwiseConvolutionOperation, NEON, OperationType::
     bool run_3x3_opt = opt3x3 && weights->info()->dimension(0) == 3;
     if(run_3x3_opt)
     {
-        auto depwthwise_conv = arm_compute::support::cpp14::make_unique<arm_compute::NEDepthwiseConvolution3x3>();
+        auto depwthwise_conv = arm_compute::support::cpp14::make_unique<arm_compute::NEDepthwiseConvolutionLayer3x3>();
         depwthwise_conv->configure(in, weights, biases, out, conv_info);
         func = std::move(depwthwise_conv);
    }
     else
     {
-        auto depwthwise_conv = arm_compute::support::cpp14::make_unique<arm_compute::NEDepthwiseConvolution>();
+        auto depwthwise_conv = arm_compute::support::cpp14::make_unique<arm_compute::NEDepthwiseConvolutionLayer>();
         depwthwise_conv->configure(in, weights, biases, out, conv_info);
         func = std::move(depwthwise_conv);
     }
@@ -313,7 +313,7 @@ REGISTER_SIMPLE_OPERATION(NEL2NormalizeLayerOperation, NEON, OperationType::L2No
     const auto epsilon = ctx.parameter<float>("epsilon");
 
     // Create and configure function
-    auto l2_norm = arm_compute::support::cpp14::make_unique<arm_compute::NEL2Normalize>();
+    auto l2_norm = arm_compute::support::cpp14::make_unique<arm_compute::NEL2NormalizeLayer>();
     l2_norm->configure(in, out, axis, epsilon);
 
     // Log info
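
Outside the graph layer, call sites pick up the same renames with unchanged configure() signatures. A hedged usage sketch; the NEON signature is assumed to mirror the CL one shown later in this patch, and tensor initialization and allocation follow the usual library setup and are elided:

#include "arm_compute/runtime/NEON/functions/NEL2NormalizeLayer.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

// Configure and run L2 normalization along axis 0 under the new name.
void l2_normalize_example(Tensor &input, Tensor &output)
{
    NEL2NormalizeLayer l2_norm;
    l2_norm.configure(&input, &output, 0 /* axis */, 1e-12f /* epsilon */);
    l2_norm.run();
}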
diff --git a/src/runtime/CL/functions/CLDepthConcatenate.cpp b/src/runtime/CL/functions/CLDepthConcatenate.cpp
deleted file mode 100644
index 89e44ca98e..0000000000
--- a/src/runtime/CL/functions/CLDepthConcatenate.cpp
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
- * Copyright (c) 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/runtime/CL/functions/CLDepthConcatenate.h"
-
-#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/PixelValue.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "support/ToolchainSupport.h"
-
-using namespace arm_compute;
-
-CLDepthConcatenate::CLDepthConcatenate() // NOLINT
-    : _inputs_vector(),
-      _concat_kernels_vector(),
-      _border_handlers_vector(),
-      _num_inputs(0)
-{
-}
-
-void CLDepthConcatenate::configure(std::vector<ICLTensor *> inputs_vector, ICLTensor *output) // NOLINT
-{
-    ARM_COMPUTE_ERROR_ON(inputs_vector.size() < 2);
-
-    _num_inputs = inputs_vector.size();
-
-    unsigned int depth_offset = 0;
-
-    _concat_kernels_vector  = arm_compute::support::cpp14::make_unique<CLDepthConcatenateKernel[]>(_num_inputs);
-    _border_handlers_vector = arm_compute::support::cpp14::make_unique<CLFillBorderKernel[]>(_num_inputs);
-
-    TensorShape output_shape = calculate_depth_concatenate_shape(inputs_vector);
-
-    // Output auto inizialitation if not yet initialized
-    auto_init_if_empty(*output->info(), output_shape, 1, inputs_vector[0]->info()->data_type(), inputs_vector[0]->info()->fixed_point_position());
-
-    for(unsigned int i = 0; i < _num_inputs; i++)
-    {
-        _concat_kernels_vector[i].configure(inputs_vector.at(i), depth_offset, output);
-        _border_handlers_vector[i].configure(inputs_vector.at(i), _concat_kernels_vector[i].border_size(), BorderMode::CONSTANT, PixelValue(0));
-
-        depth_offset += inputs_vector.at(i)->info()->dimension(2);
-    }
-}
-
-void CLDepthConcatenate::run()
-{
-    cl::CommandQueue q = CLScheduler::get().queue();
-
-    for(unsigned i = 0; i < _num_inputs; i++)
-    {
-        CLScheduler::get().enqueue(_border_handlers_vector[i], false);
-        CLScheduler::get().enqueue(_concat_kernels_vector[i], true);
-    }
-}
diff --git a/src/runtime/CL/functions/CLDepthConcatenateLayer.cpp b/src/runtime/CL/functions/CLDepthConcatenateLayer.cpp
new file mode 100644
index 0000000000..05b5d54cf7
--- /dev/null
+++ b/src/runtime/CL/functions/CLDepthConcatenateLayer.cpp
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/CL/functions/CLDepthConcatenateLayer.h"
+
+#include "arm_compute/core/CL/ICLTensor.h"
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/PixelValue.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "support/ToolchainSupport.h"
+
+using namespace arm_compute;
+
+CLDepthConcatenateLayer::CLDepthConcatenateLayer() // NOLINT
+    : _inputs_vector(),
+      _concat_kernels_vector(),
+      _border_handlers_vector(),
+      _num_inputs(0)
+{
+}
+
+void CLDepthConcatenateLayer::configure(std::vector<ICLTensor *> inputs_vector, ICLTensor *output) // NOLINT
+{
+    ARM_COMPUTE_ERROR_ON(inputs_vector.size() < 2);
+
+    _num_inputs = inputs_vector.size();
+
+    unsigned int depth_offset = 0;
+
+    _concat_kernels_vector  = arm_compute::support::cpp14::make_unique<CLDepthConcatenateLayerKernel[]>(_num_inputs);
+    _border_handlers_vector = arm_compute::support::cpp14::make_unique<CLFillBorderKernel[]>(_num_inputs);
+
+    TensorShape output_shape = calculate_depth_concatenate_shape(inputs_vector);
+
+    // Output auto inizialitation if not yet initialized
+    auto_init_if_empty(*output->info(), output_shape, 1, inputs_vector[0]->info()->data_type(), inputs_vector[0]->info()->fixed_point_position());
+
+    for(unsigned int i = 0; i < _num_inputs; i++)
+    {
+        _concat_kernels_vector[i].configure(inputs_vector.at(i), depth_offset, output);
+        _border_handlers_vector[i].configure(inputs_vector.at(i), _concat_kernels_vector[i].border_size(), BorderMode::CONSTANT, PixelValue(0));
+
+        depth_offset += inputs_vector.at(i)->info()->dimension(2);
+    }
+}
+
+void CLDepthConcatenateLayer::run()
+{
+    cl::CommandQueue q = CLScheduler::get().queue();
+
+    for(unsigned i = 0; i < _num_inputs; i++)
+    {
+        CLScheduler::get().enqueue(_border_handlers_vector[i], false);
+        CLScheduler::get().enqueue(_concat_kernels_vector[i], true);
+    }
+}
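
The configure() loop above is the whole shape story for depth concatenation: each input lands at a running depth_offset and the output depth is the sum of the inputs' dimension(2). A minimal, library-independent sketch of that bookkeeping:

#include <cstddef>
#include <vector>

// Mirror of the depth_offset accumulation in configure() above: returns the
// offset at which each input is written, and the total output depth.
std::vector<std::size_t> depth_offsets(const std::vector<std::size_t> &input_depths, std::size_t &total_depth)
{
    std::vector<std::size_t> offsets;
    std::size_t depth_offset = 0;
    for(const std::size_t d : input_depths)
    {
        offsets.push_back(depth_offset);
        depth_offset += d;
    }
    total_depth = depth_offset;
    return offsets;
}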
diff --git a/src/runtime/CL/functions/CLDepthConvert.cpp b/src/runtime/CL/functions/CLDepthConvert.cpp
deleted file mode 100644
index b64d05b8b1..0000000000
--- a/src/runtime/CL/functions/CLDepthConvert.cpp
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Copyright (c) 2016, 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/runtime/CL/functions/CLDepthConvert.h"
-
-#include "arm_compute/core/CL/kernels/CLDepthConvertKernel.h"
-#include "support/ToolchainSupport.h"
-
-#include <utility>
-
-using namespace arm_compute;
-
-void CLDepthConvert::configure(const ICLTensor *input, ICLTensor *output, ConvertPolicy policy, uint32_t shift)
-{
-    auto k = arm_compute::support::cpp14::make_unique<CLDepthConvertKernel>();
-    k->configure(input, output, policy, shift);
-    _kernel = std::move(k);
-}
diff --git a/src/runtime/CL/functions/CLDepthConvertLayer.cpp b/src/runtime/CL/functions/CLDepthConvertLayer.cpp
new file mode 100644
index 0000000000..b448465909
--- /dev/null
+++ b/src/runtime/CL/functions/CLDepthConvertLayer.cpp
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/CL/functions/CLDepthConvertLayer.h"
+
+#include "arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h"
+#include "support/ToolchainSupport.h"
+
+#include <utility>
+
+using namespace arm_compute;
+
+void CLDepthConvertLayer::configure(const ICLTensor *input, ICLTensor *output, ConvertPolicy policy, uint32_t shift)
+{
+    auto k = arm_compute::support::cpp14::make_unique<CLDepthConvertLayerKernel>();
+    k->configure(input, output, policy, shift);
+    _kernel = std::move(k);
+}
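
CLDepthConvertLayer stays a thin simple-function wrapper: configure() only instantiates and configures the renamed kernel. A hedged usage sketch; tensors are assumed to be already initialized and allocated, and U8-to-U16 widening is one of the conversions this function handles:

#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLDepthConvertLayer.h"

using namespace arm_compute;

// Widen unsigned 8-bit data to 16 bits with saturation and no shift.
void depth_convert_example(CLTensor &src_u8, CLTensor &dst_u16)
{
    CLDepthConvertLayer convert;
    convert.configure(&src_u8, &dst_u16, ConvertPolicy::SATURATE, 0 /* shift */);
    convert.run();
}

diff --git a/src/runtime/CL/functions/CLDepthwiseConvolution.cpp b/src/runtime/CL/functions/CLDepthwiseConvolution.cpp
deleted file mode 100644
index 81149508dd..0000000000
--- a/src/runtime/CL/functions/CLDepthwiseConvolution.cpp
+++ /dev/null
@@ -1,138 +0,0 @@
-/*
- * Copyright (c) 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.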
IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/runtime/CL/functions/CLDepthwiseConvolution.h"
-
-#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/PixelValue.h"
-#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "support/ToolchainSupport.h"
-
-using namespace arm_compute;
-
-CLDepthwiseConvolution3x3::CLDepthwiseConvolution3x3()
-    : _kernel(), _border_handler()
-{
-}
-
-void CLDepthwiseConvolution3x3::configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info)
-{
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F32);
-    ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);
-
-    _kernel.set_target(CLScheduler::get().target());
-    _kernel.configure(input, weights, biases, output, conv_info);
-
-    // Configure border handler
-    PixelValue &&zero_value(0.f);
-    if(is_data_type_quantized_asymmetric(input->info()->data_type()))
-    {
-        zero_value = PixelValue(static_cast<uint8_t>(input->info()->quantization_info().offset));
-    }
-    _border_handler.configure(input, _kernel.border_size(), BorderMode::CONSTANT, zero_value);
-}
-
-void CLDepthwiseConvolution3x3::run()
-{
-    CLScheduler::get().enqueue(_border_handler);
-    CLScheduler::get().enqueue(_kernel);
-}
-
-CLDepthwiseConvolution::CLDepthwiseConvolution()
-    : _im2col_kernel(), _weights_reshape_kernel(), _v2mm_kernel(), _vector_to_tensor_kernel(), _v2mm_input_fill_border(), _v2mm_weights_fill_border(), _input_reshaped(), _weights_reshaped(),
-      _v2mm_output()
-{
-}
-
-void CLDepthwiseConvolution::configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info)
-{
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32);
-    ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);
-    ARM_COMPUTE_ERROR_ON(input->info()->dimension(2) != weights->info()->dimension(2));
-
-    const size_t weights_w = weights->info()->dimension(0);
-    const size_t weights_h = weights->info()->dimension(1);
-    const size_t weights_z = weights->info()->dimension(2);
-
-    const bool      has_bias   = (biases != nullptr);
-    const GPUTarget gpu_target = CLScheduler::get().target();
-
-    unsigned int conv_w = 0;
-    unsigned int conv_h = 0;
-    std::tie(conv_w, conv_h) = scaled_dimensions(input->info()->dimension(0), input->info()->dimension(1), weights_w, weights_h, conv_info);
-
-    // Set up intermediate tensors
-    const size_t patch_size = weights_w * weights_h + ((has_bias) ? 1 : 0);
-    const size_t conv_size  = conv_w * conv_h;
-
-    // Im2Col configuration
-    TensorShape shape_im2col = input->info()->tensor_shape();
-    shape_im2col.set(0, patch_size);
-    shape_im2col.set(1, conv_size);
-    shape_im2col.set(2, weights_z);
-    const TensorInfo info_im2col(shape_im2col, 1, input->info()->data_type(), input->info()->fixed_point_position());
-    _input_reshaped.allocator()->init(info_im2col);
-    _im2col_kernel.set_target(gpu_target);
-    _im2col_kernel.configure(input, &_input_reshaped, Size2D(weights_w, weights_h), conv_info, has_bias);
-
-    // Weights reshape configuration
-    const TensorShape shape_weights_reshape(patch_size, weights_z);
-    const TensorInfo  info_weights_reshape(shape_weights_reshape, 1, weights->info()->data_type(), weights->info()->fixed_point_position());
-    _weights_reshaped.allocator()->init(info_weights_reshape);
-    _weights_reshape_kernel.configure(weights, &_weights_reshaped, biases);
-
-    // GEMV configuration
-    TensorShape shape_v2mm_out = input->info()->tensor_shape();
-    shape_v2mm_out.set(0, conv_size * weights_z);
-    shape_v2mm_out.set(1, 1);
-    shape_v2mm_out.set(2, 1);
-    const TensorInfo info_v2mm_out(shape_v2mm_out, 1, input->info()->data_type(), input->info()->fixed_point_position());
-    _v2mm_output.allocator()->init(info_v2mm_out);
-    _v2mm_kernel.set_target(gpu_target);
-    _v2mm_kernel.configure(&_input_reshaped, &_weights_reshaped, &_v2mm_output);
-    _vector_to_tensor_kernel.configure(&_v2mm_output, output, conv_w, conv_h);
-
-    BorderSize border_size = _v2mm_kernel.border_size();
-    _v2mm_input_fill_border.configure(&_input_reshaped, border_size, BorderMode::CONSTANT, PixelValue(0));
-
-    border_size.bottom = 0;
-    _v2mm_weights_fill_border.configure(&_weights_reshaped, border_size, BorderMode::CONSTANT, PixelValue(0));
-
-    // Allocate intermediate tensors
-    _input_reshaped.allocator()->allocate();
-    _weights_reshaped.allocator()->allocate();
-    _v2mm_output.allocator()->allocate();
-}
-
-void CLDepthwiseConvolution::run()
-{
-    CLScheduler::get().enqueue(_im2col_kernel);
-
-    CLScheduler::get().enqueue(_weights_reshape_kernel);
-
-    CLScheduler::get().enqueue(_v2mm_input_fill_border);
-    CLScheduler::get().enqueue(_v2mm_weights_fill_border);
-    CLScheduler::get().enqueue(_v2mm_kernel);
-
-    CLScheduler::get().enqueue(_vector_to_tensor_kernel);
-}
diff --git a/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp b/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp
new file mode 100644
index 0000000000..02273fe08b
--- /dev/null
+++ b/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp
@@ -0,0 +1,138 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h"
+
+#include "arm_compute/core/CL/ICLTensor.h"
+#include "arm_compute/core/PixelValue.h"
+#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "support/ToolchainSupport.h"
+
+using namespace arm_compute;
+
+CLDepthwiseConvolutionLayer3x3::CLDepthwiseConvolutionLayer3x3()
+    : _kernel(), _border_handler()
+{
+}
+
+void CLDepthwiseConvolutionLayer3x3::configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info)
+{
+    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F32);
+    ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);
+
+    _kernel.set_target(CLScheduler::get().target());
+    _kernel.configure(input, weights, biases, output, conv_info);
+
+    // Configure border handler
+    PixelValue &&zero_value(0.f);
+    if(is_data_type_quantized_asymmetric(input->info()->data_type()))
+    {
+        zero_value = PixelValue(static_cast<uint8_t>(input->info()->quantization_info().offset));
+    }
+    _border_handler.configure(input, _kernel.border_size(), BorderMode::CONSTANT, zero_value);
+}
+
+void CLDepthwiseConvolutionLayer3x3::run()
+{
+    CLScheduler::get().enqueue(_border_handler);
+    CLScheduler::get().enqueue(_kernel);
+}
+
+CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayer()
+    : _im2col_kernel(), _weights_reshape_kernel(), _v2mm_kernel(), _vector_to_tensor_kernel(), _v2mm_input_fill_border(), _v2mm_weights_fill_border(), _input_reshaped(), _weights_reshaped(),
+      _v2mm_output()
+{
+}
+
+void CLDepthwiseConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info)
+{
+    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32);
+    ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);
+    ARM_COMPUTE_ERROR_ON(input->info()->dimension(2) != weights->info()->dimension(2));
+
+    const size_t weights_w = weights->info()->dimension(0);
+    const size_t weights_h = weights->info()->dimension(1);
+    const size_t weights_z = weights->info()->dimension(2);
+
+    const bool      has_bias   = (biases != nullptr);
+    const GPUTarget gpu_target = CLScheduler::get().target();
+
+    unsigned int conv_w = 0;
+    unsigned int conv_h = 0;
+    std::tie(conv_w, conv_h) = scaled_dimensions(input->info()->dimension(0), input->info()->dimension(1), weights_w, weights_h, conv_info);
+
+    // Set up intermediate tensors
+    const size_t patch_size = weights_w * weights_h + ((has_bias) ? 1 : 0);
+    const size_t conv_size  = conv_w * conv_h;
+
+    // Im2Col configuration
+    TensorShape shape_im2col = input->info()->tensor_shape();
+    shape_im2col.set(0, patch_size);
+    shape_im2col.set(1, conv_size);
+    shape_im2col.set(2, weights_z);
+    const TensorInfo info_im2col(shape_im2col, 1, input->info()->data_type(), input->info()->fixed_point_position());
+    _input_reshaped.allocator()->init(info_im2col);
+    _im2col_kernel.set_target(gpu_target);
+    _im2col_kernel.configure(input, &_input_reshaped, Size2D(weights_w, weights_h), conv_info, has_bias);
+
+    // Weights reshape configuration
+    const TensorShape shape_weights_reshape(patch_size, weights_z);
+    const TensorInfo  info_weights_reshape(shape_weights_reshape, 1, weights->info()->data_type(), weights->info()->fixed_point_position());
+    _weights_reshaped.allocator()->init(info_weights_reshape);
+    _weights_reshape_kernel.configure(weights, &_weights_reshaped, biases);
+
+    // GEMV configuration
+    TensorShape shape_v2mm_out = input->info()->tensor_shape();
+    shape_v2mm_out.set(0, conv_size * weights_z);
+    shape_v2mm_out.set(1, 1);
+    shape_v2mm_out.set(2, 1);
+    const TensorInfo info_v2mm_out(shape_v2mm_out, 1, input->info()->data_type(), input->info()->fixed_point_position());
+    _v2mm_output.allocator()->init(info_v2mm_out);
+    _v2mm_kernel.set_target(gpu_target);
+    _v2mm_kernel.configure(&_input_reshaped, &_weights_reshaped, &_v2mm_output);
+    _vector_to_tensor_kernel.configure(&_v2mm_output, output, conv_w, conv_h);
+
+    BorderSize border_size = _v2mm_kernel.border_size();
+    _v2mm_input_fill_border.configure(&_input_reshaped, border_size, BorderMode::CONSTANT, PixelValue(0));
+
+    border_size.bottom = 0;
+    _v2mm_weights_fill_border.configure(&_weights_reshaped, border_size, BorderMode::CONSTANT, PixelValue(0));
+
+    // Allocate intermediate tensors
+    _input_reshaped.allocator()->allocate();
+    _weights_reshaped.allocator()->allocate();
+    _v2mm_output.allocator()->allocate();
+}
+
+void CLDepthwiseConvolutionLayer::run()
+{
+    CLScheduler::get().enqueue(_im2col_kernel);
+
+    CLScheduler::get().enqueue(_weights_reshape_kernel);
+
+    CLScheduler::get().enqueue(_v2mm_input_fill_border);
+    CLScheduler::get().enqueue(_v2mm_weights_fill_border);
+    CLScheduler::get().enqueue(_v2mm_kernel);
+
+    CLScheduler::get().enqueue(_vector_to_tensor_kernel);
+}
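
The generic path above stages three intermediates whose sizes follow directly from the weights and the convolved output dimensions. A standalone sketch of that geometry; the names are hypothetical, but the formulas are the ones used in configure() above:

#include <cstddef>

struct DepthwiseShapes
{
    std::size_t patch_size; // weights_w * weights_h, plus 1 when a bias is appended
    std::size_t conv_size;  // number of output positions per depth plane
    std::size_t v2mm_len;   // length of the flattened GEMV output
};

DepthwiseShapes depthwise_shapes(std::size_t weights_w, std::size_t weights_h, std::size_t weights_z,
                                 std::size_t conv_w, std::size_t conv_h, bool has_bias)
{
    DepthwiseShapes s{};
    s.patch_size = weights_w * weights_h + (has_bias ? 1 : 0);
    s.conv_size  = conv_w * conv_h;
    s.v2mm_len   = s.conv_size * weights_z;
    return s;
}

diff --git a/src/runtime/CL/functions/CLL2Normalize.cpp b/src/runtime/CL/functions/CLL2Normalize.cpp
deleted file mode 100644
index 99be8cae4c..0000000000
--- a/src/runtime/CL/functions/CLL2Normalize.cpp
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright (c) 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.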
IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/runtime/CL/functions/CLL2Normalize.h"
-
-#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/CL/kernels/CLL2NormalizeKernel.h"
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/PixelValue.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "support/ToolchainSupport.h"
-
-using namespace arm_compute;
-
-CLL2Normalize::CLL2Normalize(std::shared_ptr<IMemoryManager> memory_manager)
-    : _memory_group(std::move(memory_manager)), _reduce_func(), _normalize_kernel(), _sumsq()
-{
-}
-
-void CLL2Normalize::configure(ICLTensor *input, ICLTensor *output, unsigned int axis, float epsilon)
-{
-    // Manage intermediate buffers
-    _memory_group.manage(&_sumsq);
-
-    // Configure kernels
-    _reduce_func.configure(input, &_sumsq, axis, ReductionOperation::SUM_SQUARE);
-    _normalize_kernel.configure(input, &_sumsq, output, axis, epsilon);
-
-    // Allocate intermediate tensor
-    _sumsq.allocator()->allocate();
-}
-
-void CLL2Normalize::run()
-{
-    _memory_group.acquire();
-
-    _reduce_func.run();
-    CLScheduler::get().enqueue(_normalize_kernel, true);
-
-    _memory_group.release();
-}
diff --git a/src/runtime/CL/functions/CLL2NormalizeLayer.cpp b/src/runtime/CL/functions/CLL2NormalizeLayer.cpp
new file mode 100644
index 0000000000..d1bb65f1c9
--- /dev/null
+++ b/src/runtime/CL/functions/CLL2NormalizeLayer.cpp
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/CL/functions/CLL2NormalizeLayer.h"
+
+#include "arm_compute/core/CL/ICLTensor.h"
+#include "arm_compute/core/CL/kernels/CLL2NormalizeLayerKernel.h"
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/PixelValue.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Validate.h"
+#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "support/ToolchainSupport.h"
+
+using namespace arm_compute;
+
+CLL2NormalizeLayer::CLL2NormalizeLayer(std::shared_ptr<IMemoryManager> memory_manager)
+    : _memory_group(std::move(memory_manager)), _reduce_func(), _normalize_kernel(), _sumsq()
+{
+}
+
+void CLL2NormalizeLayer::configure(ICLTensor *input, ICLTensor *output, unsigned int axis, float epsilon)
+{
+    // Manage intermediate buffers
+    _memory_group.manage(&_sumsq);
+
+    // Configure kernels
+    _reduce_func.configure(input, &_sumsq, axis, ReductionOperation::SUM_SQUARE);
+    _normalize_kernel.configure(input, &_sumsq, output, axis, epsilon);
+
+    // Allocate intermediate tensor
+    _sumsq.allocator()->allocate();
+}
+
+void CLL2NormalizeLayer::run()
+{
+    _memory_group.acquire();
+
+    _reduce_func.run();
+    CLScheduler::get().enqueue(_normalize_kernel, true);
+
+    _memory_group.release();
+}
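
CLL2NormalizeLayer also shows the library's memory-group discipline unchanged by the rename: manage() the intermediate before configuring its consumer, allocate() once configuration is done, and bracket run() with acquire()/release(). A toy stand-in that merely asserts this ordering; it is purely illustrative and not the library class:

#include <cassert>

// Hypothetical stand-in for a managed lifetime; real memory groups also
// recycle backing memory across functions, which this sketch ignores.
class LifetimeCheck
{
public:
    void manage()   { assert(!_allocated); _managed = true; } // before configure()
    void allocate() { assert(_managed); _allocated = true; }  // after configure()
    void acquire()  { assert(_allocated); _acquired = true; } // entering run()
    void release()  { assert(_acquired); _acquired = false; } // leaving run()

private:
    bool _managed{ false };
    bool _allocated{ false };
    bool _acquired{ false };
};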
diff --git a/src/runtime/CL/functions/CLLaplacianPyramid.cpp b/src/runtime/CL/functions/CLLaplacianPyramid.cpp
index a395487103..7e5278f380 100644
--- a/src/runtime/CL/functions/CLLaplacianPyramid.cpp
+++ b/src/runtime/CL/functions/CLLaplacianPyramid.cpp
@@ -29,7 +29,7 @@
 #include "arm_compute/core/Validate.h"
 #include "arm_compute/runtime/CL/CLTensor.h"
 #include "arm_compute/runtime/CL/functions/CLArithmeticSubtraction.h"
-#include "arm_compute/runtime/CL/functions/CLDepthConvert.h"
+#include "arm_compute/runtime/CL/functions/CLDepthConvertLayer.h"
 #include "arm_compute/runtime/CL/functions/CLGaussian5x5.h"
 #include "arm_compute/runtime/CL/functions/CLGaussianPyramid.h"
 #include "support/ToolchainSupport.h"
diff --git a/src/runtime/GLES_COMPUTE/functions/GCDepthConcatenate.cpp b/src/runtime/GLES_COMPUTE/functions/GCDepthConcatenate.cpp
deleted file mode 100755
index ed756cf261..0000000000
--- a/src/runtime/GLES_COMPUTE/functions/GCDepthConcatenate.cpp
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * Copyright (c) 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/runtime/GLES_COMPUTE/functions/GCDepthConcatenate.h"
-
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h"
-#include "arm_compute/core/PixelValue.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/GLES_COMPUTE/GCScheduler.h"
-#include "support/ToolchainSupport.h"
-
-using namespace arm_compute;
-
-GCDepthConcatenate::GCDepthConcatenate() //NOLINT
-    : _concat_kernels_vector(),
-      _border_handlers_vector(),
-      _num_inputs(0)
-{
-}
-
-void GCDepthConcatenate::configure(std::vector<IGCTensor *> inputs_vector, IGCTensor *output) //NOLINT
-{
-    ARM_COMPUTE_ERROR_ON(inputs_vector.size() < 2);
-
-    _num_inputs = inputs_vector.size();
-
-    unsigned int depth_offset = 0;
-
-    _concat_kernels_vector  = arm_compute::support::cpp14::make_unique<GCDepthConcatenateKernel[]>(_num_inputs);
-    _border_handlers_vector = arm_compute::support::cpp14::make_unique<GCFillBorderKernel[]>(_num_inputs);
-
-    for(unsigned int i = 0; i < _num_inputs; i++)
-    {
-        _concat_kernels_vector[i].configure(inputs_vector.at(i), depth_offset, output);
-        _border_handlers_vector[i].configure(inputs_vector.at(i), _concat_kernels_vector[i].border_size(), BorderMode::CONSTANT, PixelValue(0));
-
-        depth_offset += inputs_vector.at(i)->info()->dimension(2);
-    }
-}
-
-void GCDepthConcatenate::run()
-{
-    for(unsigned i = 0; i < _num_inputs; i++)
-    {
-        GCScheduler::get().enqueue(_border_handlers_vector[i], false);
-        GCScheduler::get().enqueue(_concat_kernels_vector[i], true);
-    }
-}
diff --git a/src/runtime/GLES_COMPUTE/functions/GCDepthConcatenateLayer.cpp b/src/runtime/GLES_COMPUTE/functions/GCDepthConcatenateLayer.cpp
new file mode 100755
index 0000000000..ee0b121695
--- /dev/null
+++ b/src/runtime/GLES_COMPUTE/functions/GCDepthConcatenateLayer.cpp
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/GLES_COMPUTE/functions/GCDepthConcatenateLayer.h"
+
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h"
+#include "arm_compute/core/PixelValue.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/GLES_COMPUTE/GCScheduler.h"
+#include "support/ToolchainSupport.h"
+
+using namespace arm_compute;
+
+GCDepthConcatenateLayer::GCDepthConcatenateLayer() //NOLINT
+    : _concat_kernels_vector(),
+      _border_handlers_vector(),
+      _num_inputs(0)
+{
+}
+
+void GCDepthConcatenateLayer::configure(std::vector<IGCTensor *> inputs_vector, IGCTensor *output) //NOLINT
+{
+    ARM_COMPUTE_ERROR_ON(inputs_vector.size() < 2);
+
+    _num_inputs = inputs_vector.size();
+
+    unsigned int depth_offset = 0;
+
+    _concat_kernels_vector  = arm_compute::support::cpp14::make_unique<GCDepthConcatenateLayerKernel[]>(_num_inputs);
+    _border_handlers_vector = arm_compute::support::cpp14::make_unique<GCFillBorderKernel[]>(_num_inputs);
+
+    for(unsigned int i = 0; i < _num_inputs; i++)
+    {
+        _concat_kernels_vector[i].configure(inputs_vector.at(i), depth_offset, output);
+        _border_handlers_vector[i].configure(inputs_vector.at(i), _concat_kernels_vector[i].border_size(), BorderMode::CONSTANT, PixelValue(0));
+
+        depth_offset += inputs_vector.at(i)->info()->dimension(2);
+    }
+}
+
+void GCDepthConcatenateLayer::run()
+{
+    for(unsigned i = 0; i < _num_inputs; i++)
+    {
+        GCScheduler::get().enqueue(_border_handlers_vector[i], false);
+        GCScheduler::get().enqueue(_concat_kernels_vector[i], true);
+    }
+}
diff --git a/src/runtime/NEON/functions/NEDepthConcatenate.cpp b/src/runtime/NEON/functions/NEDepthConcatenate.cpp
deleted file mode 100644
index f8ad2abe61..0000000000
--- a/src/runtime/NEON/functions/NEDepthConcatenate.cpp
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * Copyright (c) 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/runtime/NEON/functions/NEDepthConcatenate.h"
-
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/PixelValue.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/NEON/NEScheduler.h"
-#include "support/ToolchainSupport.h"
-
-using namespace arm_compute;
-
-NEDepthConcatenate::NEDepthConcatenate() // NOLINT
-    : _inputs_vector(),
-      _concat_kernels_vector(),
-      _border_handlers_vector(),
-      _num_inputs(0)
-{
-}
-
-void NEDepthConcatenate::configure(std::vector<ITensor *> inputs_vector, ITensor *output) // NOLINT
-{
-    ARM_COMPUTE_ERROR_ON(inputs_vector.size() < 2);
-
-    _num_inputs             = inputs_vector.size();
-    _concat_kernels_vector  = arm_compute::support::cpp14::make_unique<NEDepthConcatenateKernel[]>(_num_inputs);
-    _border_handlers_vector = arm_compute::support::cpp14::make_unique<NEFillBorderKernel[]>(_num_inputs);
-
-    TensorShape output_shape = calculate_depth_concatenate_shape(inputs_vector);
-
-    // Output auto inizialitation if not yet initialized
-    auto_init_if_empty(*output->info(), output_shape, 1, inputs_vector[0]->info()->data_type(), inputs_vector[0]->info()->fixed_point_position());
-
-    unsigned int depth_offset = 0;
-    for(unsigned int i = 0; i < _num_inputs; ++i)
-    {
-        _concat_kernels_vector[i].configure(inputs_vector.at(i), depth_offset, output);
-        _border_handlers_vector[i].configure(inputs_vector.at(i), _concat_kernels_vector[i].border_size(), BorderMode::CONSTANT, PixelValue(static_cast<float>(0.f)));
-
-        depth_offset += inputs_vector.at(i)->info()->dimension(2);
-    }
-}
-
-void NEDepthConcatenate::run()
-{
-    for(unsigned i = 0; i < _num_inputs; ++i)
-    {
-        NEScheduler::get().schedule(&_border_handlers_vector[i], Window::DimX);
-        NEScheduler::get().schedule(&_concat_kernels_vector[i], Window::DimX);
-    }
-}
diff --git a/src/runtime/NEON/functions/NEDepthConcatenateLayer.cpp b/src/runtime/NEON/functions/NEDepthConcatenateLayer.cpp
new file mode 100644
index 0000000000..437c9417ce
--- /dev/null
+++ b/src/runtime/NEON/functions/NEDepthConcatenateLayer.cpp
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/NEON/functions/NEDepthConcatenateLayer.h"
+
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/ITensor.h"
+#include "arm_compute/core/PixelValue.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "support/ToolchainSupport.h"
+
+using namespace arm_compute;
+
+NEDepthConcatenateLayer::NEDepthConcatenateLayer() // NOLINT
+    : _inputs_vector(),
+      _concat_kernels_vector(),
+      _border_handlers_vector(),
+      _num_inputs(0)
+{
+}
+
+void NEDepthConcatenateLayer::configure(std::vector<ITensor *> inputs_vector, ITensor *output) // NOLINT
+{
+    ARM_COMPUTE_ERROR_ON(inputs_vector.size() < 2);
+
+    _num_inputs             = inputs_vector.size();
+    _concat_kernels_vector  = arm_compute::support::cpp14::make_unique<NEDepthConcatenateLayerKernel[]>(_num_inputs);
+    _border_handlers_vector = arm_compute::support::cpp14::make_unique<NEFillBorderKernel[]>(_num_inputs);
+
+    TensorShape output_shape = calculate_depth_concatenate_shape(inputs_vector);
+
+    // Output auto inizialitation if not yet initialized
+    auto_init_if_empty(*output->info(), output_shape, 1, inputs_vector[0]->info()->data_type(), inputs_vector[0]->info()->fixed_point_position());
+
+    unsigned int depth_offset = 0;
+    for(unsigned int i = 0; i < _num_inputs; ++i)
+    {
+        _concat_kernels_vector[i].configure(inputs_vector.at(i), depth_offset, output);
+        _border_handlers_vector[i].configure(inputs_vector.at(i), _concat_kernels_vector[i].border_size(), BorderMode::CONSTANT, PixelValue(static_cast<float>(0.f)));
+
+        depth_offset += inputs_vector.at(i)->info()->dimension(2);
+    }
+}
+
+void NEDepthConcatenateLayer::run()
+{
+    for(unsigned i = 0; i < _num_inputs; ++i)
+    {
+        NEScheduler::get().schedule(&_border_handlers_vector[i], Window::DimX);
+        NEScheduler::get().schedule(&_concat_kernels_vector[i], Window::DimX);
+    }
+}
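
Usage of the renamed NEON concatenation function, as a hedged sketch; tensor creation, shapes, and allocation are elided, and the inputs are assumed to share width, height, and data type, as configure() above requires:

#include "arm_compute/runtime/NEON/functions/NEDepthConcatenateLayer.h"
#include "arm_compute/runtime/Tensor.h"

#include <vector>

using namespace arm_compute;

// Stack two feature maps along the depth axis.
void depth_concat_example(Tensor &a, Tensor &b, Tensor &out)
{
    std::vector<ITensor *> inputs{ &a, &b };
    NEDepthConcatenateLayer concat;
    concat.configure(inputs, &out);
    concat.run();
}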
- */ -#include "arm_compute/runtime/NEON/functions/NEDepthConvert.h" - -#include "arm_compute/core/NEON/kernels/NEDepthConvertKernel.h" -#include "support/ToolchainSupport.h" - -#include - -using namespace arm_compute; - -void NEDepthConvert::configure(ITensor *input, ITensor *output, ConvertPolicy policy, uint32_t shift) -{ - auto k = arm_compute::support::cpp14::make_unique(); - k->configure(input, output, policy, shift); - _kernel = std::move(k); -} diff --git a/src/runtime/NEON/functions/NEDepthConvertLayer.cpp b/src/runtime/NEON/functions/NEDepthConvertLayer.cpp new file mode 100644 index 0000000000..9a75404fcd --- /dev/null +++ b/src/runtime/NEON/functions/NEDepthConvertLayer.cpp @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h" + +#include "arm_compute/core/NEON/kernels/NEDepthConvertLayerKernel.h" +#include "support/ToolchainSupport.h" + +#include + +using namespace arm_compute; + +void NEDepthConvertLayer::configure(ITensor *input, ITensor *output, ConvertPolicy policy, uint32_t shift) +{ + auto k = arm_compute::support::cpp14::make_unique(); + k->configure(input, output, policy, shift); + _kernel = std::move(k); +} diff --git a/src/runtime/NEON/functions/NEDepthwiseConvolution.cpp b/src/runtime/NEON/functions/NEDepthwiseConvolution.cpp deleted file mode 100644 index e12bc07464..0000000000 --- a/src/runtime/NEON/functions/NEDepthwiseConvolution.cpp +++ /dev/null @@ -1,126 +0,0 @@ -/* - * Copyright (c) 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/runtime/NEON/functions/NEDepthwiseConvolution.h" - -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/ITensor.h" -#include "arm_compute/core/PixelValue.h" -#include "arm_compute/runtime/NEON/NEScheduler.h" -#include "support/ToolchainSupport.h" - -using namespace arm_compute; - -NEDepthwiseConvolution3x3::NEDepthwiseConvolution3x3() - : _kernel(), _bias_kernel(), _border_handler(), _has_bias(false) -{ -} - -void NEDepthwiseConvolution3x3::configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32); - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output, weights); - - // Call convolution kernel - _kernel.configure(input, weights, output, conv_info); - _border_handler.configure(input, _kernel.border_size(), BorderMode::CONSTANT, PixelValue(static_cast(0.f))); - if(biases != nullptr) - { - _bias_kernel.configure(output, biases); - _has_bias = true; - } -} - -void NEDepthwiseConvolution3x3::run() -{ - NEScheduler::get().schedule(&_border_handler, Window::DimX); - NEScheduler::get().schedule(&_kernel, Window::DimX); - if(_has_bias) - { - NEScheduler::get().schedule(&_bias_kernel, Window::DimX); - } -} - -NEDepthwiseConvolution::NEDepthwiseConvolution() - : _im2col_kernel(), _weights_reshape_kernel(), _v2mm_kernel(), _vector_to_tensor_kernel(), _input_reshaped(), _weights_reshaped(), _v2mm_output() -{ -} - -void NEDepthwiseConvolution::configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info) -{ - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32); - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights); - ARM_COMPUTE_ERROR_ON(input->info()->dimension(2) != weights->info()->dimension(2)); - - const size_t weights_w = weights->info()->dimension(0); - const size_t weights_h = weights->info()->dimension(1); - const size_t weights_z = weights->info()->dimension(2); - - bool has_bias = (biases != nullptr); - - unsigned int conv_w = 0; - unsigned int conv_h = 0; - std::tie(conv_w, conv_h) = scaled_dimensions(input->info()->dimension(0), input->info()->dimension(1), weights_w, weights_h, conv_info); - - // Set up intermediate tensors - const size_t patch_size = weights_w * weights_h + ((has_bias) ? 
1 : 0); - const size_t conv_size = conv_w * conv_h; - - // Im2Col configuration - TensorShape shape_im2col = input->info()->tensor_shape(); - shape_im2col.set(0, patch_size); - shape_im2col.set(1, conv_size); - shape_im2col.set(2, weights_z); - const TensorInfo info_im2col(shape_im2col, 1, input->info()->data_type(), input->info()->fixed_point_position()); - _input_reshaped.allocator()->init(info_im2col); - _im2col_kernel.configure(input, &_input_reshaped, Size2D(weights_w, weights_h), conv_info, has_bias); - - // Weights reshape configuration - const TensorShape shape_weights_reshape(patch_size, weights_z); - const TensorInfo info_weights_reshape(shape_weights_reshape, 1, weights->info()->data_type(), weights->info()->fixed_point_position()); - _weights_reshaped.allocator()->init(info_weights_reshape); - _weights_reshape_kernel.configure(weights, &_weights_reshaped, biases); - - // GEMV configuration - TensorShape shape_v2mm_out = input->info()->tensor_shape(); - shape_v2mm_out.set(0, conv_size * weights_z); - shape_v2mm_out.set(1, 1); - shape_v2mm_out.set(2, 1); - const TensorInfo info_v2mm_out(shape_v2mm_out, 1, input->info()->data_type(), input->info()->fixed_point_position()); - _v2mm_output.allocator()->init(info_v2mm_out); - _v2mm_kernel.configure(&_input_reshaped, &_weights_reshaped, &_v2mm_output); - _vector_to_tensor_kernel.configure(&_v2mm_output, output, conv_w, conv_h); - - // Allocate intermediate tensors - _input_reshaped.allocator()->allocate(); - _weights_reshaped.allocator()->allocate(); - _v2mm_output.allocator()->allocate(); -} - -void NEDepthwiseConvolution::run() -{ - NEScheduler::get().schedule(&_im2col_kernel, Window::DimX); - NEScheduler::get().schedule(&_weights_reshape_kernel, Window::DimX); - NEScheduler::get().schedule(&_v2mm_kernel, Window::DimX); - NEScheduler::get().schedule(&_vector_to_tensor_kernel, Window::DimX); -} \ No newline at end of file diff --git a/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp b/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp new file mode 100644 index 0000000000..b890c6f5d5 --- /dev/null +++ b/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp @@ -0,0 +1,126 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */
+#include "arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h"
+
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/ITensor.h"
+#include "arm_compute/core/PixelValue.h"
+#include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "support/ToolchainSupport.h"
+
+using namespace arm_compute;
+
+NEDepthwiseConvolutionLayer3x3::NEDepthwiseConvolutionLayer3x3()
+    : _kernel(), _bias_kernel(), _border_handler(), _has_bias(false)
+{
+}
+
+void NEDepthwiseConvolutionLayer3x3::configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info)
+{
+    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32);
+    ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output, weights);
+
+    // Call convolution kernel
+    _kernel.configure(input, weights, output, conv_info);
+    _border_handler.configure(input, _kernel.border_size(), BorderMode::CONSTANT, PixelValue(static_cast<float>(0.f)));
+    if(biases != nullptr)
+    {
+        _bias_kernel.configure(output, biases);
+        _has_bias = true;
+    }
+}
+
+void NEDepthwiseConvolutionLayer3x3::run()
+{
+    NEScheduler::get().schedule(&_border_handler, Window::DimX);
+    NEScheduler::get().schedule(&_kernel, Window::DimX);
+    if(_has_bias)
+    {
+        NEScheduler::get().schedule(&_bias_kernel, Window::DimX);
+    }
+}
+
+NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayer()
+    : _im2col_kernel(), _weights_reshape_kernel(), _v2mm_kernel(), _vector_to_tensor_kernel(), _input_reshaped(), _weights_reshaped(), _v2mm_output()
+{
+}
+
+void NEDepthwiseConvolutionLayer::configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info)
+{
+    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32);
+    ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);
+    ARM_COMPUTE_ERROR_ON(input->info()->dimension(2) != weights->info()->dimension(2));
+
+    const size_t weights_w = weights->info()->dimension(0);
+    const size_t weights_h = weights->info()->dimension(1);
+    const size_t weights_z = weights->info()->dimension(2);
+
+    bool has_bias = (biases != nullptr);
+
+    unsigned int conv_w = 0;
+    unsigned int conv_h = 0;
+    std::tie(conv_w, conv_h) = scaled_dimensions(input->info()->dimension(0), input->info()->dimension(1), weights_w, weights_h, conv_info);
+
+    // Set up intermediate tensors
+    const size_t patch_size = weights_w * weights_h + ((has_bias) ?
1 : 0); + const size_t conv_size = conv_w * conv_h; + + // Im2Col configuration + TensorShape shape_im2col = input->info()->tensor_shape(); + shape_im2col.set(0, patch_size); + shape_im2col.set(1, conv_size); + shape_im2col.set(2, weights_z); + const TensorInfo info_im2col(shape_im2col, 1, input->info()->data_type(), input->info()->fixed_point_position()); + _input_reshaped.allocator()->init(info_im2col); + _im2col_kernel.configure(input, &_input_reshaped, Size2D(weights_w, weights_h), conv_info, has_bias); + + // Weights reshape configuration + const TensorShape shape_weights_reshape(patch_size, weights_z); + const TensorInfo info_weights_reshape(shape_weights_reshape, 1, weights->info()->data_type(), weights->info()->fixed_point_position()); + _weights_reshaped.allocator()->init(info_weights_reshape); + _weights_reshape_kernel.configure(weights, &_weights_reshaped, biases); + + // GEMV configuration + TensorShape shape_v2mm_out = input->info()->tensor_shape(); + shape_v2mm_out.set(0, conv_size * weights_z); + shape_v2mm_out.set(1, 1); + shape_v2mm_out.set(2, 1); + const TensorInfo info_v2mm_out(shape_v2mm_out, 1, input->info()->data_type(), input->info()->fixed_point_position()); + _v2mm_output.allocator()->init(info_v2mm_out); + _v2mm_kernel.configure(&_input_reshaped, &_weights_reshaped, &_v2mm_output); + _vector_to_tensor_kernel.configure(&_v2mm_output, output, conv_w, conv_h); + + // Allocate intermediate tensors + _input_reshaped.allocator()->allocate(); + _weights_reshaped.allocator()->allocate(); + _v2mm_output.allocator()->allocate(); +} + +void NEDepthwiseConvolutionLayer::run() +{ + NEScheduler::get().schedule(&_im2col_kernel, Window::DimX); + NEScheduler::get().schedule(&_weights_reshape_kernel, Window::DimX); + NEScheduler::get().schedule(&_v2mm_kernel, Window::DimX); + NEScheduler::get().schedule(&_vector_to_tensor_kernel, Window::DimX); +} \ No newline at end of file diff --git a/src/runtime/NEON/functions/NEL2Normalize.cpp b/src/runtime/NEON/functions/NEL2Normalize.cpp deleted file mode 100644 index 349a781b0b..0000000000 --- a/src/runtime/NEON/functions/NEL2Normalize.cpp +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright (c) 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include "arm_compute/runtime/NEON/functions/NEL2Normalize.h" - -#include "arm_compute/core/Helpers.h" -#include "arm_compute/runtime/NEON/NEScheduler.h" - -using namespace arm_compute; - -NEL2Normalize::NEL2Normalize(std::shared_ptr memory_manager) - : _memory_group(std::move(memory_manager)), _reduce_func(), _normalize_kernel(), _sumsq() -{ -} - -void NEL2Normalize::configure(ITensor *input, ITensor *output, unsigned int axis, float epsilon) -{ - // Manage intermediate buffers - _memory_group.manage(&_sumsq); - - // Configure Kernels - _reduce_func.configure(input, &_sumsq, axis, ReductionOperation::SUM_SQUARE); - _normalize_kernel.configure(input, &_sumsq, output, axis, epsilon); - - // Allocate intermediate tensors - _sumsq.allocator()->allocate(); -} - -void NEL2Normalize::run() -{ - _memory_group.acquire(); - - _reduce_func.run(); - NEScheduler::get().schedule(&_normalize_kernel, Window::DimY); - - _memory_group.release(); -} diff --git a/src/runtime/NEON/functions/NEL2NormalizeLayer.cpp b/src/runtime/NEON/functions/NEL2NormalizeLayer.cpp new file mode 100644 index 0000000000..fa62483146 --- /dev/null +++ b/src/runtime/NEON/functions/NEL2NormalizeLayer.cpp @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */
+#include "arm_compute/runtime/NEON/functions/NEL2NormalizeLayer.h"
+
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/runtime/NEON/NEScheduler.h"
+
+using namespace arm_compute;
+
+NEL2NormalizeLayer::NEL2NormalizeLayer(std::shared_ptr<IMemoryManager> memory_manager)
+    : _memory_group(std::move(memory_manager)), _reduce_func(), _normalize_kernel(), _sumsq()
+{
+}
+
+void NEL2NormalizeLayer::configure(ITensor *input, ITensor *output, unsigned int axis, float epsilon)
+{
+    // Manage intermediate buffers
+    _memory_group.manage(&_sumsq);
+
+    // Configure Kernels
+    _reduce_func.configure(input, &_sumsq, axis, ReductionOperation::SUM_SQUARE);
+    _normalize_kernel.configure(input, &_sumsq, output, axis, epsilon);
+
+    // Allocate intermediate tensors
+    _sumsq.allocator()->allocate();
+}
+
+void NEL2NormalizeLayer::run()
+{
+    _memory_group.acquire();
+
+    _reduce_func.run();
+    NEScheduler::get().schedule(&_normalize_kernel, Window::DimY);
+
+    _memory_group.release();
+}
diff --git a/src/runtime/NEON/functions/NELaplacianPyramid.cpp b/src/runtime/NEON/functions/NELaplacianPyramid.cpp
index a680f1f11d..0e149d4176 100644
--- a/src/runtime/NEON/functions/NELaplacianPyramid.cpp
+++ b/src/runtime/NEON/functions/NELaplacianPyramid.cpp
@@ -28,7 +28,7 @@
 #include "arm_compute/core/TensorInfo.h"
 #include "arm_compute/core/Validate.h"
 #include "arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h"
-#include "arm_compute/runtime/NEON/functions/NEDepthConvert.h"
+#include "arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h"
 #include "arm_compute/runtime/NEON/functions/NEGaussian5x5.h"
 #include "arm_compute/runtime/NEON/functions/NEGaussianPyramid.h"
 #include "arm_compute/runtime/Tensor.h"
diff --git a/tests/benchmark/CL/DepthwiseConvolution.cpp b/tests/benchmark/CL/DepthwiseConvolution.cpp
deleted file mode 100644
index 40412da6f9..0000000000
--- a/tests/benchmark/CL/DepthwiseConvolution.cpp
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * Copyright (c) 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */ -#include "arm_compute/core/TensorShape.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/CL/CLTensor.h" -#include "arm_compute/runtime/CL/CLTensorAllocator.h" -#include "arm_compute/runtime/CL/functions/CLDepthwiseConvolution.h" -#include "tests/CL/CLAccessor.h" -#include "tests/benchmark/fixtures/DepthwiseConvolutionFixture.h" -#include "tests/datasets/MobileNetDepthwiseConvolutionDataset.h" -#include "tests/framework/Macros.h" -#include "tests/framework/datasets/Datasets.h" -#include "utils/TypePrinter.h" - -namespace arm_compute -{ -namespace test -{ -const auto data_types = framework::dataset::make("DataType", { DataType::F32 }); -using CLDepthwiseConvolutionFixture = DepthwiseConvolutionFixture; - -TEST_SUITE(CL) - -REGISTER_FIXTURE_DATA_TEST_CASE(MobileNetDepthwiseConvolution, CLDepthwiseConvolutionFixture, framework::DatasetMode::ALL, - framework::dataset::combine(framework::dataset::combine(datasets::MobileNetDepthwiseConvolutionDataset(), data_types), - framework::dataset::make("Batches", { 1 }))); - -TEST_SUITE_END() -} // namespace test -} // namespace arm_compute diff --git a/tests/benchmark/CL/DepthwiseConvolutionLayer.cpp b/tests/benchmark/CL/DepthwiseConvolutionLayer.cpp new file mode 100644 index 0000000000..be6fba0a90 --- /dev/null +++ b/tests/benchmark/CL/DepthwiseConvolutionLayer.cpp @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/core/TensorShape.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/CLTensor.h" +#include "arm_compute/runtime/CL/CLTensorAllocator.h" +#include "arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h" +#include "tests/CL/CLAccessor.h" +#include "tests/benchmark/fixtures/DepthwiseConvolutionLayerFixture.h" +#include "tests/datasets/MobileNetDepthwiseConvolutionLayerDataset.h" +#include "tests/framework/Macros.h" +#include "tests/framework/datasets/Datasets.h" +#include "utils/TypePrinter.h" + +namespace arm_compute +{ +namespace test +{ +const auto data_types = framework::dataset::make("DataType", { DataType::F32 }); +using CLDepthwiseConvolutionLayerFixture = DepthwiseConvolutionLayerFixture; + +TEST_SUITE(CL) + +REGISTER_FIXTURE_DATA_TEST_CASE(MobileNetDepthwiseConvolutionLayer, CLDepthwiseConvolutionLayerFixture, framework::DatasetMode::ALL, + framework::dataset::combine(framework::dataset::combine(datasets::MobileNetDepthwiseConvolutionLayerDataset(), data_types), + framework::dataset::make("Batches", { 1 }))); + +TEST_SUITE_END() +} // namespace test +} // namespace arm_compute diff --git a/tests/benchmark/CL/SYSTEM/MobileNet.cpp b/tests/benchmark/CL/SYSTEM/MobileNet.cpp index c745a0acab..4712bc0c80 100644 --- a/tests/benchmark/CL/SYSTEM/MobileNet.cpp +++ b/tests/benchmark/CL/SYSTEM/MobileNet.cpp @@ -27,7 +27,7 @@ #include "arm_compute/runtime/CL/CLTensorAllocator.h" #include "arm_compute/runtime/CL/functions/CLActivationLayer.h" #include "arm_compute/runtime/CL/functions/CLConvolutionLayer.h" -#include "arm_compute/runtime/CL/functions/CLDepthwiseConvolution.h" +#include "arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h" #include "arm_compute/runtime/CL/functions/CLDirectConvolutionLayer.h" #include "arm_compute/runtime/CL/functions/CLPoolingLayer.h" #include "arm_compute/runtime/CL/functions/CLReshapeLayer.h" @@ -46,7 +46,7 @@ using CLMobileNetFixture = MobileNetFixture; diff --git a/tests/benchmark/CL/SYSTEM/MobileNetV1.cpp b/tests/benchmark/CL/SYSTEM/MobileNetV1.cpp index 66be3231cf..851148a860 100644 --- a/tests/benchmark/CL/SYSTEM/MobileNetV1.cpp +++ b/tests/benchmark/CL/SYSTEM/MobileNetV1.cpp @@ -28,7 +28,7 @@ #include "arm_compute/runtime/CL/functions/CLActivationLayer.h" #include "arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h" #include "arm_compute/runtime/CL/functions/CLConvolutionLayer.h" -#include "arm_compute/runtime/CL/functions/CLDepthwiseConvolution.h" +#include "arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h" #include "arm_compute/runtime/CL/functions/CLDirectConvolutionLayer.h" #include "arm_compute/runtime/CL/functions/CLPoolingLayer.h" #include "arm_compute/runtime/CL/functions/CLReshapeLayer.h" @@ -49,7 +49,7 @@ using CLMobileNetV1_224_Fixture = MobileNetV1Fixture -class DepthwiseConvolutionFixture : public framework::Fixture -{ -public: - template - void setup(TensorShape src_shape, TensorShape weights_shape, TensorShape biases_shape, TensorShape dst_shape, PadStrideInfo info, DataType data_type, int batches) - { - // Set batched in source and destination shapes - const unsigned int fixed_point_position = 4; - src_shape.set(3 /* batch */, batches); - dst_shape.set(3 /* batch */, batches); - - // Create tensors - src = create_tensor(src_shape, data_type, 1, fixed_point_position); - weights = create_tensor(weights_shape, data_type, 1, fixed_point_position); - biases = create_tensor(biases_shape, data_type, 1, fixed_point_position); - dst = 
create_tensor(dst_shape, data_type, 1, fixed_point_position); - - // Create and configure function - depth_conv.configure(&src, &weights, &biases, &dst, info); - - // Allocate tensors - src.allocator()->allocate(); - weights.allocator()->allocate(); - biases.allocator()->allocate(); - dst.allocator()->allocate(); - - // Fill tensors - library->fill_tensor_uniform(Accessor(src), 0); - library->fill_tensor_uniform(Accessor(weights), 1); - } - - void run() - { - depth_conv.run(); - } - - void teardown() - { - src.allocator()->free(); - weights.allocator()->free(); - biases.allocator()->free(); - dst.allocator()->free(); - } - -private: - TensorType src{}; - TensorType weights{}; - TensorType biases{}; - TensorType dst{}; - Function depth_conv{}; -}; -} // namespace test -} // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_DEPTHWISECONVOLUTIONFIXTURE */ diff --git a/tests/benchmark/fixtures/DepthwiseConvolutionLayerFixture.h b/tests/benchmark/fixtures/DepthwiseConvolutionLayerFixture.h new file mode 100644 index 0000000000..9a49d5613a --- /dev/null +++ b/tests/benchmark/fixtures/DepthwiseConvolutionLayerFixture.h @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_TEST_DEPTHWISECONVOLUTIONFIXTURE +#define ARM_COMPUTE_TEST_DEPTHWISECONVOLUTIONFIXTURE + +#include "arm_compute/core/TensorShape.h" +#include "arm_compute/core/Types.h" +#include "tests/Globals.h" +#include "tests/Utils.h" +#include "tests/framework/Fixture.h" + +namespace arm_compute +{ +namespace test +{ +/** Fixture that can be used for NEON and CL */ +template +class DepthwiseConvolutionLayerFixture : public framework::Fixture +{ +public: + template + void setup(TensorShape src_shape, TensorShape weights_shape, TensorShape biases_shape, TensorShape dst_shape, PadStrideInfo info, DataType data_type, int batches) + { + // Set batched in source and destination shapes + const unsigned int fixed_point_position = 4; + src_shape.set(3 /* batch */, batches); + dst_shape.set(3 /* batch */, batches); + + // Create tensors + src = create_tensor(src_shape, data_type, 1, fixed_point_position); + weights = create_tensor(weights_shape, data_type, 1, fixed_point_position); + biases = create_tensor(biases_shape, data_type, 1, fixed_point_position); + dst = create_tensor(dst_shape, data_type, 1, fixed_point_position); + + // Create and configure function + depth_conv.configure(&src, &weights, &biases, &dst, info); + + // Allocate tensors + src.allocator()->allocate(); + weights.allocator()->allocate(); + biases.allocator()->allocate(); + dst.allocator()->allocate(); + + // Fill tensors + library->fill_tensor_uniform(Accessor(src), 0); + library->fill_tensor_uniform(Accessor(weights), 1); + } + + void run() + { + depth_conv.run(); + } + + void teardown() + { + src.allocator()->free(); + weights.allocator()->free(); + biases.allocator()->free(); + dst.allocator()->free(); + } + +private: + TensorType src{}; + TensorType weights{}; + TensorType biases{}; + TensorType dst{}; + Function depth_conv{}; +}; +} // namespace test +} // namespace arm_compute +#endif /* ARM_COMPUTE_TEST_DEPTHWISECONVOLUTIONFIXTURE */ diff --git a/tests/benchmark/fixtures/MobileNetFixture.h b/tests/benchmark/fixtures/MobileNetFixture.h index 6c1ee300c1..660205c7ec 100644 --- a/tests/benchmark/fixtures/MobileNetFixture.h +++ b/tests/benchmark/fixtures/MobileNetFixture.h @@ -38,7 +38,7 @@ template class MobileNetFixture : public framework::Fixture @@ -69,7 +69,7 @@ private: ActivationLayerFunction, ConvolutionLayerFunction, DirectConvolutionLayerFunction, - DepthwiseConvolutionFunction, + DepthwiseConvolutionLayerFunction, ReshapeFunction, PoolingLayerFunction> network{}; diff --git a/tests/datasets/DepthwiseConvolutionDataset.h b/tests/datasets/DepthwiseConvolutionDataset.h deleted file mode 100644 index 2c8347fc8c..0000000000 --- a/tests/datasets/DepthwiseConvolutionDataset.h +++ /dev/null @@ -1,187 +0,0 @@ -/* - * Copyright (c) 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_TEST_DEPTHWISE_CONVOLUTION_DATASET -#define ARM_COMPUTE_TEST_DEPTHWISE_CONVOLUTION_DATASET - -#include "utils/TypePrinter.h" - -#include "arm_compute/core/TensorShape.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -namespace test -{ -namespace datasets -{ -class DepthwiseConvolutionDataset -{ -public: - using type = std::tuple; - - struct iterator - { - iterator(std::vector::const_iterator src_it, - std::vector::const_iterator weights_it, - std::vector::const_iterator biases_it, - std::vector::const_iterator dst_it, - std::vector::const_iterator infos_it) - : _src_it{ std::move(src_it) }, - _weights_it{ std::move(weights_it) }, - _biases_it{ std::move(biases_it) }, - _dst_it{ std::move(dst_it) }, - _infos_it{ std::move(infos_it) } - { - } - - std::string description() const - { - std::stringstream description; - description << "In=" << *_src_it << ":"; - description << "Weights=" << *_weights_it << ":"; - description << "Biases=" << *_biases_it << ":"; - description << "Out=" << *_dst_it << ":"; - description << "Info=" << *_infos_it; - return description.str(); - } - - DepthwiseConvolutionDataset::type operator*() const - { - return std::make_tuple(*_src_it, *_weights_it, *_biases_it, *_dst_it, *_infos_it); - } - - iterator &operator++() - { - ++_src_it; - ++_weights_it; - ++_biases_it; - ++_dst_it; - ++_infos_it; - - return *this; - } - - private: - std::vector::const_iterator _src_it; - std::vector::const_iterator _weights_it; - std::vector::const_iterator _biases_it; - std::vector::const_iterator _dst_it; - std::vector::const_iterator _infos_it; - }; - - iterator begin() const - { - return iterator(_src_shapes.begin(), _weight_shapes.begin(), _biases_shapes.begin(), _dst_shapes.begin(), _infos.begin()); - } - - int size() const - { - return std::min(_src_shapes.size(), std::min(_weight_shapes.size(), std::min(_biases_shapes.size(), std::min(_dst_shapes.size(), _infos.size())))); - } - - void add_config(TensorShape src, TensorShape weights, TensorShape biases, TensorShape dst, PadStrideInfo info) - { - _src_shapes.emplace_back(std::move(src)); - _weight_shapes.emplace_back(std::move(weights)); - _biases_shapes.emplace_back(std::move(biases)); - _dst_shapes.emplace_back(std::move(dst)); - _infos.emplace_back(std::move(info)); - } - -protected: - DepthwiseConvolutionDataset() = default; - DepthwiseConvolutionDataset(DepthwiseConvolutionDataset &&) = default; - -private: - std::vector _src_shapes{}; - std::vector _weight_shapes{}; - std::vector _biases_shapes{}; - std::vector _dst_shapes{}; - std::vector _infos{}; -}; -class SmallDepthwiseConvolutionDataset final : public DepthwiseConvolutionDataset -{ -public: - SmallDepthwiseConvolutionDataset() - { - add_config(TensorShape(7U, 7U, 3U), TensorShape(3U, 3U, 3U), TensorShape(3U), TensorShape(5U, 5U, 3U), PadStrideInfo(1, 1, 0, 0)); - add_config(TensorShape(23U, 27U, 5U), TensorShape(3U, 5U, 5U), TensorShape(5U), TensorShape(11U, 23U, 5U), PadStrideInfo(2, 1, 0, 0)); - add_config(TensorShape(33U, 27U, 7U), 
TensorShape(7U, 3U, 7U), TensorShape(7U), TensorShape(10U, 13U, 7U), PadStrideInfo(3, 2, 1, 0)); - add_config(TensorShape(33U, 27U, 11U), TensorShape(3U, 3U, 11U), TensorShape(11U), TensorShape(31U, 14U, 11U), PadStrideInfo(1, 2, 0, 1)); - add_config(TensorShape(17U, 31U, 2U), TensorShape(5U, 9U, 2U), TensorShape(2U), TensorShape(15U, 13U, 2U), PadStrideInfo(1, 2, 1, 1)); - add_config(TensorShape(23U, 27U, 5U), TensorShape(11U, 3U, 5U), TensorShape(5U), TensorShape(13U, 13U, 5U), PadStrideInfo(1, 2, 0, 0)); - add_config(TensorShape(17U, 31U, 2U, 3U), TensorShape(5U, 9U, 2U), TensorShape(2U), TensorShape(15U, 13U, 2U, 3U), PadStrideInfo(1, 2, 1, 1)); - // Asymmetric padding - add_config(TensorShape(33U, 27U, 7U), TensorShape(5U, 7U, 7U), TensorShape(7U), TensorShape(11U, 12U, 7U), PadStrideInfo(3, 2, 1, 1, 2, 0, DimensionRoundingType::FLOOR)); - add_config(TensorShape(33U, 27U, 7U), TensorShape(5U, 7U, 7U), TensorShape(7U), TensorShape(11U, 12U, 7U), PadStrideInfo(3, 2, 1, 1, 0, 2, DimensionRoundingType::FLOOR)); - add_config(TensorShape(33U, 27U, 7U), TensorShape(5U, 7U, 7U), TensorShape(7U), TensorShape(11U, 12U, 7U), PadStrideInfo(3, 2, 2, 1, 2, 0, DimensionRoundingType::FLOOR)); - add_config(TensorShape(33U, 27U, 7U), TensorShape(5U, 7U, 7U), TensorShape(7U), TensorShape(11U, 12U, 7U), PadStrideInfo(3, 2, 1, 3, 0, 2, DimensionRoundingType::FLOOR)); - add_config(TensorShape(33U, 27U, 7U), TensorShape(5U, 7U, 7U), TensorShape(7U), TensorShape(10U, 11U, 7U), PadStrideInfo(3, 2, 1, 0, 1, 0, DimensionRoundingType::FLOOR)); - add_config(TensorShape(33U, 27U, 7U), TensorShape(5U, 7U, 7U), TensorShape(7U), TensorShape(10U, 11U, 7U), PadStrideInfo(3, 2, 0, 1, 0, 1, DimensionRoundingType::FLOOR)); - } -}; - -class LargeDepthwiseConvolutionDataset final : public DepthwiseConvolutionDataset -{ -public: - LargeDepthwiseConvolutionDataset() - { - add_config(TensorShape(233U, 277U, 55U), TensorShape(3U, 3U, 55U), TensorShape(55U), TensorShape(116U, 275U, 55U), PadStrideInfo(2, 1, 0, 0)); - add_config(TensorShape(333U, 277U, 77U), TensorShape(3U, 3U, 77U), TensorShape(77U), TensorShape(111U, 138U, 77U), PadStrideInfo(3, 2, 1, 0)); - add_config(TensorShape(177U, 311U, 22U), TensorShape(3U, 3U, 22U), TensorShape(22U), TensorShape(177U, 156U, 22U), PadStrideInfo(1, 2, 1, 1)); - add_config(TensorShape(233U, 277U, 55U), TensorShape(3U, 3U, 55U), TensorShape(55U), TensorShape(231U, 138U, 55U), PadStrideInfo(1, 2, 0, 0)); - add_config(TensorShape(333U, 277U, 77U), TensorShape(3U, 3U, 77U), TensorShape(77U), TensorShape(166U, 93U, 77U), PadStrideInfo(2, 3, 0, 1)); - add_config(TensorShape(177U, 311U, 22U), TensorShape(3U, 3U, 22U), TensorShape(22U), TensorShape(89U, 311U, 22U), PadStrideInfo(2, 1, 1, 1)); - } -}; - -class SmallDepthwiseConvolutionDataset3x3 final : public DepthwiseConvolutionDataset -{ -public: - SmallDepthwiseConvolutionDataset3x3() - { - add_config(TensorShape(7U, 7U, 3U, 2U), TensorShape(3U, 3U, 3U), TensorShape(3U), TensorShape(5U, 5U, 3U, 2U), PadStrideInfo(1, 1, 0, 0)); - add_config(TensorShape(33U, 27U, 11U), TensorShape(3U, 3U, 11U), TensorShape(11U), TensorShape(11U, 14U, 11U), PadStrideInfo(3, 2, 1, 1)); - add_config(TensorShape(21U, 31U, 9U, 4U), TensorShape(3U, 3U, 9U), TensorShape(9U), TensorShape(21U, 15U, 9U, 4U), PadStrideInfo(1, 2, 1, 0)); - add_config(TensorShape(33U, 27U, 11U, 3U), TensorShape(3U, 3U, 11U), TensorShape(11U), TensorShape(31U, 14U, 11U, 3U), PadStrideInfo(1, 2, 0, 1)); - } -}; - -class LargeDepthwiseConvolutionDataset3x3 final : public 
DepthwiseConvolutionDataset -{ -public: - LargeDepthwiseConvolutionDataset3x3() - { - add_config(TensorShape(233U, 277U, 55U, 3U), TensorShape(3U, 3U, 55U), TensorShape(55U), TensorShape(116U, 275U, 55U, 3U), PadStrideInfo(2, 1, 0, 0)); - add_config(TensorShape(333U, 277U, 77U), TensorShape(3U, 3U, 77U), TensorShape(77U), TensorShape(111U, 138U, 77U), PadStrideInfo(3, 2, 1, 0)); - add_config(TensorShape(177U, 311U, 22U), TensorShape(3U, 3U, 22U), TensorShape(22U), TensorShape(177U, 156U, 22U), PadStrideInfo(1, 2, 1, 1)); - add_config(TensorShape(233U, 277U, 55U), TensorShape(3U, 3U, 55U), TensorShape(55U), TensorShape(231U, 138U, 55U), PadStrideInfo(1, 2, 0, 0)); - add_config(TensorShape(333U, 277U, 77U, 5U), TensorShape(3U, 3U, 77U), TensorShape(77U), TensorShape(166U, 93U, 77U, 5U), PadStrideInfo(2, 3, 0, 1)); - add_config(TensorShape(177U, 311U, 22U), TensorShape(3U, 3U, 22U), TensorShape(22U), TensorShape(89U, 311U, 22U), PadStrideInfo(2, 1, 1, 1)); - } -}; -} // namespace datasets -} // namespace test -} // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_DEPTHWISE_CONVOLUTION_DATASET */ diff --git a/tests/datasets/DepthwiseConvolutionLayerDataset.h b/tests/datasets/DepthwiseConvolutionLayerDataset.h new file mode 100644 index 0000000000..a2caba9b2c --- /dev/null +++ b/tests/datasets/DepthwiseConvolutionLayerDataset.h @@ -0,0 +1,187 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_TEST_DEPTHWISE_CONVOLUTION_DATASET +#define ARM_COMPUTE_TEST_DEPTHWISE_CONVOLUTION_DATASET + +#include "utils/TypePrinter.h" + +#include "arm_compute/core/TensorShape.h" +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +namespace test +{ +namespace datasets +{ +class DepthwiseConvolutionLayerDataset +{ +public: + using type = std::tuple; + + struct iterator + { + iterator(std::vector::const_iterator src_it, + std::vector::const_iterator weights_it, + std::vector::const_iterator biases_it, + std::vector::const_iterator dst_it, + std::vector::const_iterator infos_it) + : _src_it{ std::move(src_it) }, + _weights_it{ std::move(weights_it) }, + _biases_it{ std::move(biases_it) }, + _dst_it{ std::move(dst_it) }, + _infos_it{ std::move(infos_it) } + { + } + + std::string description() const + { + std::stringstream description; + description << "In=" << *_src_it << ":"; + description << "Weights=" << *_weights_it << ":"; + description << "Biases=" << *_biases_it << ":"; + description << "Out=" << *_dst_it << ":"; + description << "Info=" << *_infos_it; + return description.str(); + } + + DepthwiseConvolutionLayerDataset::type operator*() const + { + return std::make_tuple(*_src_it, *_weights_it, *_biases_it, *_dst_it, *_infos_it); + } + + iterator &operator++() + { + ++_src_it; + ++_weights_it; + ++_biases_it; + ++_dst_it; + ++_infos_it; + + return *this; + } + + private: + std::vector::const_iterator _src_it; + std::vector::const_iterator _weights_it; + std::vector::const_iterator _biases_it; + std::vector::const_iterator _dst_it; + std::vector::const_iterator _infos_it; + }; + + iterator begin() const + { + return iterator(_src_shapes.begin(), _weight_shapes.begin(), _biases_shapes.begin(), _dst_shapes.begin(), _infos.begin()); + } + + int size() const + { + return std::min(_src_shapes.size(), std::min(_weight_shapes.size(), std::min(_biases_shapes.size(), std::min(_dst_shapes.size(), _infos.size())))); + } + + void add_config(TensorShape src, TensorShape weights, TensorShape biases, TensorShape dst, PadStrideInfo info) + { + _src_shapes.emplace_back(std::move(src)); + _weight_shapes.emplace_back(std::move(weights)); + _biases_shapes.emplace_back(std::move(biases)); + _dst_shapes.emplace_back(std::move(dst)); + _infos.emplace_back(std::move(info)); + } + +protected: + DepthwiseConvolutionLayerDataset() = default; + DepthwiseConvolutionLayerDataset(DepthwiseConvolutionLayerDataset &&) = default; + +private: + std::vector _src_shapes{}; + std::vector _weight_shapes{}; + std::vector _biases_shapes{}; + std::vector _dst_shapes{}; + std::vector _infos{}; +}; +class SmallDepthwiseConvolutionLayerDataset final : public DepthwiseConvolutionLayerDataset +{ +public: + SmallDepthwiseConvolutionLayerDataset() + { + add_config(TensorShape(7U, 7U, 3U), TensorShape(3U, 3U, 3U), TensorShape(3U), TensorShape(5U, 5U, 3U), PadStrideInfo(1, 1, 0, 0)); + add_config(TensorShape(23U, 27U, 5U), TensorShape(3U, 5U, 5U), TensorShape(5U), TensorShape(11U, 23U, 5U), PadStrideInfo(2, 1, 0, 0)); + add_config(TensorShape(33U, 27U, 7U), TensorShape(7U, 3U, 7U), TensorShape(7U), TensorShape(10U, 13U, 7U), PadStrideInfo(3, 2, 1, 0)); + add_config(TensorShape(33U, 27U, 11U), TensorShape(3U, 3U, 11U), TensorShape(11U), TensorShape(31U, 14U, 11U), PadStrideInfo(1, 2, 0, 1)); + add_config(TensorShape(17U, 31U, 2U), TensorShape(5U, 9U, 2U), TensorShape(2U), TensorShape(15U, 13U, 2U), PadStrideInfo(1, 2, 1, 1)); + add_config(TensorShape(23U, 27U, 5U), TensorShape(11U, 3U, 5U), TensorShape(5U), 
TensorShape(13U, 13U, 5U), PadStrideInfo(1, 2, 0, 0)); + add_config(TensorShape(17U, 31U, 2U, 3U), TensorShape(5U, 9U, 2U), TensorShape(2U), TensorShape(15U, 13U, 2U, 3U), PadStrideInfo(1, 2, 1, 1)); + // Asymmetric padding + add_config(TensorShape(33U, 27U, 7U), TensorShape(5U, 7U, 7U), TensorShape(7U), TensorShape(11U, 12U, 7U), PadStrideInfo(3, 2, 1, 1, 2, 0, DimensionRoundingType::FLOOR)); + add_config(TensorShape(33U, 27U, 7U), TensorShape(5U, 7U, 7U), TensorShape(7U), TensorShape(11U, 12U, 7U), PadStrideInfo(3, 2, 1, 1, 0, 2, DimensionRoundingType::FLOOR)); + add_config(TensorShape(33U, 27U, 7U), TensorShape(5U, 7U, 7U), TensorShape(7U), TensorShape(11U, 12U, 7U), PadStrideInfo(3, 2, 2, 1, 2, 0, DimensionRoundingType::FLOOR)); + add_config(TensorShape(33U, 27U, 7U), TensorShape(5U, 7U, 7U), TensorShape(7U), TensorShape(11U, 12U, 7U), PadStrideInfo(3, 2, 1, 3, 0, 2, DimensionRoundingType::FLOOR)); + add_config(TensorShape(33U, 27U, 7U), TensorShape(5U, 7U, 7U), TensorShape(7U), TensorShape(10U, 11U, 7U), PadStrideInfo(3, 2, 1, 0, 1, 0, DimensionRoundingType::FLOOR)); + add_config(TensorShape(33U, 27U, 7U), TensorShape(5U, 7U, 7U), TensorShape(7U), TensorShape(10U, 11U, 7U), PadStrideInfo(3, 2, 0, 1, 0, 1, DimensionRoundingType::FLOOR)); + } +}; + +class LargeDepthwiseConvolutionLayerDataset final : public DepthwiseConvolutionLayerDataset +{ +public: + LargeDepthwiseConvolutionLayerDataset() + { + add_config(TensorShape(233U, 277U, 55U), TensorShape(3U, 3U, 55U), TensorShape(55U), TensorShape(116U, 275U, 55U), PadStrideInfo(2, 1, 0, 0)); + add_config(TensorShape(333U, 277U, 77U), TensorShape(3U, 3U, 77U), TensorShape(77U), TensorShape(111U, 138U, 77U), PadStrideInfo(3, 2, 1, 0)); + add_config(TensorShape(177U, 311U, 22U), TensorShape(3U, 3U, 22U), TensorShape(22U), TensorShape(177U, 156U, 22U), PadStrideInfo(1, 2, 1, 1)); + add_config(TensorShape(233U, 277U, 55U), TensorShape(3U, 3U, 55U), TensorShape(55U), TensorShape(231U, 138U, 55U), PadStrideInfo(1, 2, 0, 0)); + add_config(TensorShape(333U, 277U, 77U), TensorShape(3U, 3U, 77U), TensorShape(77U), TensorShape(166U, 93U, 77U), PadStrideInfo(2, 3, 0, 1)); + add_config(TensorShape(177U, 311U, 22U), TensorShape(3U, 3U, 22U), TensorShape(22U), TensorShape(89U, 311U, 22U), PadStrideInfo(2, 1, 1, 1)); + } +}; + +class SmallDepthwiseConvolutionLayerDataset3x3 final : public DepthwiseConvolutionLayerDataset +{ +public: + SmallDepthwiseConvolutionLayerDataset3x3() + { + add_config(TensorShape(7U, 7U, 3U, 2U), TensorShape(3U, 3U, 3U), TensorShape(3U), TensorShape(5U, 5U, 3U, 2U), PadStrideInfo(1, 1, 0, 0)); + add_config(TensorShape(33U, 27U, 11U), TensorShape(3U, 3U, 11U), TensorShape(11U), TensorShape(11U, 14U, 11U), PadStrideInfo(3, 2, 1, 1)); + add_config(TensorShape(21U, 31U, 9U, 4U), TensorShape(3U, 3U, 9U), TensorShape(9U), TensorShape(21U, 15U, 9U, 4U), PadStrideInfo(1, 2, 1, 0)); + add_config(TensorShape(33U, 27U, 11U, 3U), TensorShape(3U, 3U, 11U), TensorShape(11U), TensorShape(31U, 14U, 11U, 3U), PadStrideInfo(1, 2, 0, 1)); + } +}; + +class LargeDepthwiseConvolutionLayerDataset3x3 final : public DepthwiseConvolutionLayerDataset +{ +public: + LargeDepthwiseConvolutionLayerDataset3x3() + { + add_config(TensorShape(233U, 277U, 55U, 3U), TensorShape(3U, 3U, 55U), TensorShape(55U), TensorShape(116U, 275U, 55U, 3U), PadStrideInfo(2, 1, 0, 0)); + add_config(TensorShape(333U, 277U, 77U), TensorShape(3U, 3U, 77U), TensorShape(77U), TensorShape(111U, 138U, 77U), PadStrideInfo(3, 2, 1, 0)); + add_config(TensorShape(177U, 311U, 22U), 
TensorShape(3U, 3U, 22U), TensorShape(22U), TensorShape(177U, 156U, 22U), PadStrideInfo(1, 2, 1, 1)); + add_config(TensorShape(233U, 277U, 55U), TensorShape(3U, 3U, 55U), TensorShape(55U), TensorShape(231U, 138U, 55U), PadStrideInfo(1, 2, 0, 0)); + add_config(TensorShape(333U, 277U, 77U, 5U), TensorShape(3U, 3U, 77U), TensorShape(77U), TensorShape(166U, 93U, 77U, 5U), PadStrideInfo(2, 3, 0, 1)); + add_config(TensorShape(177U, 311U, 22U), TensorShape(3U, 3U, 22U), TensorShape(22U), TensorShape(89U, 311U, 22U), PadStrideInfo(2, 1, 1, 1)); + } +}; +} // namespace datasets +} // namespace test +} // namespace arm_compute +#endif /* ARM_COMPUTE_TEST_DEPTHWISE_CONVOLUTION_DATASET */ diff --git a/tests/datasets/MobileNetDepthwiseConvolutionDataset.h b/tests/datasets/MobileNetDepthwiseConvolutionDataset.h deleted file mode 100644 index 918815f41e..0000000000 --- a/tests/datasets/MobileNetDepthwiseConvolutionDataset.h +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Copyright (c) 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_TEST_MOBILENET_DEPTHWISE_CONVOLUTION_DATASET -#define ARM_COMPUTE_TEST_MOBILENET_DEPTHWISE_CONVOLUTION_DATASET - -#include "tests/datasets/DepthwiseConvolutionDataset.h" - -#include "utils/TypePrinter.h" - -#include "arm_compute/core/TensorShape.h" -#include "arm_compute/core/Types.h" - -namespace arm_compute -{ -namespace test -{ -namespace datasets -{ -class MobileNetDepthwiseConvolutionDataset final : public DepthwiseConvolutionDataset -{ -public: - MobileNetDepthwiseConvolutionDataset() - { - add_config(TensorShape(7U, 7U, 1024U), TensorShape(3U, 3U, 1024U), TensorShape(1024U), TensorShape(3U, 3U, 1024U), PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::FLOOR)); - add_config(TensorShape(14U, 14U, 512U), TensorShape(3U, 3U, 512U), TensorShape(512U), TensorShape(7U, 7U, 512U), PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::FLOOR)); - add_config(TensorShape(28U, 28U, 256U), TensorShape(3U, 3U, 256U), TensorShape(256U), TensorShape(14U, 14U, 256U), PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::FLOOR)); - add_config(TensorShape(28U, 28U, 256U), TensorShape(3U, 3U, 256U), TensorShape(256U), TensorShape(28U, 28U, 256U), PadStrideInfo(1, 1, 1, 1)); - add_config(TensorShape(56U, 56U, 128U), TensorShape(3U, 3U, 128U), TensorShape(128U), TensorShape(28U, 28U, 128U), PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::FLOOR)); - add_config(TensorShape(56U, 56U, 128U), TensorShape(3U, 3U, 128U), TensorShape(128U), TensorShape(56U, 56U, 128U), PadStrideInfo(1, 1, 1, 1)); - add_config(TensorShape(112U, 112U, 64U), TensorShape(3U, 3U, 64U), TensorShape(64U), TensorShape(56U, 56U, 64U), PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::FLOOR)); - add_config(TensorShape(112U, 112U, 32U), TensorShape(3U, 3U, 32U), TensorShape(32U), TensorShape(112U, 112U, 32U), PadStrideInfo(1, 1, 1, 1)); - } -}; -} // namespace datasets -} // namespace test -} // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_MOBILENET_DEPTHWISE_CONVOLUTION_DATASET */ diff --git a/tests/datasets/MobileNetDepthwiseConvolutionLayerDataset.h b/tests/datasets/MobileNetDepthwiseConvolutionLayerDataset.h new file mode 100644 index 0000000000..5531a08d8e --- /dev/null +++ b/tests/datasets/MobileNetDepthwiseConvolutionLayerDataset.h @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_TEST_MOBILENET_DEPTHWISE_CONVOLUTION_DATASET +#define ARM_COMPUTE_TEST_MOBILENET_DEPTHWISE_CONVOLUTION_DATASET + +#include "tests/datasets/DepthwiseConvolutionLayerDataset.h" + +#include "utils/TypePrinter.h" + +#include "arm_compute/core/TensorShape.h" +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +namespace test +{ +namespace datasets +{ +class MobileNetDepthwiseConvolutionLayerDataset final : public DepthwiseConvolutionLayerDataset +{ +public: + MobileNetDepthwiseConvolutionLayerDataset() + { + add_config(TensorShape(7U, 7U, 1024U), TensorShape(3U, 3U, 1024U), TensorShape(1024U), TensorShape(3U, 3U, 1024U), PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::FLOOR)); + add_config(TensorShape(14U, 14U, 512U), TensorShape(3U, 3U, 512U), TensorShape(512U), TensorShape(7U, 7U, 512U), PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::FLOOR)); + add_config(TensorShape(28U, 28U, 256U), TensorShape(3U, 3U, 256U), TensorShape(256U), TensorShape(14U, 14U, 256U), PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::FLOOR)); + add_config(TensorShape(28U, 28U, 256U), TensorShape(3U, 3U, 256U), TensorShape(256U), TensorShape(28U, 28U, 256U), PadStrideInfo(1, 1, 1, 1)); + add_config(TensorShape(56U, 56U, 128U), TensorShape(3U, 3U, 128U), TensorShape(128U), TensorShape(28U, 28U, 128U), PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::FLOOR)); + add_config(TensorShape(56U, 56U, 128U), TensorShape(3U, 3U, 128U), TensorShape(128U), TensorShape(56U, 56U, 128U), PadStrideInfo(1, 1, 1, 1)); + add_config(TensorShape(112U, 112U, 64U), TensorShape(3U, 3U, 64U), TensorShape(64U), TensorShape(56U, 56U, 64U), PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::FLOOR)); + add_config(TensorShape(112U, 112U, 32U), TensorShape(3U, 3U, 32U), TensorShape(32U), TensorShape(112U, 112U, 32U), PadStrideInfo(1, 1, 1, 1)); + } +}; +} // namespace datasets +} // namespace test +} // namespace arm_compute +#endif /* ARM_COMPUTE_TEST_MOBILENET_DEPTHWISE_CONVOLUTION_DATASET */ diff --git a/tests/datasets/ShapeDatasets.h b/tests/datasets/ShapeDatasets.h index 173ee74958..02a71aa7b5 100644 --- a/tests/datasets/ShapeDatasets.h +++ b/tests/datasets/ShapeDatasets.h @@ -269,11 +269,11 @@ public: } }; -/** Data set containing 2D tensor shapes for DepthConcatenate. */ -class DepthConcatenateShapes final : public ShapeDataset +/** Data set containing 2D tensor shapes for DepthConcatenateLayer. 
*/ +class DepthConcatenateLayerShapes final : public ShapeDataset { public: - DepthConcatenateShapes() + DepthConcatenateLayerShapes() : ShapeDataset("Shape", { TensorShape{ 322U, 243U }, diff --git a/tests/networks/MobileNetNetwork.h b/tests/networks/MobileNetNetwork.h index 74dce0e348..1bc8ad9a0c 100644 --- a/tests/networks/MobileNetNetwork.h +++ b/tests/networks/MobileNetNetwork.h @@ -47,7 +47,7 @@ template class MobileNetNetwork @@ -279,9 +279,9 @@ private: ConvolutionLayerFunction conv3x3{}; ActivationLayerFunction conv3x3_act{}; - std::array act{ {} }; - std::array conv1x1{ {} }; - std::array dwc3x3{ {} }; + std::array act{ {} }; + std::array conv1x1{ {} }; + std::array dwc3x3{ {} }; PoolingLayerFunction pool{}; ActivationLayerFunction logistic{}; ReshapeFunction reshape{}; diff --git a/tests/validation/CL/DepthConcatenateLayer.cpp b/tests/validation/CL/DepthConcatenateLayer.cpp index 19a8b369ce..02901371bb 100644 --- a/tests/validation/CL/DepthConcatenateLayer.cpp +++ b/tests/validation/CL/DepthConcatenateLayer.cpp @@ -24,7 +24,7 @@ #include "arm_compute/core/Types.h" #include "arm_compute/runtime/CL/CLTensor.h" #include "arm_compute/runtime/CL/CLTensorAllocator.h" -#include "arm_compute/runtime/CL/functions/CLDepthConcatenate.h" +#include "arm_compute/runtime/CL/functions/CLDepthConcatenateLayer.h" #include "tests/CL/CLAccessor.h" #include "tests/datasets/ShapeDatasets.h" #include "tests/framework/Asserts.h" @@ -45,7 +45,7 @@ TEST_SUITE(DepthConcatenateLayer) //TODO(COMPMID-415): Add configuration test? template -using CLDepthConcatenateLayerFixture = DepthConcatenateValidationFixture; +using CLDepthConcatenateLayerFixture = DepthConcatenateLayerValidationFixture; TEST_SUITE(Float) TEST_SUITE(FP16) @@ -70,7 +70,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthConcatenateLayerFixture, framewor // Validate output validate(CLAccessor(_target), _reference); } -FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthConcatenateLayerFixture, framework::DatasetMode::NIGHTLY, combine(datasets::DepthConcatenateShapes(), framework::dataset::make("DataType", +FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthConcatenateLayerFixture, framework::DatasetMode::NIGHTLY, combine(datasets::DepthConcatenateLayerShapes(), framework::dataset::make("DataType", DataType::F32))) { // Validate output @@ -88,7 +88,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthConcatenateLayerFixture, framewo // Validate output validate(CLAccessor(_target), _reference); } -FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthConcatenateLayerFixture, framework::DatasetMode::NIGHTLY, combine(datasets::DepthConcatenateShapes(), +FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthConcatenateLayerFixture, framework::DatasetMode::NIGHTLY, combine(datasets::DepthConcatenateLayerShapes(), framework::dataset::make("DataType", DataType::QS8))) { @@ -105,7 +105,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthConcatenateLayerFixture, framew // Validate output validate(CLAccessor(_target), _reference); } -FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthConcatenateLayerFixture, framework::DatasetMode::NIGHTLY, combine(datasets::DepthConcatenateShapes(), +FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthConcatenateLayerFixture, framework::DatasetMode::NIGHTLY, combine(datasets::DepthConcatenateLayerShapes(), framework::dataset::make("DataType", DataType::QS16))) { diff --git a/tests/validation/CL/DepthConvert.cpp b/tests/validation/CL/DepthConvert.cpp deleted file mode 100644 index 57669f0a52..0000000000 --- a/tests/validation/CL/DepthConvert.cpp +++ /dev/null @@ -1,484 +0,0 @@ -/* - * Copyright (c) 2017 ARM 
Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/CLTensor.h"
-#include "arm_compute/runtime/CL/CLTensorAllocator.h"
-#include "arm_compute/runtime/CL/functions/CLDepthConvert.h"
-#include "tests/CL/CLAccessor.h"
-#include "tests/PaddingCalculator.h"
-#include "tests/datasets/ConvertPolicyDataset.h"
-#include "tests/datasets/ShapeDatasets.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Macros.h"
-#include "tests/framework/datasets/Datasets.h"
-#include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/DepthConvertFixture.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-namespace
-{
-/** Input data sets **/
-const auto DepthConvertU8toU16Dataset = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::U16));
-const auto DepthConvertU8toS16Dataset = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::S16));
-const auto DepthConvertU8toS32Dataset = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::S32));
-const auto DepthConvertU16toU8Dataset = combine(framework::dataset::make("DataType", DataType::U16), framework::dataset::make("DataType", DataType::U8));
-const auto DepthConvertU16toU32Dataset = combine(framework::dataset::make("DataType", DataType::U16), framework::dataset::make("DataType", DataType::U32));
-const auto DepthConvertS16toU8Dataset = combine(framework::dataset::make("DataType", DataType::S16), framework::dataset::make("DataType", DataType::U8));
-const auto DepthConvertS16toS32Dataset = combine(framework::dataset::make("DataType", DataType::S16), framework::dataset::make("DataType", DataType::S32));
-const auto DepthConvertQS8toFP32Dataset = combine(framework::dataset::make("DataType", DataType::QS8), framework::dataset::make("DataType", DataType::F32));
-const auto DepthConvertQS16toFP32Dataset = combine(framework::dataset::make("DataType", DataType::QS16), framework::dataset::make("DataType", DataType::F32));
-const auto DepthConvertFP32toQS8Dataset = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::QS8));
-const auto DepthConvertFP32toQS16Dataset = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::QS16));
DataType::QS16)); -const auto DepthConvertShiftDataset = framework::dataset::make("Shift", 0, 7); -const auto DepthConvertFixedPointQuantizedDataset = framework::dataset::make("FractionalBits", 1, 7); -} // namespace - -TEST_SUITE(CL) -TEST_SUITE(DepthConvert) -template -using CLDepthConvertToU16Fixture = DepthConvertValidationFixture; -template -using CLDepthConvertToS16Fixture = DepthConvertValidationFixture; -template -using CLDepthConvertToS32Fixture = DepthConvertValidationFixture; -template -using CLDepthConvertToU8Fixture = DepthConvertValidationFixture; -template -using CLDepthConvertToU32Fixture = DepthConvertValidationFixture; -template -using CLDepthConvertToFP32FixedPointFixture = DepthConvertValidationFractionalBitsFixture; -template -using CLDepthConvertToQS8FixedPointFixture = DepthConvertValidationFractionalBitsFixture; -template -using CLDepthConvertToQS16FixedPointFixture = DepthConvertValidationFractionalBitsFixture; - -TEST_SUITE(U8_to_U16) -DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertShiftDataset), - shape, policy, shift) -{ - int fixed_point_position = 0; - - // Create tensors - CLTensor src = create_tensor(shape, DataType::U8, 1, fixed_point_position); - CLTensor dst = create_tensor(shape, DataType::U16, 1, fixed_point_position); - - // Create and Configure function - CLDepthConvert depth_convert; - depth_convert.configure(&src, &dst, policy, shift); - - // Validate valid region - const ValidRegion valid_region = shape_to_valid_region(shape); - validate(dst.info()->valid_region(), valid_region); - - // Validate padding - const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding(); - validate(src.info()->padding(), padding); - validate(dst.info()->padding(), padding); -} - -FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthConvertToU16Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertU8toU16Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertShiftDataset)) -{ - // Validate output - validate(CLAccessor(_target), _reference); -} - -FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthConvertToU16Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertU8toU16Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertShiftDataset)) -{ - // Validate output - validate(CLAccessor(_target), _reference); -} -TEST_SUITE_END() - -TEST_SUITE(U8_to_S16) -DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertShiftDataset), - shape, policy, shift) -{ - int fixed_point_position = 0; - - // Create tensors - CLTensor src = create_tensor(shape, DataType::U8, 1, fixed_point_position); - CLTensor dst = create_tensor(shape, DataType::S16, 1, fixed_point_position); - - // Create and Configure function - CLDepthConvert depth_convert; - depth_convert.configure(&src, &dst, policy, shift); - - // Validate valid region - const ValidRegion valid_region = shape_to_valid_region(shape); - validate(dst.info()->valid_region(), valid_region); - - // Validate 
padding - const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding(); - validate(src.info()->padding(), padding); - validate(dst.info()->padding(), padding); -} - -FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthConvertToS16Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertU8toS16Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertShiftDataset)) -{ - // Validate output - validate(CLAccessor(_target), _reference); -} - -FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthConvertToS16Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertU8toS16Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertShiftDataset)) -{ - // Validate output - validate(CLAccessor(_target), _reference); -} -TEST_SUITE_END() -TEST_SUITE(U8_to_S32) -DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertShiftDataset), - shape, policy, shift) -{ - int fixed_point_position = 0; - - // Create tensors - CLTensor src = create_tensor(shape, DataType::U8, 1, fixed_point_position); - CLTensor dst = create_tensor(shape, DataType::S32, 1, fixed_point_position); - - // Create and Configure function - CLDepthConvert depth_convert; - depth_convert.configure(&src, &dst, policy, shift); - - // Validate valid region - const ValidRegion valid_region = shape_to_valid_region(shape); - validate(dst.info()->valid_region(), valid_region); - - // Validate padding - const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding(); - validate(src.info()->padding(), padding); - validate(dst.info()->padding(), padding); -} - -FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthConvertToS32Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertU8toS32Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertShiftDataset)) -{ - // Validate output - validate(CLAccessor(_target), _reference); -} - -FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthConvertToS32Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertU8toS32Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertShiftDataset)) -{ - // Validate output - validate(CLAccessor(_target), _reference); -} -TEST_SUITE_END() - -TEST_SUITE(U16_to_U8) -DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertShiftDataset), - shape, policy, shift) -{ - int fixed_point_position = 0; - - // Create tensors - CLTensor src = create_tensor(shape, DataType::U16, 1, fixed_point_position); - CLTensor dst = create_tensor(shape, DataType::U8, 1, fixed_point_position); - - // Create and Configure function - CLDepthConvert depth_convert; - depth_convert.configure(&src, &dst, policy, shift); - - // Validate valid region - const ValidRegion valid_region = shape_to_valid_region(shape); - validate(dst.info()->valid_region(), valid_region); - - // Validate padding - 
const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding(); - validate(src.info()->padding(), padding); - validate(dst.info()->padding(), padding); -} - -FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthConvertToU8Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertU16toU8Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertShiftDataset)) -{ - // Validate output - validate(CLAccessor(_target), _reference); -} -FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthConvertToU8Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertU16toU8Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertShiftDataset)) -{ - // Validate output - validate(CLAccessor(_target), _reference); -} -TEST_SUITE_END() - -TEST_SUITE(U16_to_U32) -DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertShiftDataset), - shape, policy, shift) -{ - int fixed_point_position = 0; - - // Create tensors - CLTensor src = create_tensor(shape, DataType::U16, 1, fixed_point_position); - CLTensor dst = create_tensor(shape, DataType::U32, 1, fixed_point_position); - - // Create and Configure function - CLDepthConvert depth_convert; - depth_convert.configure(&src, &dst, policy, shift); - - // Validate valid region - const ValidRegion valid_region = shape_to_valid_region(shape); - validate(dst.info()->valid_region(), valid_region); - - // Validate padding - const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding(); - validate(src.info()->padding(), padding); - validate(dst.info()->padding(), padding); -} - -FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthConvertToU32Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertU16toU32Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertShiftDataset)) -{ - // Validate output - validate(CLAccessor(_target), _reference); -} -FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthConvertToU32Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertU16toU32Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertShiftDataset)) -{ - // Validate output - validate(CLAccessor(_target), _reference); -} -TEST_SUITE_END() - -TEST_SUITE(S16_to_U8) -DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertShiftDataset), - shape, policy, shift) -{ - int fixed_point_position = 0; - - // Create tensors - CLTensor src = create_tensor(shape, DataType::S16, 1, fixed_point_position); - CLTensor dst = create_tensor(shape, DataType::U8, 1, fixed_point_position); - - // Create and Configure function - CLDepthConvert depth_convert; - depth_convert.configure(&src, &dst, policy, shift); - - // Validate valid region - const ValidRegion valid_region = shape_to_valid_region(shape); - validate(dst.info()->valid_region(), valid_region); - - // Validate padding - const 
PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding(); - validate(src.info()->padding(), padding); - validate(dst.info()->padding(), padding); -} - -FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthConvertToU8Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertS16toU8Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertShiftDataset)) -{ - // Validate output - validate(CLAccessor(_target), _reference); -} -FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthConvertToU8Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertS16toU8Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertShiftDataset)) -{ - // Validate output - validate(CLAccessor(_target), _reference); -} -TEST_SUITE_END() - -TEST_SUITE(S16_to_S32) -DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertShiftDataset), - shape, policy, shift) -{ - int fixed_point_position = 0; - - // Create tensors - CLTensor src = create_tensor(shape, DataType::S16, 1, fixed_point_position); - CLTensor dst = create_tensor(shape, DataType::S32, 1, fixed_point_position); - - // Create and Configure function - CLDepthConvert depth_convert; - depth_convert.configure(&src, &dst, policy, shift); - - // Validate valid region - const ValidRegion valid_region = shape_to_valid_region(shape); - validate(dst.info()->valid_region(), valid_region); - - // Validate padding - const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding(); - validate(src.info()->padding(), padding); - validate(dst.info()->padding(), padding); -} - -FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthConvertToS32Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertS16toS32Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertShiftDataset)) -{ - // Validate output - validate(CLAccessor(_target), _reference); -} -FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthConvertToS32Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertS16toS32Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertShiftDataset)) -{ - // Validate output - validate(CLAccessor(_target), _reference); -} -TEST_SUITE_END() - -TEST_SUITE(Quantized_to_FP32) -DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("DataType", { DataType::QS8, DataType::QS16 })), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertFixedPointQuantizedDataset), - shape, dt, policy, fixed_point_position) -{ - int shift = 0; - - // Create tensors - CLTensor src = create_tensor(shape, dt, 1, fixed_point_position); - CLTensor dst = create_tensor(shape, DataType::F32, 1, fixed_point_position); - - // Create and Configure function - CLDepthConvert depth_convert; - depth_convert.configure(&src, &dst, policy, shift); - - // Validate valid region - const ValidRegion valid_region = 
shape_to_valid_region(shape); - validate(dst.info()->valid_region(), valid_region); - - // Validate padding - const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding(); - validate(src.info()->padding(), padding); - validate(dst.info()->padding(), padding); -} -FIXTURE_DATA_TEST_CASE(RunSmallQS8, CLDepthConvertToFP32FixedPointFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertQS8toFP32Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertFixedPointQuantizedDataset)) -{ - // Validate output - validate(CLAccessor(_target), _reference); -} -FIXTURE_DATA_TEST_CASE(RunSmallQS16, CLDepthConvertToFP32FixedPointFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertQS16toFP32Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertFixedPointQuantizedDataset)) -{ - // Validate output - validate(CLAccessor(_target), _reference); -} -FIXTURE_DATA_TEST_CASE(RunLargeQS8, CLDepthConvertToFP32FixedPointFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertQS8toFP32Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertFixedPointQuantizedDataset)) -{ - // Validate output - validate(CLAccessor(_target), _reference); -} -FIXTURE_DATA_TEST_CASE(RunLargeQS16, CLDepthConvertToFP32FixedPointFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertQS16toFP32Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertFixedPointQuantizedDataset)) -{ - // Validate output - validate(CLAccessor(_target), _reference); -} -TEST_SUITE_END() - -TEST_SUITE(FP32_to_Quantized) -DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("DataType", { DataType::QS8, DataType::QS16 })), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertFixedPointQuantizedDataset), - shape, dt, policy, fixed_point_position) -{ - int shift = 0; - - // Create tensors - CLTensor src = create_tensor(shape, DataType::F32, 1, fixed_point_position); - CLTensor dst = create_tensor(shape, dt, 1, fixed_point_position); - - // Create and Configure function - CLDepthConvert depth_convert; - depth_convert.configure(&src, &dst, policy, shift); - - // Validate valid region - const ValidRegion valid_region = shape_to_valid_region(shape); - validate(dst.info()->valid_region(), valid_region); - - // Validate padding - const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding(); - validate(src.info()->padding(), padding); - validate(dst.info()->padding(), padding); -} -FIXTURE_DATA_TEST_CASE(RunSmallQS8, CLDepthConvertToQS8FixedPointFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertFP32toQS8Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertFixedPointQuantizedDataset)) -{ - // Validate output - validate(CLAccessor(_target), _reference); -} -FIXTURE_DATA_TEST_CASE(RunSmallQS16, CLDepthConvertToQS16FixedPointFixture, framework::DatasetMode::PRECOMMIT, 
combine(combine(combine(datasets::SmallShapes(), DepthConvertFP32toQS16Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertFixedPointQuantizedDataset)) -{ - // Validate output - validate(CLAccessor(_target), _reference); -} -FIXTURE_DATA_TEST_CASE(RunLargeQS8, CLDepthConvertToQS8FixedPointFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertFP32toQS8Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertFixedPointQuantizedDataset)) -{ - // Validate output - validate(CLAccessor(_target), _reference); -} -FIXTURE_DATA_TEST_CASE(RunLargeQS16, CLDepthConvertToQS16FixedPointFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertFP32toQS16Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertFixedPointQuantizedDataset)) -{ - // Validate output - validate(CLAccessor(_target), _reference); -} -TEST_SUITE_END() - -TEST_SUITE_END() -TEST_SUITE_END() -} // namespace validation -} // namespace test -} // namespace arm_compute diff --git a/tests/validation/CL/DepthConvertLayer.cpp b/tests/validation/CL/DepthConvertLayer.cpp new file mode 100644 index 0000000000..9c6cc46ca8 --- /dev/null +++ b/tests/validation/CL/DepthConvertLayer.cpp @@ -0,0 +1,492 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONCLCTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/CL/CLTensorAllocator.h"
+#include "arm_compute/runtime/CL/functions/CLDepthConvertLayer.h"
+#include "tests/CL/CLAccessor.h"
+#include "tests/PaddingCalculator.h"
+#include "tests/datasets/ConvertPolicyDataset.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/DepthConvertLayerFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+/** Input data sets **/
+const auto DepthConvertLayerU8toU16Dataset   = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::U16));
+const auto DepthConvertLayerU8toS16Dataset   = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::S16));
+const auto DepthConvertLayerU8toS32Dataset   = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::S32));
+const auto DepthConvertLayerU16toU8Dataset   = combine(framework::dataset::make("DataType", DataType::U16), framework::dataset::make("DataType", DataType::U8));
+const auto DepthConvertLayerU16toU32Dataset  = combine(framework::dataset::make("DataType", DataType::U16), framework::dataset::make("DataType", DataType::U32));
+const auto DepthConvertLayerS16toU8Dataset   = combine(framework::dataset::make("DataType", DataType::S16), framework::dataset::make("DataType", DataType::U8));
+const auto DepthConvertLayerS16toS32Dataset  = combine(framework::dataset::make("DataType", DataType::S16), framework::dataset::make("DataType", DataType::S32));
+const auto DepthConvertLayerQS8toFP32Dataset = combine(framework::dataset::make("DataType", DataType::QS8), framework::dataset::make("DataType", DataType::F32));
+const auto DepthConvertLayerQS16toFP32Dataset = combine(framework::dataset::make("DataType", DataType::QS16), framework::dataset::make("DataType", DataType::F32));
+const auto DepthConvertLayerFP32toQS8Dataset  = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::QS8));
+const auto DepthConvertLayerFP32toQS16Dataset = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::QS16));
+const auto DepthConvertLayerShiftDataset               = framework::dataset::make("Shift", 0, 7);
+const auto DepthConvertLayerFixedPointQuantizedDataset = framework::dataset::make("FractionalBits", 1, 7);
+} // namespace
+
+TEST_SUITE(CL)
+TEST_SUITE(DepthConvertLayer)
+template <typename T>
+using CLDepthConvertLayerToU16Fixture = DepthConvertLayerValidationFixture;
+template <typename T>
+using CLDepthConvertLayerToS16Fixture = DepthConvertLayerValidationFixture;
+template <typename T>
+using CLDepthConvertLayerToS32Fixture = DepthConvertLayerValidationFixture;
+template <typename T>
+using CLDepthConvertLayerToU8Fixture = DepthConvertLayerValidationFixture;
+template <typename T>
+using CLDepthConvertLayerToU32Fixture = DepthConvertLayerValidationFixture;
+template <typename T>
+using CLDepthConvertLayerToFP32FixedPointFixture = DepthConvertLayerValidationFractionalBitsFixture;
+template <typename T>
+using CLDepthConvertLayerToQS8FixedPointFixture = DepthConvertLayerValidationFractionalBitsFixture;
+template <typename T>
+using CLDepthConvertLayerToQS16FixedPointFixture = DepthConvertLayerValidationFractionalBitsFixture;
+
+TEST_SUITE(U8_to_U16)
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+               DepthConvertLayerShiftDataset),
+               shape, policy, shift)
+{
+    int fixed_point_position = 0;
+
+    // Create tensors
+    CLTensor src = create_tensor<CLTensor>(shape, DataType::U8, 1, fixed_point_position);
+    CLTensor dst = create_tensor<CLTensor>(shape, DataType::U16, 1, fixed_point_position);
+
+    // Create and Configure function
+    CLDepthConvertLayer depth_convert;
+    depth_convert.configure(&src, &dst, policy, shift);
+
+    // Validate valid region
+    const ValidRegion valid_region = shape_to_valid_region(shape);
+    validate(dst.info()->valid_region(), valid_region);
+
+    // Validate padding
+    const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding();
+    validate(src.info()->padding(), padding);
+    validate(dst.info()->padding(), padding);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthConvertLayerToU16Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertLayerU8toU16Dataset),
+                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                       DepthConvertLayerShiftDataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthConvertLayerToU16Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertLayerU8toU16Dataset),
+                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                       DepthConvertLayerShiftDataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+TEST_SUITE_END()
+
+TEST_SUITE(U8_to_S16)
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+               DepthConvertLayerShiftDataset),
+               shape, policy, shift)
+{
+    int fixed_point_position = 0;
+
+    // Create tensors
+    CLTensor src = create_tensor<CLTensor>(shape, DataType::U8, 1, fixed_point_position);
+    CLTensor dst = create_tensor<CLTensor>(shape, DataType::S16, 1, fixed_point_position);
+
+    // Create and Configure function
+    CLDepthConvertLayer depth_convert;
+    depth_convert.configure(&src, &dst, policy, shift);
+
+    // Validate valid region
+    const ValidRegion valid_region = shape_to_valid_region(shape);
+    validate(dst.info()->valid_region(), valid_region);
+
+    // Validate padding
+    const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding();
+    validate(src.info()->padding(), padding);
+    validate(dst.info()->padding(), padding);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthConvertLayerToS16Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertLayerU8toS16Dataset),
+                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                       DepthConvertLayerShiftDataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthConvertLayerToS16Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertLayerU8toS16Dataset),
+                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                       DepthConvertLayerShiftDataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+TEST_SUITE_END()
+TEST_SUITE(U8_to_S32)
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+               DepthConvertLayerShiftDataset),
+               shape, policy, shift)
+{
+    int fixed_point_position = 0;
+
+    // Create tensors
+    CLTensor src = create_tensor<CLTensor>(shape, DataType::U8, 1, fixed_point_position);
+    CLTensor dst = create_tensor<CLTensor>(shape, DataType::S32, 1, fixed_point_position);
+
+    // Create and Configure function
+    CLDepthConvertLayer depth_convert;
+    depth_convert.configure(&src, &dst, policy, shift);
+
+    // Validate valid region
+    const ValidRegion valid_region = shape_to_valid_region(shape);
+    validate(dst.info()->valid_region(), valid_region);
+
+    // Validate padding
+    const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding();
+    validate(src.info()->padding(), padding);
+    validate(dst.info()->padding(), padding);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthConvertLayerToS32Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertLayerU8toS32Dataset),
+                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                       DepthConvertLayerShiftDataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthConvertLayerToS32Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertLayerU8toS32Dataset),
+                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                       DepthConvertLayerShiftDataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+TEST_SUITE_END()
+
+TEST_SUITE(U16_to_U8)
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+               DepthConvertLayerShiftDataset),
+               shape, policy, shift)
+{
+    int fixed_point_position = 0;
+
+    // Create tensors
+    CLTensor src = create_tensor<CLTensor>(shape, DataType::U16, 1, fixed_point_position);
+    CLTensor dst = create_tensor<CLTensor>(shape, DataType::U8, 1, fixed_point_position);
+
+    // Create and Configure function
+    CLDepthConvertLayer depth_convert;
+    depth_convert.configure(&src, &dst, policy, shift);
+
+    // Validate valid region
+    const ValidRegion valid_region = shape_to_valid_region(shape);
+    validate(dst.info()->valid_region(), valid_region);
+
+    // Validate padding
+    const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding();
+    validate(src.info()->padding(), padding);
+    validate(dst.info()->padding(), padding);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthConvertLayerToU8Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertLayerU16toU8Dataset),
+                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                       DepthConvertLayerShiftDataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthConvertLayerToU8Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertLayerU16toU8Dataset),
+                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                       DepthConvertLayerShiftDataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+TEST_SUITE_END()
+
+TEST_SUITE(U16_to_U32)
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+               DepthConvertLayerShiftDataset),
+               shape, policy, shift)
+{
+    int fixed_point_position = 0;
+
+    // Create tensors
+    CLTensor src = create_tensor<CLTensor>(shape, DataType::U16, 1, fixed_point_position);
+    CLTensor dst = create_tensor<CLTensor>(shape, DataType::U32, 1, fixed_point_position);
+
+    // Create and Configure function
+    CLDepthConvertLayer depth_convert;
+    depth_convert.configure(&src, &dst, policy, shift);
+
+    // Validate valid region
+    const ValidRegion valid_region = shape_to_valid_region(shape);
+    validate(dst.info()->valid_region(), valid_region);
+
+    // Validate padding
+    const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding();
+    validate(src.info()->padding(), padding);
+    validate(dst.info()->padding(), padding);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthConvertLayerToU32Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertLayerU16toU32Dataset),
+                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                       DepthConvertLayerShiftDataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthConvertLayerToU32Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertLayerU16toU32Dataset),
+                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                       DepthConvertLayerShiftDataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+TEST_SUITE_END()
+
+TEST_SUITE(S16_to_U8)
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+               DepthConvertLayerShiftDataset),
+               shape, policy, shift)
+{
+    int fixed_point_position = 0;
+
+    // Create tensors
+    CLTensor src = create_tensor<CLTensor>(shape, DataType::S16, 1, fixed_point_position);
+    CLTensor dst = create_tensor<CLTensor>(shape, DataType::U8, 1, fixed_point_position);
+
+    // Create and Configure function
+    CLDepthConvertLayer depth_convert;
+    depth_convert.configure(&src, &dst, policy, shift);
+
+    // Validate valid region
+    const ValidRegion valid_region = shape_to_valid_region(shape);
+    validate(dst.info()->valid_region(), valid_region);
+
+    // Validate padding
+    const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding();
+    validate(src.info()->padding(), padding);
+    validate(dst.info()->padding(), padding);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthConvertLayerToU8Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertLayerS16toU8Dataset),
+                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                       DepthConvertLayerShiftDataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthConvertLayerToU8Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertLayerS16toU8Dataset),
+                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                       DepthConvertLayerShiftDataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+TEST_SUITE_END()
+
+TEST_SUITE(S16_to_S32)
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+               DepthConvertLayerShiftDataset),
+               shape, policy, shift)
+{
+    int fixed_point_position = 0;
+
+    // Create tensors
+    CLTensor src = create_tensor<CLTensor>(shape, DataType::S16, 1, fixed_point_position);
+    CLTensor dst = create_tensor<CLTensor>(shape, DataType::S32, 1, fixed_point_position);
+
+    // Create and Configure function
+    CLDepthConvertLayer depth_convert;
+    depth_convert.configure(&src, &dst, policy, shift);
+
+    // Validate valid region
+    const ValidRegion valid_region = shape_to_valid_region(shape);
+    validate(dst.info()->valid_region(), valid_region);
+
+    // Validate padding
+    const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding();
+    validate(src.info()->padding(), padding);
+    validate(dst.info()->padding(), padding);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthConvertLayerToS32Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertLayerS16toS32Dataset),
+                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                       DepthConvertLayerShiftDataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthConvertLayerToS32Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertLayerS16toS32Dataset),
+                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                       DepthConvertLayerShiftDataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+TEST_SUITE_END()
+
+TEST_SUITE(Quantized_to_FP32)
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("DataType", { DataType::QS8, DataType::QS16 })),
+               framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+               DepthConvertLayerFixedPointQuantizedDataset),
+               shape, dt, policy, fixed_point_position)
+{
+    int shift = 0;
+
+    // Create tensors
+    CLTensor src = create_tensor<CLTensor>(shape, dt, 1, fixed_point_position);
+    CLTensor dst = create_tensor<CLTensor>(shape, DataType::F32, 1, fixed_point_position);
+
+    // Create and Configure function
+    CLDepthConvertLayer depth_convert;
+    depth_convert.configure(&src, &dst, policy, shift);
+
+    // Validate valid region
+    const ValidRegion valid_region = shape_to_valid_region(shape);
+    validate(dst.info()->valid_region(), valid_region);
+
+    // Validate padding
+    const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding();
+    validate(src.info()->padding(), padding);
+    validate(dst.info()->padding(), padding);
+}
+FIXTURE_DATA_TEST_CASE(RunSmallQS8, CLDepthConvertLayerToFP32FixedPointFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(),
+                       DepthConvertLayerQS8toFP32Dataset),
+                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                       DepthConvertLayerFixedPointQuantizedDataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunSmallQS16, CLDepthConvertLayerToFP32FixedPointFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(),
+                       DepthConvertLayerQS16toFP32Dataset),
+                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                       DepthConvertLayerFixedPointQuantizedDataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunLargeQS8, CLDepthConvertLayerToFP32FixedPointFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(),
+                       DepthConvertLayerQS8toFP32Dataset),
+                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                       DepthConvertLayerFixedPointQuantizedDataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunLargeQS16, CLDepthConvertLayerToFP32FixedPointFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(),
+                       DepthConvertLayerQS16toFP32Dataset),
+                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                       DepthConvertLayerFixedPointQuantizedDataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+TEST_SUITE_END()
+
+TEST_SUITE(FP32_to_Quantized)
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("DataType", { DataType::QS8, DataType::QS16 })),
+               framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+               DepthConvertLayerFixedPointQuantizedDataset),
+               shape, dt, policy, fixed_point_position)
+{
+    int shift = 0;
+
+    // Create tensors
+    CLTensor src = create_tensor<CLTensor>(shape, DataType::F32, 1, fixed_point_position);
+    CLTensor dst = create_tensor<CLTensor>(shape, dt, 1, fixed_point_position);
+
+    // Create and Configure function
+    CLDepthConvertLayer depth_convert;
+    depth_convert.configure(&src, &dst, policy, shift);
+
+    // Validate valid region
+    const ValidRegion valid_region = shape_to_valid_region(shape);
+    validate(dst.info()->valid_region(), valid_region);
+
+    // Validate padding
+    const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding();
+    validate(src.info()->padding(), padding);
+    validate(dst.info()->padding(), padding);
+}
+FIXTURE_DATA_TEST_CASE(RunSmallQS8, CLDepthConvertLayerToQS8FixedPointFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(),
+                       DepthConvertLayerFP32toQS8Dataset),
+                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                       DepthConvertLayerFixedPointQuantizedDataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunSmallQS16, CLDepthConvertLayerToQS16FixedPointFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(),
+                       DepthConvertLayerFP32toQS16Dataset),
+                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                       DepthConvertLayerFixedPointQuantizedDataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunLargeQS8, CLDepthConvertLayerToQS8FixedPointFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(),
+                       DepthConvertLayerFP32toQS8Dataset),
+                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                       DepthConvertLayerFixedPointQuantizedDataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunLargeQS16, CLDepthConvertLayerToQS16FixedPointFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(),
+                       DepthConvertLayerFP32toQS16Dataset),
+                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                       DepthConvertLayerFixedPointQuantizedDataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+TEST_SUITE_END()
+
+TEST_SUITE_END()
+TEST_SUITE_END()
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
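A minimal usage sketch of the renamed CLDepthConvertLayer, not part of the patch. It assumes the rename keeps the configure(&src, &dst, policy, shift) signature that the configuration tests above exercise; tensor shapes are illustrative only.

    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"
    #include "arm_compute/runtime/CL/functions/CLDepthConvertLayer.h"

    using namespace arm_compute;

    void depth_convert_sketch()
    {
        CLScheduler::get().default_init();

        CLTensor src, dst;
        src.allocator()->init(TensorInfo(TensorShape(16U, 16U), 1, DataType::U8));
        dst.allocator()->init(TensorInfo(TensorShape(16U, 16U), 1, DataType::U16));

        CLDepthConvertLayer convert;
        // Up-conversion: each U8 value is shifted left by 2 bits. The
        // SATURATE/WRAP policy only changes the result for down-conversions,
        // which is why both policies are swept in the datasets above.
        convert.configure(&src, &dst, ConvertPolicy::SATURATE, 2 /* shift */);

        src.allocator()->allocate();
        dst.allocator()->allocate();

        convert.run();
        CLScheduler::get().sync();
    }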
diff --git a/tests/validation/CL/DepthwiseConvolution.cpp b/tests/validation/CL/DepthwiseConvolution.cpp
deleted file mode 100644
index ccd9c36561..0000000000
--- a/tests/validation/CL/DepthwiseConvolution.cpp
+++ /dev/null
@@ -1,114 +0,0 @@
-/*
- * Copyright (c) 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/CLTensor.h"
-#include "arm_compute/runtime/CL/CLTensorAllocator.h"
-#include "arm_compute/runtime/CL/functions/CLDepthwiseConvolution.h"
-#include "tests/CL/CLAccessor.h"
-#include "tests/PaddingCalculator.h"
-#include "tests/datasets/DepthwiseConvolutionDataset.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Macros.h"
-#include "tests/framework/datasets/Datasets.h"
-#include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/DepthwiseConvolutionFixture.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-namespace
-{
-constexpr RelativeTolerance<float> tolerance_f32(0.01f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 */
-constexpr RelativeTolerance tolerance_qasymm8(1);        /**< Tolerance value for comparing reference's output against implementation's output for DataType::QASYMM8 */
-} // namespace
-
-TEST_SUITE(CL)
-TEST_SUITE(DepthwiseConvolutionLayer)
-
-template <typename T>
-using CLDepthwiseConvolutionFixture = DepthwiseConvolutionValidationFixture;
-
-TEST_SUITE(Generic)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionFixture, framework::DatasetMode::ALL, combine(datasets::SmallDepthwiseConvolutionDataset(), framework::dataset::make("DataType",
-                       DataType::F32)))
-{
-    validate(CLAccessor(_target), _reference, tolerance_f32);
-}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionFixture, framework::DatasetMode::NIGHTLY, combine(datasets::LargeDepthwiseConvolutionDataset(), framework::dataset::make("DataType",
-                       DataType::F32)))
-{
-    validate(CLAccessor(_target), _reference, tolerance_f32);
-}
-TEST_SUITE_END()
-
-template <typename T>
-using CLDepthwiseConvolutionFixture3x3 = DepthwiseConvolutionValidationFixture;
-
-TEST_SUITE(Float)
-TEST_SUITE(FP32)
-TEST_SUITE(W3x3)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionFixture3x3, framework::DatasetMode::ALL, combine(datasets::SmallDepthwiseConvolutionDataset3x3(), framework::dataset::make("DataType",
-                       DataType::F32)))
-{
-    validate(CLAccessor(_target), _reference, tolerance_f32);
-}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionFixture3x3, framework::DatasetMode::NIGHTLY, combine(datasets::LargeDepthwiseConvolutionDataset3x3(), framework::dataset::make("DataType",
-                       DataType::F32)))
-{
-    validate(CLAccessor(_target), _reference, tolerance_f32);
-}
-TEST_SUITE_END()
-TEST_SUITE_END()
-TEST_SUITE_END()
-
-template <typename T>
-using CLDepthwiseConvolutionQuantizedFixture3x3 = DepthwiseConvolutionValidationQuantizedFixture;
-
-TEST_SUITE(Quantized)
-TEST_SUITE(QASYMM8)
-TEST_SUITE(W3x3)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionQuantizedFixture3x3, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallDepthwiseConvolutionDataset3x3(),
-                       framework::dataset::make("DataType", DataType::QASYMM8)),
-                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255, 127) })))
-{
-    validate(CLAccessor(_target), _reference, tolerance_qasymm8);
-}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionQuantizedFixture3x3, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeDepthwiseConvolutionDataset3x3(),
-                       framework::dataset::make("DataType", DataType::QASYMM8)),
-                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255, 127) })))
-{
-    validate(CLAccessor(_target), _reference, tolerance_qasymm8);
-}
-TEST_SUITE_END()
-TEST_SUITE_END()
-TEST_SUITE_END()
-
-TEST_SUITE_END()
-TEST_SUITE_END()
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
diff --git a/tests/validation/CL/DepthwiseConvolutionLayer.cpp b/tests/validation/CL/DepthwiseConvolutionLayer.cpp
new file mode 100644
index 0000000000..92a2773e54
--- /dev/null
+++ b/tests/validation/CL/DepthwiseConvolutionLayer.cpp
@@ -0,0 +1,117 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/CL/CLTensorAllocator.h"
+#include "arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h"
+#include "tests/CL/CLAccessor.h"
+#include "tests/PaddingCalculator.h"
+#include "tests/datasets/DepthwiseConvolutionLayerDataset.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+constexpr RelativeTolerance<float> tolerance_f32(0.01f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 */
+constexpr RelativeTolerance tolerance_qasymm8(1);        /**< Tolerance value for comparing reference's output against implementation's output for DataType::QASYMM8 */
+} // namespace
+
+TEST_SUITE(CL)
+TEST_SUITE(DepthwiseConvolutionLayer)
+
+template <typename T>
+using CLDepthwiseConvolutionLayerFixture = DepthwiseConvolutionLayerValidationFixture;
+
+TEST_SUITE(Generic)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerFixture, framework::DatasetMode::ALL, combine(datasets::SmallDepthwiseConvolutionLayerDataset(), framework::dataset::make("DataType",
+                       DataType::F32)))
+{
+    validate(CLAccessor(_target), _reference, tolerance_f32);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerFixture, framework::DatasetMode::NIGHTLY, combine(datasets::LargeDepthwiseConvolutionLayerDataset(),
+                       framework::dataset::make("DataType",
+                       DataType::F32)))
+{
+    validate(CLAccessor(_target), _reference, tolerance_f32);
+}
+TEST_SUITE_END()
+
+template <typename T>
+using CLDepthwiseConvolutionLayerFixture3x3 = DepthwiseConvolutionLayerValidationFixture;
+
+TEST_SUITE(Float)
+TEST_SUITE(FP32)
+TEST_SUITE(W3x3)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerFixture3x3, framework::DatasetMode::ALL, combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(),
+                       framework::dataset::make("DataType",
+                       DataType::F32)))
+{
+    validate(CLAccessor(_target), _reference, tolerance_f32);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerFixture3x3, framework::DatasetMode::NIGHTLY, combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(),
+                       framework::dataset::make("DataType",
+                       DataType::F32)))
+{
+    validate(CLAccessor(_target), _reference, tolerance_f32);
+}
+TEST_SUITE_END()
+TEST_SUITE_END()
+TEST_SUITE_END()
+
+template <typename T>
+using CLDepthwiseConvolutionLayerQuantizedFixture3x3 = DepthwiseConvolutionLayerValidationQuantizedFixture;
+
+TEST_SUITE(Quantized)
+TEST_SUITE(QASYMM8)
+TEST_SUITE(W3x3)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerQuantizedFixture3x3, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(),
+                       framework::dataset::make("DataType", DataType::QASYMM8)),
+                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255, 127) })))
+{
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerQuantizedFixture3x3, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(),
+                       framework::dataset::make("DataType", DataType::QASYMM8)),
+                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255, 127) })))
+{
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+TEST_SUITE_END()
+TEST_SUITE_END()
+TEST_SUITE_END()
+
+TEST_SUITE_END()
+TEST_SUITE_END()
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
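A minimal usage sketch of the renamed CLDepthwiseConvolutionLayer, not part of the patch. It assumes the rename keeps the configure(input, weights, biases, output, conv_info) interface of the pre-rename function; all shapes below are illustrative.

    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"
    #include "arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h"

    using namespace arm_compute;

    void depthwise_conv_sketch()
    {
        CLScheduler::get().default_init();

        CLTensor src, weights, biases, dst;
        src.allocator()->init(TensorInfo(TensorShape(32U, 32U, 16U), 1, DataType::F32));
        // Depthwise: one 3x3 filter per input channel, no cross-channel mixing.
        weights.allocator()->init(TensorInfo(TensorShape(3U, 3U, 16U), 1, DataType::F32));
        biases.allocator()->init(TensorInfo(TensorShape(16U), 1, DataType::F32));
        // Stride 1 with padding 1 keeps the spatial dimensions unchanged.
        dst.allocator()->init(TensorInfo(TensorShape(32U, 32U, 16U), 1, DataType::F32));

        CLDepthwiseConvolutionLayer dwc;
        dwc.configure(&src, &weights, &biases, &dst, PadStrideInfo(1, 1, 1, 1));

        src.allocator()->allocate();
        weights.allocator()->allocate();
        biases.allocator()->allocate();
        dst.allocator()->allocate();

        dwc.run();
        CLScheduler::get().sync();
    }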
diff --git a/tests/validation/CL/L2Normalize.cpp b/tests/validation/CL/L2Normalize.cpp
deleted file mode 100644
index 4b0820c211..0000000000
--- a/tests/validation/CL/L2Normalize.cpp
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * Copyright (c) 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/CLTensor.h"
-#include "arm_compute/runtime/CL/CLTensorAllocator.h"
-#include "arm_compute/runtime/CL/functions/CLL2Normalize.h"
-#include "tests/CL/CLAccessor.h"
-#include "tests/PaddingCalculator.h"
-#include "tests/datasets/ShapeDatasets.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Macros.h"
-#include "tests/framework/datasets/Datasets.h"
-#include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/L2NormalizeFixture.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-namespace
-{
-/** Tolerance for float operations */
-constexpr AbsoluteTolerance<float> tolerance_f32(0.00001f);
-
-} // namespace
-
-TEST_SUITE(CL)
-TEST_SUITE(L2Normalize)
-
-template <typename T>
-using CLL2NormalizeFixture = L2NormalizeValidationFixture;
-
-TEST_SUITE(Float)
-TEST_SUITE(FP32)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLL2NormalizeFixture, framework::DatasetMode::PRECOMMIT,
-                       combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("Axis", { 0 })), framework::dataset::make("Epsilon", { 1e-12 })))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference, tolerance_f32);
-}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLL2NormalizeFixture, framework::DatasetMode::NIGHTLY,
-                       combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("Axis", { 0 })), framework::dataset::make("Epsilon", { 1e-12 })))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference, tolerance_f32);
-}
-TEST_SUITE_END()
-TEST_SUITE_END()
-
-TEST_SUITE_END()
-TEST_SUITE_END()
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
diff --git a/tests/validation/CL/L2NormalizeLayer.cpp b/tests/validation/CL/L2NormalizeLayer.cpp
new file mode 100644
index 0000000000..bc2374bc68
--- /dev/null
+++ b/tests/validation/CL/L2NormalizeLayer.cpp
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/CL/CLTensorAllocator.h"
+#include "arm_compute/runtime/CL/functions/CLL2NormalizeLayer.h"
+#include "tests/CL/CLAccessor.h"
+#include "tests/PaddingCalculator.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/L2NormalizeLayerFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+/** Tolerance for float operations */
+constexpr AbsoluteTolerance<float> tolerance_f32(0.00001f);
+
+} // namespace
+
+TEST_SUITE(CL)
+TEST_SUITE(L2NormalizeLayer)
+
+template <typename T>
+using CLL2NormalizeLayerFixture = L2NormalizeLayerValidationFixture;
+
+TEST_SUITE(Float)
+TEST_SUITE(FP32)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLL2NormalizeLayerFixture, framework::DatasetMode::PRECOMMIT,
+                       combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("Axis", { 0 })), framework::dataset::make("Epsilon", { 1e-12 })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_f32);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, CLL2NormalizeLayerFixture, framework::DatasetMode::NIGHTLY,
+                       combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("Axis", { 0 })), framework::dataset::make("Epsilon", { 1e-12 })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_f32);
+}
+TEST_SUITE_END()
+TEST_SUITE_END()
+
+TEST_SUITE_END()
+TEST_SUITE_END()
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
- */ -#include "DepthConvert.h" - -#include "tests/validation/FixedPoint.h" -#include "tests/validation/Helpers.h" - -#include "tests/Types.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace reference -{ -template < typename T1, typename T2, typename std::enable_if < std::is_integral::value &&std::is_floating_point::value, int >::type > -SimpleTensor depth_convert(const SimpleTensor &src, DataType dt_out, ConvertPolicy policy, uint32_t shift) -{ - ARM_COMPUTE_UNUSED(policy); - ARM_COMPUTE_UNUSED(shift); - - using namespace fixed_point_arithmetic; - SimpleTensor result(src.shape(), dt_out); - - const int fixed_point_position = src.fixed_point_position(); - - for(int i = 0; i < src.num_elements(); ++i) - { - result[i] = static_cast(fixed_point(src[i], fixed_point_position, true)); - } - - return result; -} - -template < typename T1, typename T2, typename std::enable_if < std::is_floating_point::value &&std::is_integral::value, int >::type > -SimpleTensor depth_convert(const SimpleTensor &src, DataType dt_out, ConvertPolicy policy, uint32_t shift) -{ - ARM_COMPUTE_UNUSED(policy); - ARM_COMPUTE_UNUSED(shift); - - using namespace fixed_point_arithmetic; - SimpleTensor result(src.shape(), dt_out, 1, src.fixed_point_position()); - - const int fixed_point_position = result.fixed_point_position(); - - for(int i = 0; i < src.num_elements(); ++i) - { - result[i] = fixed_point(src[i], fixed_point_position).raw(); - } - - return result; -} - -template < typename T1, typename T2, typename std::enable_if < std::is_integral::value &&std::is_integral::value &&!std::is_same::value, int >::type > -SimpleTensor depth_convert(const SimpleTensor &src, DataType dt_out, ConvertPolicy policy, uint32_t shift) -{ - SimpleTensor result(src.shape(), dt_out); - - // Up-casting - if(src.data_type() <= dt_out) - { - for(int i = 0; i < src.num_elements(); ++i) - { - result[i] = src[i] << shift; - } - } - // Down-casting - else - { - for(int i = 0; i < src.num_elements(); ++i) - { - T1 val = src[i] >> shift; - result[i] = (policy == ConvertPolicy::SATURATE) ? saturate_cast(val) : static_cast(val); - } - } - return result; -} - -template < typename T1, typename T2, typename std::enable_if < std::is_integral::value &&std::is_integral::value &&std::is_same::value, int >::type > -SimpleTensor depth_convert(const SimpleTensor &src, DataType dt_out, ConvertPolicy policy, uint32_t shift) -{ - ARM_COMPUTE_UNUSED(policy); - - using namespace fixed_point_arithmetic; - - SimpleTensor result(src.shape(), dt_out); - - bool is_in_place = (&src == &result); - - const int fixed_point_position_in = src.fixed_point_position(); - const int fixed_point_position_out = (is_in_place) ? 
static_cast(shift) : result.fixed_point_position(); - - if(!is_in_place || (fixed_point_position_in != fixed_point_position_out)) - { - for(int i = 0; i < src.num_elements(); ++i) - { - auto x = fixed_point(src[i], fixed_point_position_in, true); - x.rescale(fixed_point_position_out); - result[i] = x.raw(); - } - } - - return result; -} - -template < typename T1, typename T2, typename std::enable_if < std::is_floating_point::value &&is_floating_point::value, int >::type > -SimpleTensor depth_convert(const SimpleTensor &src, DataType dt_out, ConvertPolicy policy, uint32_t shift) -{ - ARM_COMPUTE_UNUSED(policy); - ARM_COMPUTE_UNUSED(shift); - - SimpleTensor result(src.shape(), dt_out); - - for(int i = 0; i < src.num_elements(); ++i) - { - result[i] = static_cast(src[i]); - } - - return result; -} - -template SimpleTensor depth_convert(const SimpleTensor &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); -template SimpleTensor depth_convert(const SimpleTensor &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); -template SimpleTensor depth_convert(const SimpleTensor &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); -template SimpleTensor depth_convert(const SimpleTensor &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); -template SimpleTensor depth_convert(const SimpleTensor &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); -template SimpleTensor depth_convert(const SimpleTensor &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); -template SimpleTensor depth_convert(const SimpleTensor &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); -template SimpleTensor depth_convert(const SimpleTensor &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); -template SimpleTensor depth_convert(const SimpleTensor &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); -template SimpleTensor depth_convert(const SimpleTensor &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); -template SimpleTensor depth_convert(const SimpleTensor &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); -} // namespace reference -} // namespace validation -} // namespace test -} // namespace arm_compute diff --git a/tests/validation/CPP/DepthConvert.h b/tests/validation/CPP/DepthConvert.h deleted file mode 100644 index 1446bfda5b..0000000000 --- a/tests/validation/CPP/DepthConvert.h +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright (c) 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef __ARM_COMPUTE_TEST_DEPTH_CONVERT_H__ -#define __ARM_COMPUTE_TEST_DEPTH_CONVERT_H__ - -#include "tests/SimpleTensor.h" -#include "tests/validation/Helpers.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace reference -{ -template < typename T1, typename T2, typename std::enable_if < std::is_integral::value &&std::is_floating_point::value, int >::type = 0 > -SimpleTensor depth_convert(const SimpleTensor &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); - -template < typename T1, typename T2, typename std::enable_if < std::is_floating_point::value &&std::is_integral::value, int >::type = 0 > -SimpleTensor depth_convert(const SimpleTensor &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); - -template < typename T1, typename T2, typename std::enable_if < std::is_integral::value &&std::is_integral::value &&!std::is_same::value, int >::type = 0 > -SimpleTensor depth_convert(const SimpleTensor &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); - -template < typename T1, typename T2, typename std::enable_if < std::is_integral::value &&std::is_integral::value &&std::is_same::value, int >::type = 0 > -SimpleTensor depth_convert(const SimpleTensor &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); - -template < typename T1, typename T2, typename std::enable_if < std::is_floating_point::value &&is_floating_point::value, int >::type = 0 > -SimpleTensor depth_convert(const SimpleTensor &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); -} // namespace reference -} // namespace validation -} // namespace test -} // namespace arm_compute -#endif /* __ARM_COMPUTE_TEST_DEPTH_CONVERT_H__ */ diff --git a/tests/validation/CPP/DepthConvertLayer.cpp b/tests/validation/CPP/DepthConvertLayer.cpp new file mode 100644 index 0000000000..dd095b8912 --- /dev/null +++ b/tests/validation/CPP/DepthConvertLayer.cpp @@ -0,0 +1,157 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "DepthConvertLayer.h" + +#include "tests/validation/FixedPoint.h" +#include "tests/validation/Helpers.h" + +#include "tests/Types.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace reference +{ +template < typename T1, typename T2, typename std::enable_if < std::is_integral::value &&std::is_floating_point::value, int >::type > +SimpleTensor depth_convert(const SimpleTensor &src, DataType dt_out, ConvertPolicy policy, uint32_t shift) +{ + ARM_COMPUTE_UNUSED(policy); + ARM_COMPUTE_UNUSED(shift); + + using namespace fixed_point_arithmetic; + SimpleTensor result(src.shape(), dt_out); + + const int fixed_point_position = src.fixed_point_position(); + + for(int i = 0; i < src.num_elements(); ++i) + { + result[i] = static_cast(fixed_point(src[i], fixed_point_position, true)); + } + + return result; +} + +template < typename T1, typename T2, typename std::enable_if < std::is_floating_point::value &&std::is_integral::value, int >::type > +SimpleTensor depth_convert(const SimpleTensor &src, DataType dt_out, ConvertPolicy policy, uint32_t shift) +{ + ARM_COMPUTE_UNUSED(policy); + ARM_COMPUTE_UNUSED(shift); + + using namespace fixed_point_arithmetic; + SimpleTensor result(src.shape(), dt_out, 1, src.fixed_point_position()); + + const int fixed_point_position = result.fixed_point_position(); + + for(int i = 0; i < src.num_elements(); ++i) + { + result[i] = fixed_point(src[i], fixed_point_position).raw(); + } + + return result; +} + +template < typename T1, typename T2, typename std::enable_if < std::is_integral::value &&std::is_integral::value &&!std::is_same::value, int >::type > +SimpleTensor depth_convert(const SimpleTensor &src, DataType dt_out, ConvertPolicy policy, uint32_t shift) +{ + SimpleTensor result(src.shape(), dt_out); + + // Up-casting + if(src.data_type() <= dt_out) + { + for(int i = 0; i < src.num_elements(); ++i) + { + result[i] = src[i] << shift; + } + } + // Down-casting + else + { + for(int i = 0; i < src.num_elements(); ++i) + { + T1 val = src[i] >> shift; + result[i] = (policy == ConvertPolicy::SATURATE) ? saturate_cast(val) : static_cast(val); + } + } + return result; +} + +template < typename T1, typename T2, typename std::enable_if < std::is_integral::value &&std::is_integral::value &&std::is_same::value, int >::type > +SimpleTensor depth_convert(const SimpleTensor &src, DataType dt_out, ConvertPolicy policy, uint32_t shift) +{ + ARM_COMPUTE_UNUSED(policy); + + using namespace fixed_point_arithmetic; + + SimpleTensor result(src.shape(), dt_out); + + bool is_in_place = (&src == &result); + + const int fixed_point_position_in = src.fixed_point_position(); + const int fixed_point_position_out = (is_in_place) ? 
static_cast(shift) : result.fixed_point_position(); + + if(!is_in_place || (fixed_point_position_in != fixed_point_position_out)) + { + for(int i = 0; i < src.num_elements(); ++i) + { + auto x = fixed_point(src[i], fixed_point_position_in, true); + x.rescale(fixed_point_position_out); + result[i] = x.raw(); + } + } + + return result; +} + +template < typename T1, typename T2, typename std::enable_if < std::is_floating_point::value &&is_floating_point::value, int >::type > +SimpleTensor depth_convert(const SimpleTensor &src, DataType dt_out, ConvertPolicy policy, uint32_t shift) +{ + ARM_COMPUTE_UNUSED(policy); + ARM_COMPUTE_UNUSED(shift); + + SimpleTensor result(src.shape(), dt_out); + + for(int i = 0; i < src.num_elements(); ++i) + { + result[i] = static_cast(src[i]); + } + + return result; +} + +template SimpleTensor depth_convert(const SimpleTensor &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +template SimpleTensor depth_convert(const SimpleTensor &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +template SimpleTensor depth_convert(const SimpleTensor &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +template SimpleTensor depth_convert(const SimpleTensor &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +template SimpleTensor depth_convert(const SimpleTensor &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +template SimpleTensor depth_convert(const SimpleTensor &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +template SimpleTensor depth_convert(const SimpleTensor &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +template SimpleTensor depth_convert(const SimpleTensor &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +template SimpleTensor depth_convert(const SimpleTensor &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +template SimpleTensor depth_convert(const SimpleTensor &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +template SimpleTensor depth_convert(const SimpleTensor &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +} // namespace reference +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/CPP/DepthConvertLayer.h b/tests/validation/CPP/DepthConvertLayer.h new file mode 100644 index 0000000000..1446bfda5b --- /dev/null +++ b/tests/validation/CPP/DepthConvertLayer.h @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_TEST_DEPTH_CONVERT_H__ +#define __ARM_COMPUTE_TEST_DEPTH_CONVERT_H__ + +#include "tests/SimpleTensor.h" +#include "tests/validation/Helpers.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace reference +{ +template < typename T1, typename T2, typename std::enable_if < std::is_integral::value &&std::is_floating_point::value, int >::type = 0 > +SimpleTensor depth_convert(const SimpleTensor &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); + +template < typename T1, typename T2, typename std::enable_if < std::is_floating_point::value &&std::is_integral::value, int >::type = 0 > +SimpleTensor depth_convert(const SimpleTensor &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); + +template < typename T1, typename T2, typename std::enable_if < std::is_integral::value &&std::is_integral::value &&!std::is_same::value, int >::type = 0 > +SimpleTensor depth_convert(const SimpleTensor &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); + +template < typename T1, typename T2, typename std::enable_if < std::is_integral::value &&std::is_integral::value &&std::is_same::value, int >::type = 0 > +SimpleTensor depth_convert(const SimpleTensor &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); + +template < typename T1, typename T2, typename std::enable_if < std::is_floating_point::value &&is_floating_point::value, int >::type = 0 > +SimpleTensor depth_convert(const SimpleTensor &src, DataType dt_out, ConvertPolicy policy, uint32_t shift); +} // namespace reference +} // namespace validation +} // namespace test +} // namespace arm_compute +#endif /* __ARM_COMPUTE_TEST_DEPTH_CONVERT_H__ */ diff --git a/tests/validation/CPP/DepthwiseConvolution.cpp b/tests/validation/CPP/DepthwiseConvolution.cpp deleted file mode 100644 index 229e044783..0000000000 --- a/tests/validation/CPP/DepthwiseConvolution.cpp +++ /dev/null @@ -1,195 +0,0 @@ -/* - * Copyright (c) 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
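The depth_convert reference renamed above converts integer tensors by shifting: up-casts apply value << shift, while down-casts apply value >> shift and then either wrap or saturate according to the ConvertPolicy. A minimal sketch of the down-cast path, assuming a clamp-based saturating narrow similar in spirit to (but not identical to) the library's saturate_cast helper:

    #include <cstddef>
    #include <cstdint>
    #include <limits>
    #include <vector>

    // Illustrative stand-in for a saturating cast: clamp to the destination
    // type's range before narrowing.
    template <typename To, typename From>
    To saturating_narrow(From v)
    {
        const From lo = static_cast<From>(std::numeric_limits<To>::lowest());
        const From hi = static_cast<From>(std::numeric_limits<To>::max());
        return static_cast<To>(v < lo ? lo : (v > hi ? hi : v));
    }

    // Down-cast with a right shift, as in the reference's integer path:
    // SATURATE clamps into range, WRAP simply truncates.
    std::vector<uint8_t> depth_convert_down(const std::vector<int16_t> &src, uint32_t shift, bool saturate)
    {
        std::vector<uint8_t> dst(src.size());
        for(std::size_t i = 0; i < src.size(); ++i)
        {
            const int16_t v = static_cast<int16_t>(src[i] >> shift);
            dst[i] = saturate ? saturating_narrow<uint8_t>(v) : static_cast<uint8_t>(v);
        }
        return dst;
    }

For example, with shift = 1 and SATURATE an int16_t input of 600 becomes 300 after the shift and is clamped to 255 on the way to uint8_t, whereas WRAP would store 300 & 0xFF = 44.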
- */ -#include "DepthwiseConvolution.h" - -#include "ConvolutionLayer.h" -#include "Utils.h" - -#include "tests/validation/CPP/Utils.h" -#include "tests/validation/CPP/UtilsQuantizedAsymm.h" -#include "tests/validation/FixedPoint.h" -#include "tests/validation/Helpers.h" - -#include "arm_compute/core/utils/quantization/AsymmHelpers.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace reference -{ -/** Perform a depthwise convolution - * - * - Three dimensions tensors - * - Third dimention is number of channels - * - Depths of input tensor and filter are equals - * - Padding, stride and output shape "match" - * - */ -template -SimpleTensor depthwise_convolution(const SimpleTensor &src, const SimpleTensor &weights, const SimpleTensor &biases, const TensorShape &dst_shape, const PadStrideInfo &conv_info) -{ - // Create reference - SimpleTensor dst{ dst_shape, src.data_type(), 1, src.fixed_point_position() }; - - // Compute reference - const int filter_width = weights.shape().x(); - const int filter_height = weights.shape().y(); - const int filter_plane = filter_width * filter_height; - const int input_width = src.shape().x(); - const int input_height = src.shape().y(); - const int input_depth = src.shape().z(); - const int num_batches = src.shape().total_size() / (input_width * input_height * input_depth); - - const int filter_half_width = filter_width / 2; - const int filter_half_height = filter_height / 2; - - const int pad_left = std::min(static_cast(conv_info.pad_left()), filter_half_width); - const int pad_top = std::min(static_cast(conv_info.pad_top()), filter_half_height); - const int pad_right = std::min(static_cast(conv_info.pad_right()), filter_half_width); - const int pad_bottom = std::min(static_cast(conv_info.pad_bottom()), filter_half_height); - - const int minimum_x = -pad_left + filter_half_width; - const int minimum_y = -pad_top + filter_half_height; - const int maximum_x = input_width + pad_left - filter_half_width + pad_right - filter_half_width; - const int maximum_y = input_height + pad_top - filter_half_height + pad_bottom - filter_half_height; - - int out_pos = 0; - for(int r = 0; r < num_batches; ++r) - { - for(int z = 0; z < input_depth; ++z) - { - for(int y = minimum_y; y < minimum_y + maximum_y; y += conv_info.stride().second) - { - for(int x = minimum_x; x < minimum_x + maximum_x; x += conv_info.stride().first) - { - Coordinates coords(static_cast(x), static_cast(y), static_cast(z), static_cast(r)); - size_t filter_offset = filter_plane * z; - - T val = 0; - for(int j = y - filter_half_height; j <= static_cast(y + filter_half_height); ++j) - { - for(int i = x - filter_half_width; i <= static_cast(x + filter_half_width); ++i) - { - coords.set(0, i); - coords.set(1, j); - val += *(weights.data() + filter_offset) * tensor_elem_at(src, coords, BorderMode::CONSTANT, 0.f); - ++filter_offset; - } - } - coords.set(0, x); - coords.set(1, y); - dst[out_pos++] = saturate_cast(val + *static_cast(biases(Coordinates(z)))); - } - } - } - } - - return dst; -} - -template <> -SimpleTensor depthwise_convolution(const SimpleTensor &src, const SimpleTensor &weights, const SimpleTensor &biases, const TensorShape &dst_shape, - const PadStrideInfo &conv_info) -{ - // Create reference - SimpleTensor dst{ dst_shape, src.data_type(), 1, src.fixed_point_position(), src.quantization_info() }; - - const int input_offset = -src.quantization_info().offset; - const float input_scale = src.quantization_info().scale; - const int weights_offset = 
-weights.quantization_info().offset; - const float weights_scale = weights.quantization_info().scale; - const int output_offset = dst.quantization_info().offset; - const float output_scale = dst.quantization_info().scale; - - int output_multiplier; - int output_shift; - const float multiplier = input_scale * weights_scale / output_scale; - arm_compute::quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift); - - // Compute reference - const int filter_width = weights.shape().x(); - const int filter_height = weights.shape().y(); - const int filter_plane = filter_width * filter_height; - const int input_width = src.shape().x(); - const int input_height = src.shape().y(); - const int input_depth = src.shape().z(); - const int num_batches = src.shape().total_size() / (input_width * input_height * input_depth); - - const int filter_half_size = filter_width / 2; - const int pad_x = std::min(filter_half_size, static_cast(conv_info.pad().first)); - const int pad_y = std::min(filter_half_size, static_cast(conv_info.pad().second)); - const int minimum_x = -pad_x + filter_half_size; - const int minimum_y = -pad_y + filter_half_size; - - int out_pos = 0; - for(int r = 0; r < num_batches; ++r) - { - for(int z = 0; z < input_depth; ++z) - { - int32_t bias_val = *static_cast(biases(Coordinates(z))); - for(int y = minimum_y; y < input_height + pad_y - filter_half_size; y += conv_info.stride().second) - { - for(int x = minimum_x; x < input_width + pad_x - filter_half_size; x += conv_info.stride().first) - { - Coordinates coords(x, y, z); - int filter_offset = filter_plane * z; - - int32_t val = 0; - for(int j = y - filter_half_size; j <= (y + filter_half_size); ++j) - { - for(int i = x - filter_half_size; i <= (x + filter_half_size); ++i) - { - coords.set(0, i); - coords.set(1, j); - auto in_val = tensor_elem_at(src, coords, BorderMode::CONSTANT, 0); - uint8_t w_val = *(weights.data() + filter_offset); - val += (in_val + input_offset) * (w_val + weights_offset); - ++filter_offset; - } - } - val += bias_val; - val = asymm_rounding_divide_by_pow2(asymm_int_mult(val, output_multiplier), output_shift); - val += output_offset; - val = std::max(val, 0); - val = std::min(val, 255); - - // Store the result - dst[out_pos++] = val; - } - } - } - } - - return dst; -} - -template SimpleTensor depthwise_convolution(const SimpleTensor &src, const SimpleTensor &weights, const SimpleTensor &biases, const TensorShape &dst_shape, - const PadStrideInfo &conv_info); -} // namespace reference -} // namespace validation -} // namespace test -} // namespace arm_compute diff --git a/tests/validation/CPP/DepthwiseConvolution.h b/tests/validation/CPP/DepthwiseConvolution.h deleted file mode 100644 index df743a5b8e..0000000000 --- a/tests/validation/CPP/DepthwiseConvolution.h +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright (c) 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software.
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef __ARM_COMPUTE_TEST_DEPTHWISE_CONVOLUTION_H__ -#define __ARM_COMPUTE_TEST_DEPTHWISE_CONVOLUTION_H__ - -#include "tests/SimpleTensor.h" -#include "tests/validation/Helpers.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace reference -{ -template -SimpleTensor depthwise_convolution(const SimpleTensor &src, const SimpleTensor &weights, const SimpleTensor &biases, const TensorShape &dst_shape, const PadStrideInfo &conv_info); -} // namespace reference -} // namespace validation -} // namespace test -} // namespace arm_compute -#endif /* __ARM_COMPUTE_TEST_DEPTHWISE_SEPARABLE_CONVOLUTION_LAYER_H__ */ diff --git a/tests/validation/CPP/DepthwiseConvolutionLayer.cpp b/tests/validation/CPP/DepthwiseConvolutionLayer.cpp new file mode 100644 index 0000000000..99baa4b3c7 --- /dev/null +++ b/tests/validation/CPP/DepthwiseConvolutionLayer.cpp @@ -0,0 +1,195 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
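For orientation while reading the re-added reference below: every depthwise output element is the dot product of one 2D filter plane with a window of its own input channel, plus that channel's bias; unlike a regular convolution, channels are never mixed. A minimal single-channel sketch with unit stride and no padding (names are illustrative only):

    #include <cstddef>
    #include <vector>

    // Depthwise 2D convolution for a single channel: stride 1, no padding.
    // out(y, x) = bias + sum over (j, i) of in(y + j, x + i) * w(j, i)
    std::vector<float> depthwise_channel(const std::vector<float> &in, int in_w, int in_h,
                                         const std::vector<float> &w, int k, float bias)
    {
        const int out_w = in_w - k + 1;
        const int out_h = in_h - k + 1;
        std::vector<float> out(static_cast<std::size_t>(out_w) * out_h, bias);
        for(int y = 0; y < out_h; ++y)
            for(int x = 0; x < out_w; ++x)
                for(int j = 0; j < k; ++j)
                    for(int i = 0; i < k; ++i)
                        out[y * out_w + x] += in[(y + j) * in_w + (x + i)] * w[j * k + i];
        return out;
    }

The reference below generalises this with per-side padding clamped to the filter half-size, configurable strides, and a batch loop.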
+ */ +#include "DepthwiseConvolutionLayer.h" + +#include "ConvolutionLayer.h" +#include "Utils.h" + +#include "tests/validation/CPP/Utils.h" +#include "tests/validation/CPP/UtilsQuantizedAsymm.h" +#include "tests/validation/FixedPoint.h" +#include "tests/validation/Helpers.h" + +#include "arm_compute/core/utils/quantization/AsymmHelpers.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace reference +{ +/** Perform a depthwise convolution + * + * - Three dimensions tensors + * - Third dimention is number of channels + * - Depths of input tensor and filter are equals + * - Padding, stride and output shape "match" + * + */ +template +SimpleTensor depthwise_convolution(const SimpleTensor &src, const SimpleTensor &weights, const SimpleTensor &biases, const TensorShape &dst_shape, const PadStrideInfo &conv_info) +{ + // Create reference + SimpleTensor dst{ dst_shape, src.data_type(), 1, src.fixed_point_position() }; + + // Compute reference + const int filter_width = weights.shape().x(); + const int filter_height = weights.shape().y(); + const int filter_plane = filter_width * filter_height; + const int input_width = src.shape().x(); + const int input_height = src.shape().y(); + const int input_depth = src.shape().z(); + const int num_batches = src.shape().total_size() / (input_width * input_height * input_depth); + + const int filter_half_width = filter_width / 2; + const int filter_half_height = filter_height / 2; + + const int pad_left = std::min(static_cast(conv_info.pad_left()), filter_half_width); + const int pad_top = std::min(static_cast(conv_info.pad_top()), filter_half_height); + const int pad_right = std::min(static_cast(conv_info.pad_right()), filter_half_width); + const int pad_bottom = std::min(static_cast(conv_info.pad_bottom()), filter_half_height); + + const int minimum_x = -pad_left + filter_half_width; + const int minimum_y = -pad_top + filter_half_height; + const int maximum_x = input_width + pad_left - filter_half_width + pad_right - filter_half_width; + const int maximum_y = input_height + pad_top - filter_half_height + pad_bottom - filter_half_height; + + int out_pos = 0; + for(int r = 0; r < num_batches; ++r) + { + for(int z = 0; z < input_depth; ++z) + { + for(int y = minimum_y; y < minimum_y + maximum_y; y += conv_info.stride().second) + { + for(int x = minimum_x; x < minimum_x + maximum_x; x += conv_info.stride().first) + { + Coordinates coords(static_cast(x), static_cast(y), static_cast(z), static_cast(r)); + size_t filter_offset = filter_plane * z; + + T val = 0; + for(int j = y - filter_half_height; j <= static_cast(y + filter_half_height); ++j) + { + for(int i = x - filter_half_width; i <= static_cast(x + filter_half_width); ++i) + { + coords.set(0, i); + coords.set(1, j); + val += *(weights.data() + filter_offset) * tensor_elem_at(src, coords, BorderMode::CONSTANT, 0.f); + ++filter_offset; + } + } + coords.set(0, x); + coords.set(1, y); + dst[out_pos++] = saturate_cast(val + *static_cast(biases(Coordinates(z)))); + } + } + } + } + + return dst; +} + +template <> +SimpleTensor depthwise_convolution(const SimpleTensor &src, const SimpleTensor &weights, const SimpleTensor &biases, const TensorShape &dst_shape, + const PadStrideInfo &conv_info) +{ + // Create reference + SimpleTensor dst{ dst_shape, src.data_type(), 1, src.fixed_point_position(), src.quantization_info() }; + + const int input_offset = -src.quantization_info().offset; + const float input_scale = src.quantization_info().scale; + const int weights_offset = 
-weights.quantization_info().offset; + const float weights_scale = weights.quantization_info().scale; + const int output_offset = dst.quantization_info().offset; + const float output_scale = dst.quantization_info().scale; + + int output_multiplier; + int output_shift; + const float multiplier = input_scale * weights_scale / output_scale; + arm_compute::quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift); + + // Compute reference + const int filter_width = weights.shape().x(); + const int filter_height = weights.shape().y(); + const int filter_plane = filter_width * filter_height; + const int input_width = src.shape().x(); + const int input_height = src.shape().y(); + const int input_depth = src.shape().z(); + const int num_batches = src.shape().total_size() / (input_width * input_height * input_depth); + + const int filter_half_size = filter_width / 2; + const int pad_x = std::min(filter_half_size, static_cast(conv_info.pad().first)); + const int pad_y = std::min(filter_half_size, static_cast(conv_info.pad().second)); + const int minimum_x = -pad_x + filter_half_size; + const int minimum_y = -pad_y + filter_half_size; + + int out_pos = 0; + for(int r = 0; r < num_batches; ++r) + { + for(int z = 0; z < input_depth; ++z) + { + int32_t bias_val = *static_cast(biases(Coordinates(z))); + for(int y = minimum_y; y < input_height + pad_y - filter_half_size; y += conv_info.stride().second) + { + for(int x = minimum_x; x < input_width + pad_x - filter_half_size; x += conv_info.stride().first) + { + Coordinates coords(x, y, z); + int filter_offset = filter_plane * z; + + int32_t val = 0; + for(int j = y - filter_half_size; j <= (y + filter_half_size); ++j) + { + for(int i = x - filter_half_size; i <= (x + filter_half_size); ++i) + { + coords.set(0, i); + coords.set(1, j); + auto in_val = tensor_elem_at(src, coords, BorderMode::CONSTANT, 0); + uint8_t w_val = *(weights.data() + filter_offset); + val += (in_val + input_offset) * (w_val + weights_offset); + ++filter_offset; + } + } + val += bias_val; + val = asymm_rounding_divide_by_pow2(asymm_int_mult(val, output_multiplier), output_shift); + val += output_offset; + val = std::max(val, 0); + val = std::min(val, 255); + + // Store the result + dst[out_pos++] = val; + } + } + } + } + + return dst; +} + +template SimpleTensor depthwise_convolution(const SimpleTensor &src, const SimpleTensor &weights, const SimpleTensor &biases, const TensorShape &dst_shape, + const PadStrideInfo &conv_info); +} // namespace reference +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/CPP/DepthwiseConvolutionLayer.h b/tests/validation/CPP/DepthwiseConvolutionLayer.h new file mode 100644 index 0000000000..df743a5b8e --- /dev/null +++ b/tests/validation/CPP/DepthwiseConvolutionLayer.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_TEST_DEPTHWISE_CONVOLUTION_H__ +#define __ARM_COMPUTE_TEST_DEPTHWISE_CONVOLUTION_H__ + +#include "tests/SimpleTensor.h" +#include "tests/validation/Helpers.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace reference +{ +template +SimpleTensor depthwise_convolution(const SimpleTensor &src, const SimpleTensor &weights, const SimpleTensor &biases, const TensorShape &dst_shape, const PadStrideInfo &conv_info); +} // namespace reference +} // namespace validation +} // namespace test +} // namespace arm_compute +#endif /* __ARM_COMPUTE_TEST_DEPTHWISE_SEPARABLE_CONVOLUTION_LAYER_H__ */ diff --git a/tests/validation/CPP/DepthwiseSeparableConvolutionLayer.cpp b/tests/validation/CPP/DepthwiseSeparableConvolutionLayer.cpp index 8c8e50d349..ca6c168114 100644 --- a/tests/validation/CPP/DepthwiseSeparableConvolutionLayer.cpp +++ b/tests/validation/CPP/DepthwiseSeparableConvolutionLayer.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "DepthwiseConvolution.h" +#include "DepthwiseConvolutionLayer.h" #include "DepthwiseSeparableConvolutionLayer.h" diff --git a/tests/validation/CPP/L2Normalize.cpp b/tests/validation/CPP/L2Normalize.cpp deleted file mode 100644 index 4fb4d57eb4..0000000000 --- a/tests/validation/CPP/L2Normalize.cpp +++ /dev/null @@ -1,88 +0,0 @@ -/* - * Copyright (c) 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
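The QASYMM8 specialisation above accumulates (in_val + input_offset) * (w_val + weights_offset) in 32-bit integers and then requantizes: the real factor input_scale * weights_scale / output_scale is split into a normalised integer multiplier plus a right shift, which the asymm_int_mult / asymm_rounding_divide_by_pow2 pair then applies. A sketch of the idea behind that split (an assumption-laden reimplementation, not the library's calculate_quantized_multiplier_less_than_one itself):

    #include <cmath>
    #include <cstdint>

    // Split a real multiplier m in (0, 1) into a Q31 mantissa and a right
    // shift so that m ~= (quantized_multiplier / 2^31) * 2^-right_shift.
    void quantize_multiplier(double m, int32_t *quantized_multiplier, int *right_shift)
    {
        int exponent = 0;
        const double mantissa = std::frexp(m, &exponent); // m = mantissa * 2^exponent, mantissa in [0.5, 1)
        int64_t q = static_cast<int64_t>(std::llround(mantissa * (1ll << 31)));
        if(q == (1ll << 31)) // mantissa rounded up to exactly 1.0
        {
            q /= 2;
            ++exponent;
        }
        *quantized_multiplier = static_cast<int32_t>(q);
        *right_shift          = -exponent; // non-negative for m < 1
    }

At run time the requantization then reduces to one high 32x32-bit multiply followed by a rounding right shift, keeping the whole reference path in integer arithmetic.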
- */ -#include "L2Normalize.h" -#include "ReductionOperation.h" - -#include "tests/validation/Helpers.h" - -#include -#include - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace reference -{ -namespace -{ -TensorShape get_output_shape(TensorShape shape, unsigned int axis) -{ - TensorShape output_shape(shape); - output_shape.set(axis, 1); - return output_shape; -} -} // namespace - -template -SimpleTensor l2_normalize(const SimpleTensor &src, unsigned int axis, float epsilon) -{ - // Create reference - SimpleTensor dst{ src.shape(), src.data_type() }; - - // Reduce across given axis - SimpleTensor sum = reduction_operation(src, get_output_shape(src.shape(), axis), axis, ReductionOperation::SUM_SQUARE); - - // Compute reference - const int elems = src.shape()[axis]; - const int upper_dims = src.shape().total_size_upper(axis + 1); - - for(int du = 0; du < upper_dims; ++du) - { - if(axis == 0) - { - const T *src_row_ptr = src.data() + du * elems; - T *dst_row_ptr = dst.data() + du * elems; - const T normalization_value = std::sqrt(std::max(sum[du], epsilon)); - std::transform(src_row_ptr, src_row_ptr + elems, dst_row_ptr, [normalization_value](T val) - { - return val / normalization_value; - }); - } - else - { - ARM_COMPUTE_ERROR("Unsupported normalization axis"); - } - } - - return dst; -} - -template SimpleTensor l2_normalize(const SimpleTensor &src, unsigned int axis, float epsilon); -} // namespace reference -} // namespace validation -} // namespace test -} // namespace arm_compute diff --git a/tests/validation/CPP/L2Normalize.h b/tests/validation/CPP/L2Normalize.h deleted file mode 100644 index 1db3ae6174..0000000000 --- a/tests/validation/CPP/L2Normalize.h +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright (c) 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef __ARM_COMPUTE_TEST_L2NORMALIZE_H__ -#define __ARM_COMPUTE_TEST_L2NORMALIZE_H__ - -#include "tests/SimpleTensor.h" -#include "tests/validation/Helpers.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace reference -{ -template -SimpleTensor l2_normalize(const SimpleTensor &src, unsigned int axis, float epsilon); -} // namespace reference -} // namespace validation -} // namespace test -} // namespace arm_compute -#endif /* __ARM_COMPUTE_TEST_L2NORMALIZE_H__ */ diff --git a/tests/validation/CPP/L2NormalizeLayer.cpp b/tests/validation/CPP/L2NormalizeLayer.cpp new file mode 100644 index 0000000000..99f4e8a6e6 --- /dev/null +++ b/tests/validation/CPP/L2NormalizeLayer.cpp @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "L2NormalizeLayer.h" +#include "ReductionOperation.h" + +#include "tests/validation/Helpers.h" + +#include +#include + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace reference +{ +namespace +{ +TensorShape get_output_shape(TensorShape shape, unsigned int axis) +{ + TensorShape output_shape(shape); + output_shape.set(axis, 1); + return output_shape; +} +} // namespace + +template +SimpleTensor l2_normalize(const SimpleTensor &src, unsigned int axis, float epsilon) +{ + // Create reference + SimpleTensor dst{ src.shape(), src.data_type() }; + + // Reduce across given axis + SimpleTensor sum = reduction_operation(src, get_output_shape(src.shape(), axis), axis, ReductionOperation::SUM_SQUARE); + + // Compute reference + const int elems = src.shape()[axis]; + const int upper_dims = src.shape().total_size_upper(axis + 1); + + for(int du = 0; du < upper_dims; ++du) + { + if(axis == 0) + { + const T *src_row_ptr = src.data() + du * elems; + T *dst_row_ptr = dst.data() + du * elems; + const T normalization_value = std::sqrt(std::max(sum[du], epsilon)); + std::transform(src_row_ptr, src_row_ptr + elems, dst_row_ptr, [normalization_value](T val) + { + return val / normalization_value; + }); + } + else + { + ARM_COMPUTE_ERROR("Unsupported normalization axis"); + } + } + + return dst; +} + +template SimpleTensor l2_normalize(const SimpleTensor &src, unsigned int axis, float epsilon); +} // namespace reference +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/CPP/L2NormalizeLayer.h b/tests/validation/CPP/L2NormalizeLayer.h new file mode 100644 index 0000000000..1db3ae6174 --- /dev/null +++ b/tests/validation/CPP/L2NormalizeLayer.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_TEST_L2NORMALIZE_H__ +#define __ARM_COMPUTE_TEST_L2NORMALIZE_H__ + +#include "tests/SimpleTensor.h" +#include "tests/validation/Helpers.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace reference +{ +template +SimpleTensor l2_normalize(const SimpleTensor &src, unsigned int axis, float epsilon); +} // namespace reference +} // namespace validation +} // namespace test +} // namespace arm_compute +#endif /* __ARM_COMPUTE_TEST_L2NORMALIZE_H__ */ diff --git a/tests/validation/GLES_COMPUTE/DepthConcatenateLayer.cpp b/tests/validation/GLES_COMPUTE/DepthConcatenateLayer.cpp index 829845dd36..7af3050c1d 100644 --- a/tests/validation/GLES_COMPUTE/DepthConcatenateLayer.cpp +++ b/tests/validation/GLES_COMPUTE/DepthConcatenateLayer.cpp @@ -24,7 +24,7 @@ #include "arm_compute/core/Types.h" #include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h" #include "arm_compute/runtime/GLES_COMPUTE/GCTensorAllocator.h" -#include "arm_compute/runtime/GLES_COMPUTE/functions/GCDepthConcatenate.h" +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCDepthConcatenateLayer.h" #include "tests/GLES_COMPUTE/GCAccessor.h" #include "tests/datasets/ShapeDatasets.h" #include "tests/framework/Asserts.h" @@ -45,7 +45,7 @@ TEST_SUITE(DepthConcatenateLayer) //TODO(COMPMID-415): Add configuration test? template -using GCDepthConcatenateLayerFixture = DepthConcatenateValidationFixture; +using GCDepthConcatenateLayerFixture = DepthConcatenateLayerValidationFixture; TEST_SUITE(Float) TEST_SUITE(FP16) @@ -70,7 +70,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, GCDepthConcatenateLayerFixture, framewor // Validate output validate(GCAccessor(_target), _reference); } -FIXTURE_DATA_TEST_CASE(RunLarge, GCDepthConcatenateLayerFixture, framework::DatasetMode::NIGHTLY, combine(datasets::DepthConcatenateShapes(), framework::dataset::make("DataType", +FIXTURE_DATA_TEST_CASE(RunLarge, GCDepthConcatenateLayerFixture, framework::DatasetMode::NIGHTLY, combine(datasets::DepthConcatenateLayerShapes(), framework::dataset::make("DataType", DataType::F32))) { // Validate output diff --git a/tests/validation/NEON/DepthConcatenateLayer.cpp b/tests/validation/NEON/DepthConcatenateLayer.cpp index 9a0a34f8f8..7e99ab5dc7 100644 --- a/tests/validation/NEON/DepthConcatenateLayer.cpp +++ b/tests/validation/NEON/DepthConcatenateLayer.cpp @@ -22,7 +22,7 @@ * SOFTWARE. */ #include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/functions/NEDepthConcatenate.h" +#include "arm_compute/runtime/NEON/functions/NEDepthConcatenateLayer.h" #include "arm_compute/runtime/Tensor.h" #include "arm_compute/runtime/TensorAllocator.h" #include "tests/NEON/Accessor.h" @@ -45,7 +45,7 @@ TEST_SUITE(DepthConcatenateLayer) //TODO(COMPMID-415): Add configuration test? 
template -using NEDepthConcatenateLayerFixture = DepthConcatenateValidationFixture; +using NEDepthConcatenateLayerFixture = DepthConcatenateLayerValidationFixture; TEST_SUITE(Float) #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC @@ -56,7 +56,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConcatenateLayerFixture, framework // Validate output validate(Accessor(_target), _reference); } -FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConcatenateLayerFixture, framework::DatasetMode::NIGHTLY, combine(datasets::DepthConcatenateShapes(), framework::dataset::make("DataType", +FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConcatenateLayerFixture, framework::DatasetMode::NIGHTLY, combine(datasets::DepthConcatenateLayerShapes(), framework::dataset::make("DataType", DataType::F16))) { // Validate output @@ -72,7 +72,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConcatenateLayerFixture, framewor // Validate output validate(Accessor(_target), _reference); } -FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConcatenateLayerFixture, framework::DatasetMode::NIGHTLY, combine(datasets::DepthConcatenateShapes(), framework::dataset::make("DataType", +FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConcatenateLayerFixture, framework::DatasetMode::NIGHTLY, combine(datasets::DepthConcatenateLayerShapes(), framework::dataset::make("DataType", DataType::F32))) { // Validate output @@ -90,7 +90,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConcatenateLayerFixture, framewo // Validate output validate(Accessor(_target), _reference); } -FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConcatenateLayerFixture, framework::DatasetMode::NIGHTLY, combine(datasets::DepthConcatenateShapes(), +FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConcatenateLayerFixture, framework::DatasetMode::NIGHTLY, combine(datasets::DepthConcatenateLayerShapes(), framework::dataset::make("DataType", DataType::QS8))) { @@ -107,7 +107,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConcatenateLayerFixture, framew // Validate output validate(Accessor(_target), _reference); } -FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConcatenateLayerFixture, framework::DatasetMode::NIGHTLY, combine(datasets::DepthConcatenateShapes(), +FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConcatenateLayerFixture, framework::DatasetMode::NIGHTLY, combine(datasets::DepthConcatenateLayerShapes(), framework::dataset::make("DataType", DataType::QS16))) { diff --git a/tests/validation/NEON/DepthConvert.cpp b/tests/validation/NEON/DepthConvert.cpp deleted file mode 100644 index e036cc45d1..0000000000 --- a/tests/validation/NEON/DepthConvert.cpp +++ /dev/null @@ -1,484 +0,0 @@ -/* - * Copyright (c) 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/functions/NEDepthConvert.h" -#include "arm_compute/runtime/Tensor.h" -#include "arm_compute/runtime/TensorAllocator.h" -#include "tests/NEON/Accessor.h" -#include "tests/PaddingCalculator.h" -#include "tests/datasets/ConvertPolicyDataset.h" -#include "tests/datasets/ShapeDatasets.h" -#include "tests/framework/Asserts.h" -#include "tests/framework/Macros.h" -#include "tests/framework/datasets/Datasets.h" -#include "tests/validation/Validation.h" -#include "tests/validation/fixtures/DepthConvertFixture.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace -{ -/** Input data sets **/ -const auto DepthConvertU8toU16Dataset = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::U16)); -const auto DepthConvertU8toS16Dataset = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::S16)); -const auto DepthConvertU8toS32Dataset = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::S32)); -const auto DepthConvertU16toU8Dataset = combine(framework::dataset::make("DataType", DataType::U16), framework::dataset::make("DataType", DataType::U8)); -const auto DepthConvertU16toU32Dataset = combine(framework::dataset::make("DataType", DataType::U16), framework::dataset::make("DataType", DataType::U32)); -const auto DepthConvertS16toU8Dataset = combine(framework::dataset::make("DataType", DataType::S16), framework::dataset::make("DataType", DataType::U8)); -const auto DepthConvertS16toS32Dataset = combine(framework::dataset::make("DataType", DataType::S16), framework::dataset::make("DataType", DataType::S32)); -const auto DepthConvertQS8toFP32Dataset = combine(framework::dataset::make("DataType", DataType::QS8), framework::dataset::make("DataType", DataType::F32)); -const auto DepthConvertQS16toFP32Dataset = combine(framework::dataset::make("DataType", DataType::QS16), framework::dataset::make("DataType", DataType::F32)); -const auto DepthConvertFP32toQS8Dataset = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::QS8)); -const auto DepthConvertFP32toQS16Dataset = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::QS16)); -const auto DepthConvertShiftDataset = framework::dataset::make("Shift", 0, 7); -const auto DepthConvertFixedPointQuantizedDataset = framework::dataset::make("FractionalBits", 1, 7); -} // namespace - -TEST_SUITE(NEON) -TEST_SUITE(DepthConvert) -template -using NEDepthConvertToU16Fixture = DepthConvertValidationFixture; -template -using NEDepthConvertToS16Fixture = DepthConvertValidationFixture; -template -using NEDepthConvertToS32Fixture = DepthConvertValidationFixture; -template -using NEDepthConvertToU8Fixture = DepthConvertValidationFixture; -template -using NEDepthConvertToU32Fixture = DepthConvertValidationFixture; -template -using NEDepthConvertToFP32FixedPointFixture = DepthConvertValidationFractionalBitsFixture; -template -using NEDepthConvertToQS8FixedPointFixture = DepthConvertValidationFractionalBitsFixture; -template -using
NEDepthConvertToQS16FixedPointFixture = DepthConvertValidationFractionalBitsFixture; - -TEST_SUITE(U8_to_U16) -DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertShiftDataset), - shape, policy, shift) -{ - int fixed_point_position = 0; - - // Create tensors - Tensor src = create_tensor(shape, DataType::U8, 1, fixed_point_position); - Tensor dst = create_tensor(shape, DataType::U16, 1, fixed_point_position); - - // Create and Configure function - NEDepthConvert depth_convert; - depth_convert.configure(&src, &dst, policy, shift); - - // Validate valid region - const ValidRegion valid_region = shape_to_valid_region(shape); - validate(dst.info()->valid_region(), valid_region); - - // Validate padding - const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding(); - validate(src.info()->padding(), padding); - validate(dst.info()->padding(), padding); -} - -FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertToU16Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertU8toU16Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertShiftDataset)) -{ - // Validate output - validate(Accessor(_target), _reference); -} - -FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertToU16Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertU8toU16Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertShiftDataset)) -{ - // Validate output - validate(Accessor(_target), _reference); -} -TEST_SUITE_END() - -TEST_SUITE(U8_to_S16) -DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertShiftDataset), - shape, policy, shift) -{ - int fixed_point_position = 0; - - // Create tensors - Tensor src = create_tensor(shape, DataType::U8, 1, fixed_point_position); - Tensor dst = create_tensor(shape, DataType::S16, 1, fixed_point_position); - - // Create and Configure function - NEDepthConvert depth_convert; - depth_convert.configure(&src, &dst, policy, shift); - - // Validate valid region - const ValidRegion valid_region = shape_to_valid_region(shape); - validate(dst.info()->valid_region(), valid_region); - - // Validate padding - const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding(); - validate(src.info()->padding(), padding); - validate(dst.info()->padding(), padding); -} - -FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertToS16Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertU8toS16Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertShiftDataset)) -{ - // Validate output - validate(Accessor(_target), _reference); -} - -FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertToS16Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertU8toS16Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertShiftDataset)) -{ - // Validate output - 
validate(Accessor(_target), _reference); -} -TEST_SUITE_END() -TEST_SUITE(U8_to_S32) -DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertShiftDataset), - shape, policy, shift) -{ - int fixed_point_position = 0; - - // Create tensors - Tensor src = create_tensor(shape, DataType::U8, 1, fixed_point_position); - Tensor dst = create_tensor(shape, DataType::S32, 1, fixed_point_position); - - // Create and Configure function - NEDepthConvert depth_convert; - depth_convert.configure(&src, &dst, policy, shift); - - // Validate valid region - const ValidRegion valid_region = shape_to_valid_region(shape); - validate(dst.info()->valid_region(), valid_region); - - // Validate padding - const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding(); - validate(src.info()->padding(), padding); - validate(dst.info()->padding(), padding); -} - -FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertToS32Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertU8toS32Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertShiftDataset)) -{ - // Validate output - validate(Accessor(_target), _reference); -} - -FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertToS32Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertU8toS32Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertShiftDataset)) -{ - // Validate output - validate(Accessor(_target), _reference); -} -TEST_SUITE_END() - -TEST_SUITE(U16_to_U8) -DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertShiftDataset), - shape, policy, shift) -{ - int fixed_point_position = 0; - - // Create tensors - Tensor src = create_tensor(shape, DataType::U16, 1, fixed_point_position); - Tensor dst = create_tensor(shape, DataType::U8, 1, fixed_point_position); - - // Create and Configure function - NEDepthConvert depth_convert; - depth_convert.configure(&src, &dst, policy, shift); - - // Validate valid region - const ValidRegion valid_region = shape_to_valid_region(shape); - validate(dst.info()->valid_region(), valid_region); - - // Validate padding - const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding(); - validate(src.info()->padding(), padding); - validate(dst.info()->padding(), padding); -} - -FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertToU8Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertU16toU8Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertShiftDataset)) -{ - // Validate output - validate(Accessor(_target), _reference); -} -FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertToU8Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertU16toU8Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertShiftDataset)) -{ - // Validate output - validate(Accessor(_target), 
_reference); -} -TEST_SUITE_END() - -TEST_SUITE(U16_to_U32) -DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertShiftDataset), - shape, policy, shift) -{ - int fixed_point_position = 0; - - // Create tensors - Tensor src = create_tensor(shape, DataType::U16, 1, fixed_point_position); - Tensor dst = create_tensor(shape, DataType::U32, 1, fixed_point_position); - - // Create and Configure function - NEDepthConvert depth_convert; - depth_convert.configure(&src, &dst, policy, shift); - - // Validate valid region - const ValidRegion valid_region = shape_to_valid_region(shape); - validate(dst.info()->valid_region(), valid_region); - - // Validate padding - const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding(); - validate(src.info()->padding(), padding); - validate(dst.info()->padding(), padding); -} - -FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertToU32Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertU16toU32Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertShiftDataset)) -{ - // Validate output - validate(Accessor(_target), _reference); -} -FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertToU32Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertU16toU32Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertShiftDataset)) -{ - // Validate output - validate(Accessor(_target), _reference); -} -TEST_SUITE_END() - -TEST_SUITE(S16_to_U8) -DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertShiftDataset), - shape, policy, shift) -{ - int fixed_point_position = 0; - - // Create tensors - Tensor src = create_tensor(shape, DataType::S16, 1, fixed_point_position); - Tensor dst = create_tensor(shape, DataType::U8, 1, fixed_point_position); - - // Create and Configure function - NEDepthConvert depth_convert; - depth_convert.configure(&src, &dst, policy, shift); - - // Validate valid region - const ValidRegion valid_region = shape_to_valid_region(shape); - validate(dst.info()->valid_region(), valid_region); - - // Validate padding - const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding(); - validate(src.info()->padding(), padding); - validate(dst.info()->padding(), padding); -} - -FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertToU8Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertS16toU8Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertShiftDataset)) -{ - // Validate output - validate(Accessor(_target), _reference); -} -FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertToU8Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertS16toU8Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertShiftDataset)) -{ - // Validate output - validate(Accessor(_target), _reference); -} 
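For context, the Configuration cases in this file assert the padding that configure() computes automatically. With 16 elements processed per iteration, the required right padding is conceptually whatever rounds a row up to the next multiple of 16. A minimal standalone sketch of that arithmetic follows; ceil_to_multiple and required_right_padding are local helpers written for illustration and are not the library's PaddingCalculator API:

#include <cstddef>

// Round v up to the next multiple of m (m > 0).
constexpr std::size_t ceil_to_multiple(std::size_t v, std::size_t m)
{
    return ((v + m - 1) / m) * m;
}

// Right padding needed so a row of row_len elements can be consumed
// in whole steps of `step` elements per iteration.
constexpr std::size_t required_right_padding(std::size_t row_len, std::size_t step)
{
    return ceil_to_multiple(row_len, step) - row_len;
}

static_assert(required_right_padding(17, 16) == 15, "a 17-wide row is padded up to 32");
static_assert(required_right_padding(32, 16) == 0, "already a multiple of 16");
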
-TEST_SUITE_END() - -TEST_SUITE(S16_to_S32) -DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertShiftDataset), - shape, policy, shift) -{ - int fixed_point_position = 0; - - // Create tensors - Tensor src = create_tensor(shape, DataType::S16, 1, fixed_point_position); - Tensor dst = create_tensor(shape, DataType::S32, 1, fixed_point_position); - - // Create and Configure function - NEDepthConvert depth_convert; - depth_convert.configure(&src, &dst, policy, shift); - - // Validate valid region - const ValidRegion valid_region = shape_to_valid_region(shape); - validate(dst.info()->valid_region(), valid_region); - - // Validate padding - const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding(); - validate(src.info()->padding(), padding); - validate(dst.info()->padding(), padding); -} - -FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertToS32Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertS16toS32Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertShiftDataset)) -{ - // Validate output - validate(Accessor(_target), _reference); -} -FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertToS32Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertS16toS32Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertShiftDataset)) -{ - // Validate output - validate(Accessor(_target), _reference); -} -TEST_SUITE_END() - -TEST_SUITE(Quantized_to_FP32) -DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("DataType", { DataType::QS8, DataType::QS16 })), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertFixedPointQuantizedDataset), - shape, dt, policy, fixed_point_position) -{ - int shift = 0; - - // Create tensors - Tensor src = create_tensor(shape, dt, 1, fixed_point_position); - Tensor dst = create_tensor(shape, DataType::F32, 1, fixed_point_position); - - // Create and Configure function - NEDepthConvert depth_convert; - depth_convert.configure(&src, &dst, policy, shift); - - // Validate valid region - const ValidRegion valid_region = shape_to_valid_region(shape); - validate(dst.info()->valid_region(), valid_region); - - // Validate padding - const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding(); - validate(src.info()->padding(), padding); - validate(dst.info()->padding(), padding); -} -FIXTURE_DATA_TEST_CASE(RunSmallQS8, NEDepthConvertToFP32FixedPointFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertQS8toFP32Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertFixedPointQuantizedDataset)) -{ - // Validate output - validate(Accessor(_target), _reference); -} -FIXTURE_DATA_TEST_CASE(RunSmallQS16, NEDepthConvertToFP32FixedPointFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertQS16toFP32Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, 
ConvertPolicy::WRAP })), - DepthConvertFixedPointQuantizedDataset)) -{ - // Validate output - validate(Accessor(_target), _reference); -} -FIXTURE_DATA_TEST_CASE(RunLargeQS8, NEDepthConvertToFP32FixedPointFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertQS8toFP32Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertFixedPointQuantizedDataset)) -{ - // Validate output - validate(Accessor(_target), _reference); -} -FIXTURE_DATA_TEST_CASE(RunLargeQS16, NEDepthConvertToFP32FixedPointFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertQS16toFP32Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertFixedPointQuantizedDataset)) -{ - // Validate output - validate(Accessor(_target), _reference); -} -TEST_SUITE_END() - -TEST_SUITE(FP32_to_Quantized) -DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("DataType", { DataType::QS8, DataType::QS16 })), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertFixedPointQuantizedDataset), - shape, dt, policy, fixed_point_position) -{ - int shift = 0; - - // Create tensors - Tensor src = create_tensor(shape, DataType::F32, 1, fixed_point_position); - Tensor dst = create_tensor(shape, dt, 1, fixed_point_position); - - // Create and Configure function - NEDepthConvert depth_convert; - depth_convert.configure(&src, &dst, policy, shift); - - // Validate valid region - const ValidRegion valid_region = shape_to_valid_region(shape); - validate(dst.info()->valid_region(), valid_region); - - // Validate padding - const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding(); - validate(src.info()->padding(), padding); - validate(dst.info()->padding(), padding); -} -FIXTURE_DATA_TEST_CASE(RunSmallQS8, NEDepthConvertToQS8FixedPointFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertFP32toQS8Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertFixedPointQuantizedDataset)) -{ - // Validate output - validate(Accessor(_target), _reference); -} -FIXTURE_DATA_TEST_CASE(RunSmallQS16, NEDepthConvertToQS16FixedPointFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertFP32toQS16Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertFixedPointQuantizedDataset)) -{ - // Validate output - validate(Accessor(_target), _reference); -} -FIXTURE_DATA_TEST_CASE(RunLargeQS8, NEDepthConvertToQS8FixedPointFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertFP32toQS8Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - DepthConvertFixedPointQuantizedDataset)) -{ - // Validate output - validate(Accessor(_target), _reference); -} -FIXTURE_DATA_TEST_CASE(RunLargeQS16, NEDepthConvertToQS16FixedPointFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertFP32toQS16Dataset), - framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), - 
DepthConvertFixedPointQuantizedDataset)) -{ - // Validate output - validate(Accessor(_target), _reference); -} -TEST_SUITE_END() - -TEST_SUITE_END() -TEST_SUITE_END() -} // namespace validation -} // namespace test -} // namespace arm_compute diff --git a/tests/validation/NEON/DepthConvertLayer.cpp b/tests/validation/NEON/DepthConvertLayer.cpp new file mode 100644 index 0000000000..a56298babc --- /dev/null +++ b/tests/validation/NEON/DepthConvertLayer.cpp @@ -0,0 +1,492 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h" +#include "arm_compute/runtime/Tensor.h" +#include "arm_compute/runtime/TensorAllocator.h" +#include "tests/NEON/Accessor.h" +#include "tests/PaddingCalculator.h" +#include "tests/datasets/ConvertPolicyDataset.h" +#include "tests/datasets/ShapeDatasets.h" +#include "tests/framework/Asserts.h" +#include "tests/framework/Macros.h" +#include "tests/framework/datasets/Datasets.h" +#include "tests/validation/Validation.h" +#include "tests/validation/fixtures/DepthConvertLayerFixture.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace +{ +/** Input data sets **/ +const auto DepthConvertLayerU8toU16Dataset = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::U16)); +const auto DepthConvertLayerU8toS16Dataset = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::S16)); +const auto DepthConvertLayerU8toS32Dataset = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::S32)); +const auto DepthConvertLayerU16toU8Dataset = combine(framework::dataset::make("DataType", DataType::U16), framework::dataset::make("DataType", DataType::U8)); +const auto DepthConvertLayerU16toU32Dataset = combine(framework::dataset::make("DataType", DataType::U16), framework::dataset::make("DataType", DataType::U32)); +const auto DepthConvertLayerS16toU8Dataset = combine(framework::dataset::make("DataType", DataType::S16), framework::dataset::make("DataType", DataType::U8)); +const auto DepthConvertLayerS16toS32Dataset = combine(framework::dataset::make("DataType", DataType::S16), framework::dataset::make("DataType", DataType::S32)); +const auto DepthConvertLayerQS8toFP32Dataset = 
combine(framework::dataset::make("DataType", DataType::QS8), framework::dataset::make("DataType", DataType::F32)); +const auto DepthConvertLayerQS16toFP32Dataset = combine(framework::dataset::make("DataType", DataType::QS16), framework::dataset::make("DataType", DataType::F32)); +const auto DepthConvertLayerFP32toQS8Dataset = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::QS8)); +const auto DepthConvertLayerFP32toQS16Dataset = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::QS16)); +const auto DepthConvertLayerShiftDataset = framework::dataset::make("Shift", 0, 7); +const auto DepthConvertLayerFixedPointQuantizedDataset = framework::dataset::make("FractionalBits", 1, 7); +} // namespace + +TEST_SUITE(NEON) +TEST_SUITE(DepthConvertLayer) +template +using NEDepthConvertLayerToU16Fixture = DepthConvertLayerValidationFixture; +template +using NEDepthConvertLayerToS16Fixture = DepthConvertLayerValidationFixture; +template +using NEDepthConvertLayerToS32Fixture = DepthConvertLayerValidationFixture; +template +using NEDepthConvertLayerToU8Fixture = DepthConvertLayerValidationFixture; +template +using NEDepthConvertLayerToU32Fixture = DepthConvertLayerValidationFixture; +template +using NEDepthConvertLayerToFP32FixedPointFixture = DepthConvertLayerValidationFractionalBitsFixture; +template +using NEDepthConvertLayerToQS8FixedPointFixture = DepthConvertLayerValidationFractionalBitsFixture; +template +using NEDepthConvertLayerToQS16FixedPointFixture = DepthConvertLayerValidationFractionalBitsFixture; + +TEST_SUITE(U8_to_U16) +DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), + DepthConvertLayerShiftDataset), + shape, policy, shift) +{ + int fixed_point_position = 0; + + // Create tensors + Tensor src = create_tensor(shape, DataType::U8, 1, fixed_point_position); + Tensor dst = create_tensor(shape, DataType::U16, 1, fixed_point_position); + + // Create and Configure function + NEDepthConvertLayer depth_convert; + depth_convert.configure(&src, &dst, policy, shift); + + // Validate valid region + const ValidRegion valid_region = shape_to_valid_region(shape); + validate(dst.info()->valid_region(), valid_region); + + // Validate padding + const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding(); + validate(src.info()->padding(), padding); + validate(dst.info()->padding(), padding); +} + +FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertLayerToU16Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertLayerU8toU16Dataset), + framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), + DepthConvertLayerShiftDataset)) +{ + // Validate output + validate(Accessor(_target), _reference); +} + +FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertLayerToU16Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertLayerU8toU16Dataset), + framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), + DepthConvertLayerShiftDataset)) +{ + // Validate output + validate(Accessor(_target), _reference); +} +TEST_SUITE_END() + +TEST_SUITE(U8_to_S16) +DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, 
combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), + DepthConvertLayerShiftDataset), + shape, policy, shift) +{ + int fixed_point_position = 0; + + // Create tensors + Tensor src = create_tensor(shape, DataType::U8, 1, fixed_point_position); + Tensor dst = create_tensor(shape, DataType::S16, 1, fixed_point_position); + + // Create and Configure function + NEDepthConvertLayer depth_convert; + depth_convert.configure(&src, &dst, policy, shift); + + // Validate valid region + const ValidRegion valid_region = shape_to_valid_region(shape); + validate(dst.info()->valid_region(), valid_region); + + // Validate padding + const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding(); + validate(src.info()->padding(), padding); + validate(dst.info()->padding(), padding); +} + +FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertLayerToS16Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertLayerU8toS16Dataset), + framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), + DepthConvertLayerShiftDataset)) +{ + // Validate output + validate(Accessor(_target), _reference); +} + +FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertLayerToS16Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertLayerU8toS16Dataset), + framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), + DepthConvertLayerShiftDataset)) +{ + // Validate output + validate(Accessor(_target), _reference); +} +TEST_SUITE_END() +TEST_SUITE(U8_to_S32) +DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), + DepthConvertLayerShiftDataset), + shape, policy, shift) +{ + int fixed_point_position = 0; + + // Create tensors + Tensor src = create_tensor(shape, DataType::U8, 1, fixed_point_position); + Tensor dst = create_tensor(shape, DataType::S32, 1, fixed_point_position); + + // Create and Configure function + NEDepthConvertLayer depth_convert; + depth_convert.configure(&src, &dst, policy, shift); + + // Validate valid region + const ValidRegion valid_region = shape_to_valid_region(shape); + validate(dst.info()->valid_region(), valid_region); + + // Validate padding + const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding(); + validate(src.info()->padding(), padding); + validate(dst.info()->padding(), padding); +} + +FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertLayerToS32Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertLayerU8toS32Dataset), + framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), + DepthConvertLayerShiftDataset)) +{ + // Validate output + validate(Accessor(_target), _reference); +} + +FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertLayerToS32Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertLayerU8toS32Dataset), + framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), + DepthConvertLayerShiftDataset)) +{ + // Validate output + validate(Accessor(_target), _reference); +} +TEST_SUITE_END() + +TEST_SUITE(U16_to_U8) 
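Outside the test harness, the renamed function keeps the configure-allocate-run sequence that these cases validate. A minimal usage sketch follows; the shapes and the example function name are illustrative, not taken from the test suite:

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h"
#include "arm_compute/runtime/Tensor.h"
#include "arm_compute/runtime/TensorAllocator.h"

void example_u8_to_u16()
{
    using namespace arm_compute;

    // Describe a 16x16 U8 source and a U16 destination of the same shape.
    Tensor src;
    Tensor dst;
    src.allocator()->init(TensorInfo(TensorShape(16U, 16U), 1, DataType::U8));
    dst.allocator()->init(TensorInfo(TensorShape(16U, 16U), 1, DataType::U16));

    // Configure while the tensors are still resizable, as the tests do.
    NEDepthConvertLayer depth_convert;
    depth_convert.configure(&src, &dst, ConvertPolicy::SATURATE, 0 /* shift */);

    src.allocator()->allocate();
    dst.allocator()->allocate();

    // ... fill src via its buffer or an accessor ...

    depth_convert.run(); // widens each U8 element to U16
}
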
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), + DepthConvertLayerShiftDataset), + shape, policy, shift) +{ + int fixed_point_position = 0; + + // Create tensors + Tensor src = create_tensor(shape, DataType::U16, 1, fixed_point_position); + Tensor dst = create_tensor(shape, DataType::U8, 1, fixed_point_position); + + // Create and Configure function + NEDepthConvertLayer depth_convert; + depth_convert.configure(&src, &dst, policy, shift); + + // Validate valid region + const ValidRegion valid_region = shape_to_valid_region(shape); + validate(dst.info()->valid_region(), valid_region); + + // Validate padding + const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding(); + validate(src.info()->padding(), padding); + validate(dst.info()->padding(), padding); +} + +FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertLayerToU8Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertLayerU16toU8Dataset), + framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), + DepthConvertLayerShiftDataset)) +{ + // Validate output + validate(Accessor(_target), _reference); +} +FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertLayerToU8Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertLayerU16toU8Dataset), + framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), + DepthConvertLayerShiftDataset)) +{ + // Validate output + validate(Accessor(_target), _reference); +} +TEST_SUITE_END() + +TEST_SUITE(U16_to_U32) +DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), + DepthConvertLayerShiftDataset), + shape, policy, shift) +{ + int fixed_point_position = 0; + + // Create tensors + Tensor src = create_tensor(shape, DataType::U16, 1, fixed_point_position); + Tensor dst = create_tensor(shape, DataType::U32, 1, fixed_point_position); + + // Create and Configure function + NEDepthConvertLayer depth_convert; + depth_convert.configure(&src, &dst, policy, shift); + + // Validate valid region + const ValidRegion valid_region = shape_to_valid_region(shape); + validate(dst.info()->valid_region(), valid_region); + + // Validate padding + const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding(); + validate(src.info()->padding(), padding); + validate(dst.info()->padding(), padding); +} + +FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertLayerToU32Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertLayerU16toU32Dataset), + framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), + DepthConvertLayerShiftDataset)) +{ + // Validate output + validate(Accessor(_target), _reference); +} +FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertLayerToU32Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertLayerU16toU32Dataset), + framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), + DepthConvertLayerShiftDataset)) +{ + // Validate output + validate(Accessor(_target), 
_reference); +} +TEST_SUITE_END() + +TEST_SUITE(S16_to_U8) +DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), + DepthConvertLayerShiftDataset), + shape, policy, shift) +{ + int fixed_point_position = 0; + + // Create tensors + Tensor src = create_tensor(shape, DataType::S16, 1, fixed_point_position); + Tensor dst = create_tensor(shape, DataType::U8, 1, fixed_point_position); + + // Create and Configure function + NEDepthConvertLayer depth_convert; + depth_convert.configure(&src, &dst, policy, shift); + + // Validate valid region + const ValidRegion valid_region = shape_to_valid_region(shape); + validate(dst.info()->valid_region(), valid_region); + + // Validate padding + const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding(); + validate(src.info()->padding(), padding); + validate(dst.info()->padding(), padding); +} + +FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertLayerToU8Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertLayerS16toU8Dataset), + framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), + DepthConvertLayerShiftDataset)) +{ + // Validate output + validate(Accessor(_target), _reference); +} +FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertLayerToU8Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertLayerS16toU8Dataset), + framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), + DepthConvertLayerShiftDataset)) +{ + // Validate output + validate(Accessor(_target), _reference); +} +TEST_SUITE_END() + +TEST_SUITE(S16_to_S32) +DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), + DepthConvertLayerShiftDataset), + shape, policy, shift) +{ + int fixed_point_position = 0; + + // Create tensors + Tensor src = create_tensor(shape, DataType::S16, 1, fixed_point_position); + Tensor dst = create_tensor(shape, DataType::S32, 1, fixed_point_position); + + // Create and Configure function + NEDepthConvertLayer depth_convert; + depth_convert.configure(&src, &dst, policy, shift); + + // Validate valid region + const ValidRegion valid_region = shape_to_valid_region(shape); + validate(dst.info()->valid_region(), valid_region); + + // Validate padding + const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding(); + validate(src.info()->padding(), padding); + validate(dst.info()->padding(), padding); +} + +FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertLayerToS32Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertLayerS16toS32Dataset), + framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), + DepthConvertLayerShiftDataset)) +{ + // Validate output + validate(Accessor(_target), _reference); +} +FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertLayerToS32Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertLayerS16toS32Dataset), + framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), + DepthConvertLayerShiftDataset)) 
+{ + // Validate output + validate(Accessor(_target), _reference); +} +TEST_SUITE_END() + +TEST_SUITE(Quantized_to_FP32) +DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("DataType", { DataType::QS8, DataType::QS16 })), + framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), + DepthConvertLayerFixedPointQuantizedDataset), + shape, dt, policy, fixed_point_position) +{ + int shift = 0; + + // Create tensors + Tensor src = create_tensor(shape, dt, 1, fixed_point_position); + Tensor dst = create_tensor(shape, DataType::F32, 1, fixed_point_position); + + // Create and Configure function + NEDepthConvertLayer depth_convert; + depth_convert.configure(&src, &dst, policy, shift); + + // Validate valid region + const ValidRegion valid_region = shape_to_valid_region(shape); + validate(dst.info()->valid_region(), valid_region); + + // Validate padding + const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding(); + validate(src.info()->padding(), padding); + validate(dst.info()->padding(), padding); +} +FIXTURE_DATA_TEST_CASE(RunSmallQS8, NEDepthConvertLayerToFP32FixedPointFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), + DepthConvertLayerQS8toFP32Dataset), + framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), + DepthConvertLayerFixedPointQuantizedDataset)) +{ + // Validate output + validate(Accessor(_target), _reference); +} +FIXTURE_DATA_TEST_CASE(RunSmallQS16, NEDepthConvertLayerToFP32FixedPointFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), + DepthConvertLayerQS16toFP32Dataset), + framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), + DepthConvertLayerFixedPointQuantizedDataset)) +{ + // Validate output + validate(Accessor(_target), _reference); +} +FIXTURE_DATA_TEST_CASE(RunLargeQS8, NEDepthConvertLayerToFP32FixedPointFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), + DepthConvertLayerQS8toFP32Dataset), + framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), + DepthConvertLayerFixedPointQuantizedDataset)) +{ + // Validate output + validate(Accessor(_target), _reference); +} +FIXTURE_DATA_TEST_CASE(RunLargeQS16, NEDepthConvertLayerToFP32FixedPointFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), + DepthConvertLayerQS16toFP32Dataset), + framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), + DepthConvertLayerFixedPointQuantizedDataset)) +{ + // Validate output + validate(Accessor(_target), _reference); +} +TEST_SUITE_END() + +TEST_SUITE(FP32_to_Quantized) +DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("DataType", { DataType::QS8, DataType::QS16 })), + framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), + DepthConvertLayerFixedPointQuantizedDataset), + shape, dt, policy, fixed_point_position) +{ + int shift = 0; + + // Create tensors + Tensor src = create_tensor(shape, DataType::F32, 1, fixed_point_position); + Tensor dst = create_tensor(shape, dt, 1, fixed_point_position); + + // Create and 
Configure function + NEDepthConvertLayer depth_convert; + depth_convert.configure(&src, &dst, policy, shift); + + // Validate valid region + const ValidRegion valid_region = shape_to_valid_region(shape); + validate(dst.info()->valid_region(), valid_region); + + // Validate padding + const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding(); + validate(src.info()->padding(), padding); + validate(dst.info()->padding(), padding); +} +FIXTURE_DATA_TEST_CASE(RunSmallQS8, NEDepthConvertLayerToQS8FixedPointFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), + DepthConvertLayerFP32toQS8Dataset), + framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), + DepthConvertLayerFixedPointQuantizedDataset)) +{ + // Validate output + validate(Accessor(_target), _reference); +} +FIXTURE_DATA_TEST_CASE(RunSmallQS16, NEDepthConvertLayerToQS16FixedPointFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), + DepthConvertLayerFP32toQS16Dataset), + framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), + DepthConvertLayerFixedPointQuantizedDataset)) +{ + // Validate output + validate(Accessor(_target), _reference); +} +FIXTURE_DATA_TEST_CASE(RunLargeQS8, NEDepthConvertLayerToQS8FixedPointFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), + DepthConvertLayerFP32toQS8Dataset), + framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), + DepthConvertLayerFixedPointQuantizedDataset)) +{ + // Validate output + validate(Accessor(_target), _reference); +} +FIXTURE_DATA_TEST_CASE(RunLargeQS16, NEDepthConvertLayerToQS16FixedPointFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), + DepthConvertLayerFP32toQS16Dataset), + framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), + DepthConvertLayerFixedPointQuantizedDataset)) +{ + // Validate output + validate(Accessor(_target), _reference); +} +TEST_SUITE_END() + +TEST_SUITE_END() +TEST_SUITE_END() +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/NEON/DepthwiseConvolution.cpp b/tests/validation/NEON/DepthwiseConvolution.cpp deleted file mode 100644 index 3a4b7aa2e9..0000000000 --- a/tests/validation/NEON/DepthwiseConvolution.cpp +++ /dev/null @@ -1,126 +0,0 @@ -/* - * Copyright (c) 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/functions/NEDepthwiseConvolution.h" -#include "arm_compute/runtime/Tensor.h" -#include "arm_compute/runtime/TensorAllocator.h" -#include "tests/NEON/Accessor.h" -#include "tests/PaddingCalculator.h" -#include "tests/datasets/DepthwiseConvolutionDataset.h" -#include "tests/framework/Asserts.h" -#include "tests/framework/Macros.h" -#include "tests/framework/datasets/Datasets.h" -#include "tests/validation/Validation.h" -#include "tests/validation/fixtures/DepthwiseConvolutionFixture.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace -{ -constexpr RelativeTolerance tolerance_f32(0.01f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 */ -} // namespace - -TEST_SUITE(NEON) -TEST_SUITE(DepthwiseConvolutionLayer) - -DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(framework::dataset::concat(datasets::SmallDepthwiseConvolutionDataset3x3(), datasets::LargeDepthwiseConvolutionDataset3x3()), - framework::dataset::make("DataType", DataType::F32)), - input_shape, weights_shape, bias_shape, output_shape, info, data_type) -{ - // Create tensors - Tensor src = create_tensor(input_shape, data_type); - Tensor dst = create_tensor(output_shape, data_type); - Tensor weights = create_tensor(weights_shape, data_type); - Tensor bias = create_tensor(bias_shape, data_type); - - ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(weights.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(bias.info()->is_resizable(), framework::LogLevel::ERRORS); - - // Create and configure function - NEDepthwiseConvolution3x3 depthwise_layer; - depthwise_layer.configure(&src, &weights, &bias, &dst, info); - - // Validate valid region - const ValidRegion input_valid_region = shape_to_valid_region(input_shape); - const ValidRegion output_valid_region = shape_to_valid_region(output_shape); - const ValidRegion weights_valid_region = shape_to_valid_region(weights_shape); - const ValidRegion bias_valid_region = shape_to_valid_region(bias_shape); - - validate(src.info()->valid_region(), input_valid_region); - validate(dst.info()->valid_region(), output_valid_region); - validate(weights.info()->valid_region(), weights_valid_region); - validate(bias.info()->valid_region(), bias_valid_region); - - // Validate padding - const int step = 16 >> info.stride().first; - const PaddingSize padding = PaddingCalculator(output_shape.x(), step).required_padding(); - validate(dst.info()->padding(), padding); -} - -TEST_SUITE(Float) -TEST_SUITE(F32) -TEST_SUITE(Generic) -template -using NEDepthwiseConvolutionFixture = DepthwiseConvolutionValidationFixture; -FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseConvolutionFixture, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallDepthwiseConvolutionDataset(), framework::dataset::make("DataType", - DataType::F32))) -{ - validate(Accessor(_target), _reference, tolerance_f32); -} -FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionFixture, framework::DatasetMode::NIGHTLY, 
combine(datasets::LargeDepthwiseConvolutionDataset(), framework::dataset::make("DataType", - DataType::F32))) -{ - validate(Accessor(_target), _reference, tolerance_f32); -} -TEST_SUITE_END() - -TEST_SUITE(W3x3) -template -using NEDepthwiseConvolutionFixture3x3 = DepthwiseConvolutionValidationFixture; -FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseConvolutionFixture3x3, framework::DatasetMode::ALL, combine(datasets::SmallDepthwiseConvolutionDataset3x3(), framework::dataset::make("DataType", - DataType::F32))) -{ - validate(Accessor(_target), _reference, tolerance_f32); -} -FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionFixture3x3, framework::DatasetMode::NIGHTLY, combine(datasets::LargeDepthwiseConvolutionDataset3x3(), framework::dataset::make("DataType", - DataType::F32))) -{ - validate(Accessor(_target), _reference, tolerance_f32); -} -TEST_SUITE_END() - -TEST_SUITE_END() -TEST_SUITE_END() - -TEST_SUITE_END() -TEST_SUITE_END() -} // namespace validation -} // namespace test -} // namespace arm_compute diff --git a/tests/validation/NEON/DepthwiseConvolutionLayer.cpp b/tests/validation/NEON/DepthwiseConvolutionLayer.cpp new file mode 100644 index 0000000000..17eaaf8ad7 --- /dev/null +++ b/tests/validation/NEON/DepthwiseConvolutionLayer.cpp @@ -0,0 +1,131 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h" +#include "arm_compute/runtime/Tensor.h" +#include "arm_compute/runtime/TensorAllocator.h" +#include "tests/NEON/Accessor.h" +#include "tests/PaddingCalculator.h" +#include "tests/datasets/DepthwiseConvolutionLayerDataset.h" +#include "tests/framework/Asserts.h" +#include "tests/framework/Macros.h" +#include "tests/framework/datasets/Datasets.h" +#include "tests/validation/Validation.h" +#include "tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace +{ +constexpr RelativeTolerance tolerance_f32(0.01f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 */ +} // namespace + +TEST_SUITE(NEON) +TEST_SUITE(DepthwiseConvolutionLayer) + +DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(framework::dataset::concat(datasets::SmallDepthwiseConvolutionLayerDataset3x3(), + datasets::LargeDepthwiseConvolutionLayerDataset3x3()), + framework::dataset::make("DataType", DataType::F32)), + input_shape, weights_shape, bias_shape, output_shape, info, data_type) +{ + // Create tensors + Tensor src = create_tensor(input_shape, data_type); + Tensor dst = create_tensor(output_shape, data_type); + Tensor weights = create_tensor(weights_shape, data_type); + Tensor bias = create_tensor(bias_shape, data_type); + + ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(weights.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(bias.info()->is_resizable(), framework::LogLevel::ERRORS); + + // Create and configure function + NEDepthwiseConvolutionLayer3x3 depthwise_layer; + depthwise_layer.configure(&src, &weights, &bias, &dst, info); + + // Validate valid region + const ValidRegion input_valid_region = shape_to_valid_region(input_shape); + const ValidRegion output_valid_region = shape_to_valid_region(output_shape); + const ValidRegion weights_valid_region = shape_to_valid_region(weights_shape); + const ValidRegion bias_valid_region = shape_to_valid_region(bias_shape); + + validate(src.info()->valid_region(), input_valid_region); + validate(dst.info()->valid_region(), output_valid_region); + validate(weights.info()->valid_region(), weights_valid_region); + validate(bias.info()->valid_region(), bias_valid_region); + + // Validate padding + const int step = 16 >> info.stride().first; + const PaddingSize padding = PaddingCalculator(output_shape.x(), step).required_padding(); + validate(dst.info()->padding(), padding); +} + +TEST_SUITE(Float) +TEST_SUITE(F32) +TEST_SUITE(Generic) +template +using NEDepthwiseConvolutionLayerFixture = DepthwiseConvolutionLayerValidationFixture; +FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseConvolutionLayerFixture, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallDepthwiseConvolutionLayerDataset(), + framework::dataset::make("DataType", + DataType::F32))) +{ + validate(Accessor(_target), _reference, tolerance_f32); +} +FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerFixture, framework::DatasetMode::NIGHTLY, combine(datasets::LargeDepthwiseConvolutionLayerDataset(), + framework::dataset::make("DataType", + DataType::F32))) +{ + validate(Accessor(_target), _reference, tolerance_f32); +} +TEST_SUITE_END() + +TEST_SUITE(W3x3) +template +using 
NEDepthwiseConvolutionLayerFixture3x3 = DepthwiseConvolutionLayerValidationFixture; +FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseConvolutionLayerFixture3x3, framework::DatasetMode::ALL, combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(), + framework::dataset::make("DataType", + DataType::F32))) +{ + validate(Accessor(_target), _reference, tolerance_f32); +} +FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerFixture3x3, framework::DatasetMode::NIGHTLY, combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(), + framework::dataset::make("DataType", + DataType::F32))) +{ + validate(Accessor(_target), _reference, tolerance_f32); +} +TEST_SUITE_END() + +TEST_SUITE_END() +TEST_SUITE_END() + +TEST_SUITE_END() +TEST_SUITE_END() +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/NEON/L2Normalize.cpp b/tests/validation/NEON/L2Normalize.cpp deleted file mode 100644 index ceffa6d510..0000000000 --- a/tests/validation/NEON/L2Normalize.cpp +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (c) 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/NEON/functions/NEL2Normalize.h" -#include "arm_compute/runtime/Tensor.h" -#include "arm_compute/runtime/TensorAllocator.h" -#include "tests/NEON/Accessor.h" -#include "tests/PaddingCalculator.h" -#include "tests/datasets/ShapeDatasets.h" -#include "tests/framework/Asserts.h" -#include "tests/framework/Macros.h" -#include "tests/framework/datasets/Datasets.h" -#include "tests/validation/Validation.h" -#include "tests/validation/fixtures/L2NormalizeFixture.h" - -namespace arm_compute -{ -namespace test -{ -namespace validation -{ -namespace -{ -/** Tolerance for float operations */ -RelativeTolerance tolerance_f32(0.00001f); -} // namespace - -TEST_SUITE(NEON) -TEST_SUITE(L2Normalize) - -template -using NEL2NormalizeFixture = L2NormalizeValidationFixture; - -TEST_SUITE(FP32) -FIXTURE_DATA_TEST_CASE(RunSmall, NEL2NormalizeFixture, framework::DatasetMode::PRECOMMIT, - combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("Axis", { 0 })), framework::dataset::make("Epsilon", { 1e-12 }))) -{ - // Validate output - validate(Accessor(_target), _reference, tolerance_f32); -} - -FIXTURE_DATA_TEST_CASE(RunLarge, NEL2NormalizeFixture, framework::DatasetMode::NIGHTLY, - combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("Axis", { 0 })), framework::dataset::make("Epsilon", { 1e-12 }))) -{ - // Validate output - validate(Accessor(_target), _reference, tolerance_f32); -} -TEST_SUITE_END() - -TEST_SUITE_END() -TEST_SUITE_END() -} // namespace validation -} // namespace test -} // namespace arm_compute diff --git a/tests/validation/NEON/L2NormalizeLayer.cpp b/tests/validation/NEON/L2NormalizeLayer.cpp new file mode 100644 index 0000000000..c0f5920964 --- /dev/null +++ b/tests/validation/NEON/L2NormalizeLayer.cpp @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/functions/NEL2NormalizeLayer.h"
+#include "arm_compute/runtime/Tensor.h"
+#include "arm_compute/runtime/TensorAllocator.h"
+#include "tests/NEON/Accessor.h"
+#include "tests/PaddingCalculator.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/L2NormalizeLayerFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+/** Tolerance for float operations */
+RelativeTolerance<float> tolerance_f32(0.00001f);
+} // namespace
+
+TEST_SUITE(NEON)
+TEST_SUITE(L2NormalizeLayer)
+
+template <typename T>
+using NEL2NormalizeLayerFixture = L2NormalizeLayerValidationFixture<Tensor, Accessor, NEL2NormalizeLayer, T>;
+
+TEST_SUITE(FP32)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEL2NormalizeLayerFixture<float>, framework::DatasetMode::PRECOMMIT,
+                       combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("Axis", { 0 })), framework::dataset::make("Epsilon", { 1e-12 })))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, tolerance_f32);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, NEL2NormalizeLayerFixture<float>, framework::DatasetMode::NIGHTLY,
+                       combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("Axis", { 0 })), framework::dataset::make("Epsilon", { 1e-12 })))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, tolerance_f32);
+}
+TEST_SUITE_END()
+
+TEST_SUITE_END()
+TEST_SUITE_END()
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/fixtures/DepthConcatenateLayerFixture.h b/tests/validation/fixtures/DepthConcatenateLayerFixture.h
index 633dba23e0..103c73e4ea 100644
--- a/tests/validation/fixtures/DepthConcatenateLayerFixture.h
+++ b/tests/validation/fixtures/DepthConcatenateLayerFixture.h
@@ -43,7 +43,7 @@ namespace test
 namespace validation
 {
 template <typename TensorType, typename ITensorType, typename AccessorType, typename FunctionType, typename T>
-class DepthConcatenateValidationFixture : public framework::Fixture
+class DepthConcatenateLayerValidationFixture : public framework::Fixture
 {
 public:
     template <typename...>
diff --git a/tests/validation/fixtures/DepthConvertFixture.h b/tests/validation/fixtures/DepthConvertFixture.h
deleted file mode 100644
index b132a9341d..0000000000
--- a/tests/validation/fixtures/DepthConvertFixture.h
+++ /dev/null
@@ -1,131 +0,0 @@
-/*
- * Copyright (c) 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_TEST_DEPTH_CONVERT_FIXTURE
-#define ARM_COMPUTE_TEST_DEPTH_CONVERT_FIXTURE
-
-#include "arm_compute/core/TensorShape.h"
-#include "arm_compute/core/Types.h"
-#include "tests/AssetsLibrary.h"
-#include "tests/Globals.h"
-#include "tests/IAccessor.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Fixture.h"
-#include "tests/validation/CPP/DepthConvert.h"
-#include "tests/validation/Helpers.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-template <typename TensorType, typename AccessorType, typename FunctionType, typename T1, typename T2>
-class DepthConvertValidationFixedPointFixture : public framework::Fixture
-{
-public:
-    template <typename...>
-    void setup(TensorShape shape, DataType dt_in, DataType dt_out, ConvertPolicy policy, uint32_t shift, uint32_t fractional_bits)
-    {
-        _shift           = shift;
-        _fractional_bits = fractional_bits;
-        _target          = compute_target(shape, dt_in, dt_out, policy, shift, fractional_bits);
-        _reference       = compute_reference(shape, dt_in, dt_out, policy, shift, fractional_bits);
-    }
-
-protected:
-    template <typename U>
-    void fill(U &&tensor, int i)
-    {
-        library->fill_tensor_uniform(tensor, i);
-    }
-
-    TensorType compute_target(const TensorShape &shape, DataType dt_in, DataType dt_out, ConvertPolicy policy, uint32_t shift, uint32_t fixed_point_position)
-    {
-        // Create tensors
-        TensorType src = create_tensor<TensorType>(shape, dt_in, 1, static_cast<int>(fixed_point_position));
-        TensorType dst = create_tensor<TensorType>(shape, dt_out, 1, static_cast<int>(fixed_point_position));
-
-        // Create and configure function
-        FunctionType depth_convert;
-        depth_convert.configure(&src, &dst, policy, shift);
-
-        ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
-
-        // Allocate tensors
-        src.allocator()->allocate();
-        dst.allocator()->allocate();
-
-        ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
-
-        // Fill tensors
-        fill(AccessorType(src), 0);
-
-        // Compute function
-        depth_convert.run();
-
-        return dst;
-    }
-
-    SimpleTensor<T2> compute_reference(const TensorShape &shape, DataType dt_in, DataType dt_out, ConvertPolicy policy, uint32_t shift, uint32_t fixed_point_position)
-    {
-        // Create reference
-        SimpleTensor<T1> src{ shape, dt_in, 1, static_cast<int>(fixed_point_position) };
-
-        // Fill reference
-        fill(src, 0);
-
-        return reference::depth_convert<T1, T2>(src, dt_out, policy, shift);
-    }
-
-    TensorType       _target{};
-    SimpleTensor<T2> _reference{};
-    int              _fractional_bits{};
-    int              _shift{};
-};
-template <typename TensorType, typename AccessorType, typename FunctionType, typename T1, typename T2>
-class DepthConvertValidationFixture : public DepthConvertValidationFixedPointFixture<TensorType, AccessorType, FunctionType, T1, T2>
-{
-public:
-    template <typename...>
-    void setup(TensorShape shape, DataType dt_in, DataType dt_out, ConvertPolicy policy, uint32_t shift)
-    {
-        DepthConvertValidationFixedPointFixture<TensorType, AccessorType, FunctionType, T1, T2>::setup(shape, dt_in, dt_out, policy, shift, 0);
-    }
-};
-template <typename TensorType, typename AccessorType, typename FunctionType, typename T1, typename T2>
-class DepthConvertValidationFractionalBitsFixture : public DepthConvertValidationFixedPointFixture<TensorType, AccessorType, FunctionType, T1, T2>
-{
-public:
-    template <typename...>
-    void setup(TensorShape shape, DataType dt_in, DataType dt_out, ConvertPolicy policy, uint32_t fractional_bits)
-    {
-        DepthConvertValidationFixedPointFixture<TensorType, AccessorType, FunctionType, T1, T2>::setup(shape, dt_in, dt_out, policy, 0, fractional_bits);
-    }
-};
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_TEST_DEPTH_CONVERT_FIXTURE */
diff --git a/tests/validation/fixtures/DepthConvertLayerFixture.h b/tests/validation/fixtures/DepthConvertLayerFixture.h
new file mode 100644
index 0000000000..c2fdc75bb4
--- /dev/null
+++ b/tests/validation/fixtures/DepthConvertLayerFixture.h
@@ -0,0 +1,131 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_TEST_DEPTH_CONVERT_FIXTURE
+#define ARM_COMPUTE_TEST_DEPTH_CONVERT_FIXTURE
+
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+#include "tests/AssetsLibrary.h"
+#include "tests/Globals.h"
+#include "tests/IAccessor.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Fixture.h"
+#include "tests/validation/CPP/DepthConvertLayer.h"
+#include "tests/validation/Helpers.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T1, typename T2>
+class DepthConvertLayerValidationFixedPointFixture : public framework::Fixture
+{
+public:
+    template <typename...>
+    void setup(TensorShape shape, DataType dt_in, DataType dt_out, ConvertPolicy policy, uint32_t shift, uint32_t fractional_bits)
+    {
+        _shift           = shift;
+        _fractional_bits = fractional_bits;
+        _target          = compute_target(shape, dt_in, dt_out, policy, shift, fractional_bits);
+        _reference       = compute_reference(shape, dt_in, dt_out, policy, shift, fractional_bits);
+    }
+
+protected:
+    template <typename U>
+    void fill(U &&tensor, int i)
+    {
+        library->fill_tensor_uniform(tensor, i);
+    }
+
+    TensorType compute_target(const TensorShape &shape, DataType dt_in, DataType dt_out, ConvertPolicy policy, uint32_t shift, uint32_t fixed_point_position)
+    {
+        // Create tensors
+        TensorType src = create_tensor<TensorType>(shape, dt_in, 1, static_cast<int>(fixed_point_position));
+        TensorType dst = create_tensor<TensorType>(shape, dt_out, 1, static_cast<int>(fixed_point_position));
+
+        // Create and configure function
+        FunctionType depth_convert;
+        depth_convert.configure(&src, &dst, policy, shift);
+
+        ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+        // Allocate tensors
+        src.allocator()->allocate();
+        dst.allocator()->allocate();
+
+        ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+        // Fill tensors
+        fill(AccessorType(src), 0);
+
+        // Compute function
+        depth_convert.run();
+
+        return dst;
+    }
+
+    SimpleTensor<T2> compute_reference(const TensorShape &shape, DataType dt_in, DataType dt_out, ConvertPolicy policy, uint32_t shift, uint32_t fixed_point_position)
+    {
+        // Create reference
+        SimpleTensor<T1> src{ shape, dt_in, 1, static_cast<int>(fixed_point_position) };
+
+        // Fill reference
+        fill(src, 0);
+
+        return reference::depth_convert<T1, T2>(src, dt_out, policy, shift);
+    }
+
+    TensorType       _target{};
+    SimpleTensor<T2> _reference{};
+    int              _fractional_bits{};
+    int              _shift{};
+};
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T1, typename T2>
+class DepthConvertLayerValidationFixture : public DepthConvertLayerValidationFixedPointFixture<TensorType, AccessorType, FunctionType, T1, T2>
+{
+public:
+    template <typename...>
+    void setup(TensorShape shape, DataType dt_in, DataType dt_out, ConvertPolicy policy, uint32_t shift)
+    {
+        DepthConvertLayerValidationFixedPointFixture<TensorType, AccessorType, FunctionType, T1, T2>::setup(shape, dt_in, dt_out, policy, shift, 0);
+    }
+};
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T1, typename T2>
+class DepthConvertLayerValidationFractionalBitsFixture : public DepthConvertLayerValidationFixedPointFixture<TensorType, AccessorType, FunctionType, T1, T2>
+{
+public:
+    template <typename...>
+    void setup(TensorShape shape, DataType dt_in, DataType dt_out, ConvertPolicy policy, uint32_t fractional_bits)
+    {
+        DepthConvertLayerValidationFixedPointFixture<TensorType, AccessorType, FunctionType, T1, T2>::setup(shape, dt_in, dt_out, policy, 0, fractional_bits);
+    }
+};
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_TEST_DEPTH_CONVERT_FIXTURE */
diff --git a/tests/validation/fixtures/DepthwiseConvolutionFixture.h b/tests/validation/fixtures/DepthwiseConvolutionFixture.h
deleted file mode 100644
index b1d31d657a..0000000000
--- a/tests/validation/fixtures/DepthwiseConvolutionFixture.h
+++ /dev/null
@@ -1,179 +0,0 @@
-/*
- * Copyright (c) 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_TEST_DEPTHWISE_CONVOLUTION_FIXTURE
-#define ARM_COMPUTE_TEST_DEPTHWISE_CONVOLUTION_FIXTURE
-
-#include "arm_compute/core/TensorShape.h"
-#include "arm_compute/core/Types.h"
-#include "tests/AssetsLibrary.h"
-#include "tests/Globals.h"
-#include "tests/IAccessor.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Fixture.h"
-#include "tests/validation/CPP/DepthwiseConvolution.h"
-#include "tests/validation/Helpers.h"
-
-#include <random>
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
-class DepthwiseConvolutionValidationGenericFixture : public framework::Fixture
-{
-public:
-    using TBias = typename std::conditional<std::is_same<typename std::decay<T>::type, uint8_t>::value, int32_t, T>::type;
-
-public:
-    template <typename...>
-    void setup(TensorShape in_shape, TensorShape weights_shape, TensorShape biases_shape, TensorShape out_shape, PadStrideInfo pad_stride_info, DataType data_type, QuantizationInfo quantization_info)
-    {
-        _quantization_info = quantization_info;
-        _data_type         = data_type;
-
-        const DataType bias_data_type = is_data_type_quantized_asymmetric(data_type) ? DataType::S32 : data_type;
-
-        _target    = compute_target(in_shape, weights_shape, biases_shape, out_shape, pad_stride_info, data_type, bias_data_type, quantization_info);
-        _reference = compute_reference(in_shape, weights_shape, biases_shape, out_shape, pad_stride_info, data_type, bias_data_type, quantization_info);
-    }
-
-protected:
-    template <typename U>
-    void fill(U &&tensor, int i)
-    {
-        switch(tensor.data_type())
-        {
-            case DataType::QASYMM8:
-            {
-                std::uniform_int_distribution<uint8_t> distribution(0, 10);
-                library->fill(tensor, distribution, i);
-                break;
-            }
-            case DataType::F32:
-            {
-                std::uniform_real_distribution<> distribution(-1.0f, 1.0f);
-                library->fill(tensor, distribution, i);
-                break;
-            }
-            case DataType::S32:
-            {
-                std::uniform_int_distribution<int32_t> distribution(-1000, 1000);
-                library->fill(tensor, distribution, i);
-                break;
-            }
-            default:
-                library->fill_tensor_uniform(tensor, i);
-        }
-    }
-
-    TensorType compute_target(const TensorShape &input_shape, const TensorShape &weights_shape, const TensorShape &biases_shape, const TensorShape &output_shape, PadStrideInfo &pad_stride_info,
-                              const DataType data_type, const DataType bias_data_type, const QuantizationInfo quantization_info)
-    {
-        // Create tensors
-        TensorType src     = create_tensor<TensorType>(input_shape, data_type, 1, 0, quantization_info);
-        TensorType weights = create_tensor<TensorType>(weights_shape, data_type, 1, 0, quantization_info);
-        TensorType biases  = create_tensor<TensorType>(biases_shape, bias_data_type, 1, 0, quantization_info);
-        TensorType dst     = create_tensor<TensorType>(output_shape, data_type, 1, 0, quantization_info);
-
-        // Create Depthwise Convolution configure function
-        FunctionType dwc;
-        dwc.configure(&src, &weights, &biases, &dst, pad_stride_info);
-
-        ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(weights.info()->is_resizable(), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(biases.info()->is_resizable(), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
-
-        // Allocate tensors
-        src.allocator()->allocate();
-        weights.allocator()->allocate();
-        biases.allocator()->allocate();
-        dst.allocator()->allocate();
-
-        ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(!weights.info()->is_resizable(), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(!biases.info()->is_resizable(), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
-
-        // Fill tensors
-        fill(AccessorType(src), 0);
-        fill(AccessorType(weights), 1);
-        fill(AccessorType(biases), 2);
-
-        // Compute function
-        dwc.run();
-
-        return dst;
-    }
-
-    SimpleTensor<T> compute_reference(const TensorShape &in_shape, const TensorShape &weights_shape, const TensorShape &biases_shape, const TensorShape &out_shape, const PadStrideInfo &pad_stride_info,
-                                      const DataType data_type, const DataType bias_data_type, QuantizationInfo quantization_info)
-    {
-        SimpleTensor<T>     src{ in_shape, data_type, 1, 0, quantization_info };
-        SimpleTensor<T>     weights{ weights_shape, data_type, 1, 0, quantization_info };
-        SimpleTensor<TBias> biases{ biases_shape, data_type, 1, 0, quantization_info };
-
-        fill(src, 0);
-        fill(weights, 1);
-        fill(biases, 2);
-
-        return reference::depthwise_convolution(src, weights, biases, out_shape, pad_stride_info);
-    }
-
-    TensorType       _target{};
-    SimpleTensor<T>  _reference{};
-    DataType         _data_type{};
-    QuantizationInfo _quantization_info{};
-};
-
-template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
-class DepthwiseConvolutionValidationFixture : public DepthwiseConvolutionValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
-{
-public:
-    template <typename...>
-    void setup(TensorShape in_shape, TensorShape weights_shape, TensorShape biases_shape, TensorShape out_shape, PadStrideInfo pad_stride_info, DataType data_type)
-    {
-        DepthwiseConvolutionValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(in_shape, weights_shape, biases_shape, out_shape, pad_stride_info,
-                                                                                                       data_type, QuantizationInfo());
-    }
-};
-
-template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
-class DepthwiseConvolutionValidationQuantizedFixture : public DepthwiseConvolutionValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
-{
-public:
-    template <typename...>
-    void setup(TensorShape in_shape, TensorShape weights_shape, TensorShape biases_shape, TensorShape out_shape, PadStrideInfo pad_stride_info, DataType data_type, QuantizationInfo quantization_info)
-    {
-        DepthwiseConvolutionValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(in_shape, weights_shape, biases_shape, out_shape, pad_stride_info,
-                                                                                                       data_type, quantization_info);
-    }
-};
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_TEST_DEPTHWISE_CONVOLUTION_FIXTURE */
diff --git a/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h b/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h
new file mode 100644
index 0000000000..0af3fdf6c4
--- /dev/null
+++ b/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h
@@ -0,0 +1,179 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_TEST_DEPTHWISE_CONVOLUTION_FIXTURE
+#define ARM_COMPUTE_TEST_DEPTHWISE_CONVOLUTION_FIXTURE
+
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+#include "tests/AssetsLibrary.h"
+#include "tests/Globals.h"
+#include "tests/IAccessor.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Fixture.h"
+#include "tests/validation/CPP/DepthwiseConvolutionLayer.h"
+#include "tests/validation/Helpers.h"
+
+#include <random>
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class DepthwiseConvolutionLayerValidationGenericFixture : public framework::Fixture
+{
+public:
+    using TBias = typename std::conditional<std::is_same<typename std::decay<T>::type, uint8_t>::value, int32_t, T>::type;
+
+public:
+    template <typename...>
+    void setup(TensorShape in_shape, TensorShape weights_shape, TensorShape biases_shape, TensorShape out_shape, PadStrideInfo pad_stride_info, DataType data_type, QuantizationInfo quantization_info)
+    {
+        _quantization_info = quantization_info;
+        _data_type         = data_type;
+
+        const DataType bias_data_type = is_data_type_quantized_asymmetric(data_type) ? DataType::S32 : data_type;
+
+        _target    = compute_target(in_shape, weights_shape, biases_shape, out_shape, pad_stride_info, data_type, bias_data_type, quantization_info);
+        _reference = compute_reference(in_shape, weights_shape, biases_shape, out_shape, pad_stride_info, data_type, bias_data_type, quantization_info);
+    }
+
+protected:
+    template <typename U>
+    void fill(U &&tensor, int i)
+    {
+        switch(tensor.data_type())
+        {
+            case DataType::QASYMM8:
+            {
+                std::uniform_int_distribution<uint8_t> distribution(0, 10);
+                library->fill(tensor, distribution, i);
+                break;
+            }
+            case DataType::F32:
+            {
+                std::uniform_real_distribution<> distribution(-1.0f, 1.0f);
+                library->fill(tensor, distribution, i);
+                break;
+            }
+            case DataType::S32:
+            {
+                std::uniform_int_distribution<int32_t> distribution(-1000, 1000);
+                library->fill(tensor, distribution, i);
+                break;
+            }
+            default:
+                library->fill_tensor_uniform(tensor, i);
+        }
+    }
+
+    TensorType compute_target(const TensorShape &input_shape, const TensorShape &weights_shape, const TensorShape &biases_shape, const TensorShape &output_shape, PadStrideInfo &pad_stride_info,
+                              const DataType data_type, const DataType bias_data_type, const QuantizationInfo quantization_info)
+    {
+        // Create tensors
+        TensorType src     = create_tensor<TensorType>(input_shape, data_type, 1, 0, quantization_info);
+        TensorType weights = create_tensor<TensorType>(weights_shape, data_type, 1, 0, quantization_info);
+        TensorType biases  = create_tensor<TensorType>(biases_shape, bias_data_type, 1, 0, quantization_info);
+        TensorType dst     = create_tensor<TensorType>(output_shape, data_type, 1, 0, quantization_info);
+
+        // Create Depthwise Convolution configure function
+        FunctionType dwc;
+        dwc.configure(&src, &weights, &biases, &dst, pad_stride_info);
+
+        ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(weights.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(biases.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+        // Allocate tensors
+        src.allocator()->allocate();
+        weights.allocator()->allocate();
+        biases.allocator()->allocate();
+        dst.allocator()->allocate();
+
+        ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(!weights.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(!biases.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+        // Fill tensors
+        fill(AccessorType(src), 0);
+        fill(AccessorType(weights), 1);
+        fill(AccessorType(biases), 2);
+
+        // Compute function
+        dwc.run();
+
+        return dst;
+    }
+
+    SimpleTensor<T> compute_reference(const TensorShape &in_shape, const TensorShape &weights_shape, const TensorShape &biases_shape, const TensorShape &out_shape, const PadStrideInfo &pad_stride_info,
+                                      const DataType data_type, const DataType bias_data_type, QuantizationInfo quantization_info)
+    {
+        SimpleTensor<T>     src{ in_shape, data_type, 1, 0, quantization_info };
+        SimpleTensor<T>     weights{ weights_shape, data_type, 1, 0, quantization_info };
+        SimpleTensor<TBias> biases{ biases_shape, data_type, 1, 0, quantization_info };
+
+        fill(src, 0);
+        fill(weights, 1);
+        fill(biases, 2);
+
+        return reference::depthwise_convolution(src, weights, biases, out_shape, pad_stride_info);
+    }
+
+    TensorType       _target{};
+    SimpleTensor<T>  _reference{};
+    DataType         _data_type{};
+    QuantizationInfo _quantization_info{};
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class DepthwiseConvolutionLayerValidationFixture : public DepthwiseConvolutionLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+    template <typename...>
+    void setup(TensorShape in_shape, TensorShape weights_shape, TensorShape biases_shape, TensorShape out_shape, PadStrideInfo pad_stride_info, DataType data_type)
+    {
+        DepthwiseConvolutionLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(in_shape, weights_shape, biases_shape, out_shape, pad_stride_info,
+                                                                                                            data_type, QuantizationInfo());
+    }
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class DepthwiseConvolutionLayerValidationQuantizedFixture : public DepthwiseConvolutionLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+    template <typename...>
+    void setup(TensorShape in_shape, TensorShape weights_shape, TensorShape biases_shape, TensorShape out_shape, PadStrideInfo pad_stride_info, DataType data_type, QuantizationInfo quantization_info)
+    {
+        DepthwiseConvolutionLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(in_shape, weights_shape, biases_shape, out_shape, pad_stride_info,
+                                                                                                            data_type, quantization_info);
+    }
+};
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_TEST_DEPTHWISE_CONVOLUTION_FIXTURE */
diff --git a/tests/validation/fixtures/L2NormalizeFixture.h b/tests/validation/fixtures/L2NormalizeFixture.h
deleted file mode 100644
index e6113937f1..0000000000
--- a/tests/validation/fixtures/L2NormalizeFixture.h
+++ /dev/null
@@ -1,107 +0,0 @@
-/*
- * Copyright (c) 2017 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_TEST_L2NORMALIZE_FIXTURE
-#define ARM_COMPUTE_TEST_L2NORMALIZE_FIXTURE
-
-#include "arm_compute/core/TensorShape.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/Tensor.h"
-#include "tests/AssetsLibrary.h"
-#include "tests/Globals.h"
-#include "tests/IAccessor.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Fixture.h"
-#include "tests/validation/CPP/L2Normalize.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
-class L2NormalizeValidationFixture : public framework::Fixture
-{
-public:
-    template <typename...>
-    void setup(TensorShape shape, DataType data_type, unsigned int axis, float epsilon)
-    {
-        _target    = compute_target(shape, data_type, axis, epsilon);
-        _reference = compute_reference(shape, data_type, axis, epsilon);
-    }
-
-protected:
-    template <typename U>
-    void fill(U &&tensor)
-    {
-        library->fill_tensor_uniform(tensor, 0);
-    }
-
-    TensorType compute_target(const TensorShape &shape, DataType data_type, unsigned int axis, float epsilon)
-    {
-        // Create tensors
-        TensorType src = create_tensor<TensorType>(shape, data_type);
-        TensorType dst = create_tensor<TensorType>(shape, data_type);
-
-        // Create and configure function
-        FunctionType l2_norm_func;
-        l2_norm_func.configure(&src, &dst, axis, epsilon);
-
-        ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
-
-        // Allocate tensors
-        src.allocator()->allocate();
-        dst.allocator()->allocate();
-
-        ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
-
-        // Fill tensors
-        fill(AccessorType(src));
-
-        // Compute function
-        l2_norm_func.run();
-
-        return dst;
-    }
-
-    SimpleTensor<T> compute_reference(const TensorShape &shape, DataType data_type, unsigned int axis, float epsilon)
-    {
-        // Create reference
-        SimpleTensor<T> src{ shape, data_type };
-
-        // Fill reference
-        fill(src);
-
-        return reference::l2_normalize<T>(src, axis, epsilon);
-    }
-
-    TensorType      _target{};
-    SimpleTensor<T> _reference{};
-};
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_TEST_L2NORMALIZE_FIXTURE */
diff --git a/tests/validation/fixtures/L2NormalizeLayerFixture.h b/tests/validation/fixtures/L2NormalizeLayerFixture.h
new file mode 100644
index 0000000000..7bb95883f7
--- /dev/null
+++ b/tests/validation/fixtures/L2NormalizeLayerFixture.h
@@ -0,0 +1,107 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_TEST_L2NORMALIZE_FIXTURE
+#define ARM_COMPUTE_TEST_L2NORMALIZE_FIXTURE
+
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/Tensor.h"
+#include "tests/AssetsLibrary.h"
+#include "tests/Globals.h"
+#include "tests/IAccessor.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Fixture.h"
+#include "tests/validation/CPP/L2NormalizeLayer.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class L2NormalizeLayerValidationFixture : public framework::Fixture
+{
+public:
+    template <typename...>
+    void setup(TensorShape shape, DataType data_type, unsigned int axis, float epsilon)
+    {
+        _target    = compute_target(shape, data_type, axis, epsilon);
+        _reference = compute_reference(shape, data_type, axis, epsilon);
+    }
+
+protected:
+    template <typename U>
+    void fill(U &&tensor)
+    {
+        library->fill_tensor_uniform(tensor, 0);
+    }
+
+    TensorType compute_target(const TensorShape &shape, DataType data_type, unsigned int axis, float epsilon)
+    {
+        // Create tensors
+        TensorType src = create_tensor<TensorType>(shape, data_type);
+        TensorType dst = create_tensor<TensorType>(shape, data_type);
+
+        // Create and configure function
+        FunctionType l2_norm_func;
+        l2_norm_func.configure(&src, &dst, axis, epsilon);
+
+        ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+        // Allocate tensors
+        src.allocator()->allocate();
+        dst.allocator()->allocate();
+
+        ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+        // Fill tensors
+        fill(AccessorType(src));
+
+        // Compute function
+        l2_norm_func.run();
+
+        return dst;
+    }
+
+    SimpleTensor<T> compute_reference(const TensorShape &shape, DataType data_type, unsigned int axis, float epsilon)
+    {
+        // Create reference
+        SimpleTensor<T> src{ shape, data_type };
+
+        // Fill reference
+        fill(src);
+
+        return reference::l2_normalize<T>(src, axis, epsilon);
+    }
+
+    TensorType      _target{};
+    SimpleTensor<T> _reference{};
+};
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_TEST_L2NORMALIZE_FIXTURE */
-- 
cgit v1.2.1
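
Editor's note on usage: since only the type names change in this patch, existing callers migrate by re-spelling the class. A minimal sketch follows, assuming (as the fixture's compute_target() above exercises) that the renamed NEL2NormalizeLayer keeps the configure(&src, &dst, axis, epsilon) signature of the old NEL2Normalize; the shapes and values here are illustrative, not from the patch.

    // Minimal usage sketch (not part of the patch). Drives the renamed NEON
    // function the same way the fixture's compute_target() does.
    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/NEON/functions/NEL2NormalizeLayer.h"
    #include "arm_compute/runtime/Tensor.h"
    #include "arm_compute/runtime/TensorAllocator.h"

    using namespace arm_compute;

    int main()
    {
        // Illustrative 2D F32 tensors; any shape supported by the kernel works.
        Tensor src{};
        Tensor dst{};
        src.allocator()->init(TensorInfo(TensorShape(128U, 24U), 1, DataType::F32));
        dst.allocator()->init(TensorInfo(TensorShape(128U, 24U), 1, DataType::F32));

        // Renamed class, unchanged configure() signature (assumption based on the fixture).
        NEL2NormalizeLayer l2_norm;
        l2_norm.configure(&src, &dst, 0 /* axis */, 1e-12 /* epsilon */);

        src.allocator()->allocate();
        dst.allocator()->allocate();

        // ... fill src with data here ...
        l2_norm.run();
        return 0;
    }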
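The one piece of template machinery in these fixtures worth calling out is the TBias alias: for quantized QASYMM8 (uint8_t) element types the bias tensor must accumulate in S32, so the bias element type is selected at compile time. Below is a standalone sketch of the same std::conditional/std::is_same/std::decay idiom, with static_asserts standing in for the fixture's behaviour; it is an illustration, not code from the patch.

    #include <cstdint>
    #include <type_traits>

    // Same idiom as the fixtures' TBias alias: uint8_t (QASYMM8) inputs get
    // int32_t biases; every other element type keeps its own bias type.
    template <typename T>
    using TBias = typename std::conditional<std::is_same<typename std::decay<T>::type, uint8_t>::value, int32_t, T>::type;

    static_assert(std::is_same<TBias<uint8_t>, int32_t>::value, "QASYMM8 biases accumulate in S32");
    static_assert(std::is_same<TBias<float>, float>::value, "F32 biases stay F32");

    int main()
    {
        return 0;
    }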
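Finally, every fixture in this patch validates by comparing the target output against a scalar reference within a relative tolerance (the suites use tolerance_f32 = 0.00001f). The sketch below is not the framework's RelativeTolerance implementation, only the idea behind it, written as a self-contained illustration.

    #include <cassert>
    #include <cmath>
    #include <vector>

    // Standalone sketch of the validation idiom the fixtures implement:
    // run the target, run a reference, compare element-wise within a
    // relative tolerance.
    static bool within_relative_tolerance(float target, float ref, float rel_tol)
    {
        const float margin = rel_tol * std::fmax(std::fabs(target), std::fabs(ref));
        return std::fabs(target - ref) <= margin;
    }

    int main()
    {
        const float rel_tol = 0.00001f; // same value as the suites' tolerance_f32
        std::vector<float> target = { 0.999999f, 2.0f };
        std::vector<float> ref    = { 1.0f, 2.0f };
        for(size_t i = 0; i < target.size(); ++i)
        {
            assert(within_relative_tolerance(target[i], ref[i], rel_tol));
        }
        return 0;
    }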