From 6ff3b19ee6120edf015fad8caab2991faa3070af Mon Sep 17 00:00:00 2001
From: Anthony Barbier
Date: Mon, 4 Sep 2017 18:44:23 +0100
Subject: COMPMID-344 Updated doxygen

Change-Id: I32f7b84daa560e460b77216add529c8fa8b327ae
---
 tests/CL/CLAccessor.h | 136 ++
 tests/CL/Helper.h | 76 ++
 tests/CMakeLists.txt | 85 ++
 tests/Globals.h | 38 +
 tests/IAccessor.h | 89 ++
 tests/NEON/Helper.h | 77 ++
 tests/NEON/NEAccessor.h | 124 ++
 tests/ProgramOptions.cpp | 88 ++
 tests/ProgramOptions.h | 101 ++
 tests/RawTensor.cpp | 180 +++
 tests/RawTensor.h | 159 +++
 tests/SConscript | 150 +++
 tests/TensorCache.h | 118 ++
 tests/TensorLibrary.cpp | 475 +++++++
 tests/TensorLibrary.h | 656 ++++++++++
 tests/TypePrinter.h | 403 ++++++
 tests/TypeReader.h | 67 +
 tests/Types.h | 37 +
 tests/UserConfiguration.cpp | 55 +
 tests/UserConfiguration.h | 136 ++
 tests/Utils.h | 672 ++++++++++
 tests/benchmark/CL/ActivationLayer.cpp | 212 +++
 tests/benchmark/CL/BitwiseAnd.cpp | 133 ++
 tests/benchmark/CL/CMakeLists.txt | 57 +
 tests/benchmark/CL/ConvolutionLayer.cpp | 277 ++++
 tests/benchmark/CL/FullyConnectedLayer.cpp | 116 ++
 tests/benchmark/CL/GEMM.cpp | 492 +++++++
 tests/benchmark/CL/GEMM.h | 102 ++
 tests/benchmark/CL/NormalizationLayer.cpp | 93 ++
 tests/benchmark/CL/PoolingLayer.cpp | 141 ++
 tests/benchmark/CMakeLists.txt | 100 ++
 tests/benchmark/Datasets.h | 79 ++
 tests/benchmark/Instrument.h | 107 ++
 tests/benchmark/NEON/ActivationLayer.cpp | 239 ++++
 tests/benchmark/NEON/BitwiseAnd.cpp | 126 ++
 tests/benchmark/NEON/CMakeLists.txt | 37 +
 tests/benchmark/NEON/ConvolutionLayer.cpp | 303 +++++
 tests/benchmark/NEON/ConvolutionLayerDirect.cpp | 74 ++
 tests/benchmark/NEON/FullyConnectedLayer.cpp | 132 ++
 tests/benchmark/NEON/GEMM.cpp | 709 ++++++++++
 tests/benchmark/NEON/GEMM.h | 106 ++
 tests/benchmark/NEON/NormalizationLayer.cpp | 111 ++
 tests/benchmark/NEON/PoolingLayer.cpp | 162 +++
 tests/benchmark/PMUCounter.cpp | 144 ++
 tests/benchmark/PMUCounter.h | 71 +
 tests/benchmark/PerformanceProgramOptions.cpp | 48 +
 tests/benchmark/PerformanceProgramOptions.h | 45 +
 tests/benchmark/PerformanceUserConfiguration.cpp | 45 +
 tests/benchmark/PerformanceUserConfiguration.h | 57 +
 tests/benchmark/Profiler.cpp | 87 ++
 tests/benchmark/Profiler.h | 76 ++
 tests/benchmark/WallClockTimer.cpp | 56 +
 tests/benchmark/WallClockTimer.h | 53 +
 tests/benchmark/common/ActivationLayer.h | 92 ++
 tests/benchmark/common/ConvolutionLayer.h | 107 ++
 tests/benchmark/common/FullyConnectedLayer.h | 108 ++
 tests/benchmark/common/NormalizationLayer.h | 96 ++
 tests/benchmark/common/PoolingLayer.h | 95 ++
 tests/benchmark/main.cpp | 96 ++
 tests/benchmark/system_tests/CL/AlexNet.cpp | 87 ++
 tests/benchmark/system_tests/CL/LeNet5.cpp | 82 ++
 tests/benchmark/system_tests/NEON/AlexNet.cpp | 120 ++
 tests/benchmark/system_tests/NEON/LeNet5.cpp | 80 ++
 tests/benchmark/system_tests/common/AlexNet.h | 95 ++
 tests/benchmark/system_tests/common/LeNet5.h | 82 ++
 tests/boost_wrapper.h | 40 +
 tests/dataset/ActivationFunctionDataset.h | 66 +
 tests/dataset/ActivationLayerDataset.h | 177 +++
 tests/dataset/BatchNormalizationLayerDataset.h | 90 ++
 tests/dataset/BorderModeDataset.h | 82 ++
 tests/dataset/ConvertPolicyDataset.h | 82 ++
 tests/dataset/ConvolutionLayerDataset.h | 269 ++++
 tests/dataset/DataTypeDatasets.h | 193 +++
 tests/dataset/FullyConnectedLayerDataset.h | 155 +++
 tests/dataset/GEMMDataset.h | 204 +++
 tests/dataset/GenericDataset.h | 97 ++
 tests/dataset/ImageDatasets.h | 120 ++
 tests/dataset/InterpolationPolicyDataset.h | 80 ++
 tests/dataset/NormalizationLayerDataset.h | 99 ++
 tests/dataset/NormalizationTypeDataset.h | 80 ++
 tests/dataset/PoolingLayerDataset.h | 159 +++
 tests/dataset/RoundingPolicyDataset.h | 82 ++
 tests/dataset/ShapeDatasets.h | 130 ++
 tests/dataset/ThresholdDataset.h | 95 ++
 tests/model_objects/AlexNet.h | 582 +++++++++
 tests/model_objects/LeNet5.h | 277 ++++
 tests/validation/CL/BitwiseAnd.cpp | 218 ++++
 tests/validation/CL/CLFixture.cpp | 33 +
 tests/validation/CL/CLFixture.h | 48 +
 tests/validation/CL/CMakeLists.txt | 48 +
 tests/validation/CL/DepthConvert.cpp | 413 ++++++
 tests/validation/CL/FillBorder.cpp | 91 ++
 tests/validation/CL/Threshold.cpp | 154 +++
 tests/validation/CMakeLists.txt | 96 ++
 tests/validation/Datasets.h | 238 ++++
 tests/validation/FixedPoint.h | 975 ++++++++++++++
 tests/validation/Helpers.h | 123 ++
 tests/validation/NEON/AbsoluteDifference.cpp | 201 +++
 tests/validation/NEON/Accumulate.cpp | 146 +++
 tests/validation/NEON/AccumulateSquared.cpp | 147 +++
 tests/validation/NEON/AccumulateWeighted.cpp | 146 +++
 tests/validation/NEON/ActivationLayer.cpp | 217 ++++
 tests/validation/NEON/ArithmeticAddition.cpp | 228 ++++
 tests/validation/NEON/ArithmeticSubtraction.cpp | 228 ++++
 tests/validation/NEON/BatchNormalizationLayer.cpp | 195 +++
 tests/validation/NEON/BitwiseAnd.cpp | 218 ++++
 tests/validation/NEON/BitwiseNot.cpp | 142 ++
 tests/validation/NEON/BitwiseOr.cpp | 150 +++
 tests/validation/NEON/BitwiseXor.cpp | 150 +++
 tests/validation/NEON/Box3x3.cpp | 145 +++
 tests/validation/NEON/CMakeLists.txt | 55 +
 tests/validation/NEON/ConvolutionLayer.cpp | 200 +++
 tests/validation/NEON/ConvolutionLayerDirect.cpp | 219 ++++
 tests/validation/NEON/DepthConvert.cpp | 500 +++++++
 tests/validation/NEON/FillBorder.cpp | 90 ++
 tests/validation/NEON/Fixedpoint/Exp_QS8.cpp | 124 ++
 tests/validation/NEON/Fixedpoint/Invsqrt_QS8.cpp | 123 ++
 tests/validation/NEON/Fixedpoint/Log_QS8.cpp | 123 ++
 .../validation/NEON/Fixedpoint/Reciprocal_QS8.cpp | 123 ++
 tests/validation/NEON/FullyConnectedLayer.cpp | 221 ++++
 tests/validation/NEON/GEMM.cpp | 203 +++
 tests/validation/NEON/IntegralImage.cpp | 145 +++
 tests/validation/NEON/NormalizationLayer.cpp | 152 +++
 tests/validation/NEON/PixelWiseMultiplication.cpp | 428 ++++++
 tests/validation/NEON/Pooling/PoolingLayer.cpp | 139 ++
 tests/validation/NEON/SoftmaxLayer.cpp | 196 +++
 tests/validation/NEON/Threshold.cpp | 154 +++
 tests/validation/Reference.cpp | 596 +++++++++
 tests/validation/Reference.h | 303 +++++
 tests/validation/ReferenceCPP.cpp | 282 ++++
 tests/validation/ReferenceCPP.h | 250 ++++
 tests/validation/Tensor.h | 111 ++
 tests/validation/TensorFactory.h | 113 ++
 tests/validation/TensorOperations.h | 1370 ++++++++++++++
 tests/validation/TensorVisitors.h | 386 ++++++
 tests/validation/UNIT/CMakeLists.txt | 37 +
 tests/validation/UNIT/FixedPoint.cpp | 163 +++
 tests/validation/UNIT/TensorInfo.cpp | 91 ++
 tests/validation/UNIT/TensorShape.cpp | 70 +
 tests/validation/UNIT/Utils.cpp | 95 ++
 tests/validation/Validation.cpp | 359 +++++
 tests/validation/Validation.h | 127 ++
 tests/validation/ValidationProgramOptions.cpp | 50 +
 tests/validation/ValidationProgramOptions.h | 45 +
 tests/validation/ValidationUserConfiguration.h | 42 +
 tests/validation/main.cpp | 104 ++
 tests/validation/system_tests/CL/AlexNet.cpp | 111 ++
 tests/validation/system_tests/CL/LeNet5.cpp | 94 ++
 tests/validation/system_tests/NEON/AlexNet.cpp | 112 ++
 tests/validation/system_tests/NEON/LeNet5.cpp | 94 ++
 150 files changed, 25761 insertions(+)
 create mode 100644 tests/CL/CLAccessor.h
 create mode 100644 tests/CL/Helper.h
 create mode 100644 tests/CMakeLists.txt
 create mode 100644 tests/Globals.h
 create mode 100644 tests/IAccessor.h
 create mode 100644 tests/NEON/Helper.h
 create mode 100644 tests/NEON/NEAccessor.h
 create mode 100644 tests/ProgramOptions.cpp
 create mode 100644 tests/ProgramOptions.h
 create mode 100644 tests/RawTensor.cpp
 create mode 100644 tests/RawTensor.h
 create mode 100644 tests/SConscript
 create mode 100644 tests/TensorCache.h
 create mode 100644 tests/TensorLibrary.cpp
 create mode 100644 tests/TensorLibrary.h
 create mode 100644 tests/TypePrinter.h
 create mode 100644 tests/TypeReader.h
 create mode 100644 tests/Types.h
 create mode 100644 tests/UserConfiguration.cpp
 create mode 100644 tests/UserConfiguration.h
 create mode 100644 tests/Utils.h
 create mode 100644 tests/benchmark/CL/ActivationLayer.cpp
 create mode 100644 tests/benchmark/CL/BitwiseAnd.cpp
 create mode 100644 tests/benchmark/CL/CMakeLists.txt
 create mode 100644 tests/benchmark/CL/ConvolutionLayer.cpp
 create mode 100644 tests/benchmark/CL/FullyConnectedLayer.cpp
 create mode 100644 tests/benchmark/CL/GEMM.cpp
 create mode 100644 tests/benchmark/CL/GEMM.h
 create mode 100644 tests/benchmark/CL/NormalizationLayer.cpp
 create mode 100644 tests/benchmark/CL/PoolingLayer.cpp
 create mode 100644 tests/benchmark/CMakeLists.txt
 create mode 100644 tests/benchmark/Datasets.h
 create mode 100644 tests/benchmark/Instrument.h
 create mode 100644 tests/benchmark/NEON/ActivationLayer.cpp
 create mode 100644 tests/benchmark/NEON/BitwiseAnd.cpp
 create mode 100644 tests/benchmark/NEON/CMakeLists.txt
 create mode 100644 tests/benchmark/NEON/ConvolutionLayer.cpp
 create mode 100644 tests/benchmark/NEON/ConvolutionLayerDirect.cpp
 create mode 100644 tests/benchmark/NEON/FullyConnectedLayer.cpp
 create mode 100644 tests/benchmark/NEON/GEMM.cpp
 create mode 100644 tests/benchmark/NEON/GEMM.h
 create mode 100644 tests/benchmark/NEON/NormalizationLayer.cpp
 create mode 100644 tests/benchmark/NEON/PoolingLayer.cpp
 create mode 100644 tests/benchmark/PMUCounter.cpp
 create mode 100644 tests/benchmark/PMUCounter.h
 create mode 100644 tests/benchmark/PerformanceProgramOptions.cpp
 create mode 100644 tests/benchmark/PerformanceProgramOptions.h
 create mode 100644 tests/benchmark/PerformanceUserConfiguration.cpp
 create mode 100644 tests/benchmark/PerformanceUserConfiguration.h
 create mode 100644 tests/benchmark/Profiler.cpp
 create mode 100644 tests/benchmark/Profiler.h
 create mode 100644 tests/benchmark/WallClockTimer.cpp
 create mode 100644 tests/benchmark/WallClockTimer.h
 create mode 100644 tests/benchmark/common/ActivationLayer.h
 create mode 100644 tests/benchmark/common/ConvolutionLayer.h
 create mode 100644 tests/benchmark/common/FullyConnectedLayer.h
 create mode 100644 tests/benchmark/common/NormalizationLayer.h
 create mode 100644 tests/benchmark/common/PoolingLayer.h
 create mode 100644 tests/benchmark/main.cpp
 create mode 100644 tests/benchmark/system_tests/CL/AlexNet.cpp
 create mode 100644 tests/benchmark/system_tests/CL/LeNet5.cpp
 create mode 100644 tests/benchmark/system_tests/NEON/AlexNet.cpp
 create mode 100644 tests/benchmark/system_tests/NEON/LeNet5.cpp
 create mode 100644 tests/benchmark/system_tests/common/AlexNet.h
 create mode 100644 tests/benchmark/system_tests/common/LeNet5.h
 create mode 100644 tests/boost_wrapper.h
 create mode 100644 tests/dataset/ActivationFunctionDataset.h
 create mode 100644 tests/dataset/ActivationLayerDataset.h
 create mode 100644 tests/dataset/BatchNormalizationLayerDataset.h
 create mode 100644 tests/dataset/BorderModeDataset.h
 create mode 100644 tests/dataset/ConvertPolicyDataset.h
 create mode 100644 tests/dataset/ConvolutionLayerDataset.h
 create mode 100644 tests/dataset/DataTypeDatasets.h
 create mode 100644 tests/dataset/FullyConnectedLayerDataset.h
 create mode 100644 tests/dataset/GEMMDataset.h
 create mode 100644 tests/dataset/GenericDataset.h
 create mode 100644 tests/dataset/ImageDatasets.h
 create mode 100644 tests/dataset/InterpolationPolicyDataset.h
 create mode 100644 tests/dataset/NormalizationLayerDataset.h
 create mode 100644 tests/dataset/NormalizationTypeDataset.h
 create mode 100644 tests/dataset/PoolingLayerDataset.h
 create mode 100644 tests/dataset/RoundingPolicyDataset.h
 create mode 100644 tests/dataset/ShapeDatasets.h
 create mode 100644 tests/dataset/ThresholdDataset.h
 create mode 100644 tests/model_objects/AlexNet.h
 create mode 100644 tests/model_objects/LeNet5.h
 create mode 100644 tests/validation/CL/BitwiseAnd.cpp
 create mode 100644 tests/validation/CL/CLFixture.cpp
 create mode 100644 tests/validation/CL/CLFixture.h
 create mode 100644 tests/validation/CL/CMakeLists.txt
 create mode 100644 tests/validation/CL/DepthConvert.cpp
 create mode 100644 tests/validation/CL/FillBorder.cpp
 create mode 100644 tests/validation/CL/Threshold.cpp
 create mode 100644 tests/validation/CMakeLists.txt
 create mode 100644 tests/validation/Datasets.h
 create mode 100644 tests/validation/FixedPoint.h
 create mode 100644 tests/validation/Helpers.h
 create mode 100644 tests/validation/NEON/AbsoluteDifference.cpp
 create mode 100644 tests/validation/NEON/Accumulate.cpp
 create mode 100644 tests/validation/NEON/AccumulateSquared.cpp
 create mode 100644 tests/validation/NEON/AccumulateWeighted.cpp
 create mode 100644 tests/validation/NEON/ActivationLayer.cpp
 create mode 100644 tests/validation/NEON/ArithmeticAddition.cpp
 create mode 100644 tests/validation/NEON/ArithmeticSubtraction.cpp
 create mode 100644 tests/validation/NEON/BatchNormalizationLayer.cpp
 create mode 100644 tests/validation/NEON/BitwiseAnd.cpp
 create mode 100644 tests/validation/NEON/BitwiseNot.cpp
 create mode 100644 tests/validation/NEON/BitwiseOr.cpp
 create mode 100644 tests/validation/NEON/BitwiseXor.cpp
 create mode 100644 tests/validation/NEON/Box3x3.cpp
 create mode 100644 tests/validation/NEON/CMakeLists.txt
 create mode 100644 tests/validation/NEON/ConvolutionLayer.cpp
 create mode 100644 tests/validation/NEON/ConvolutionLayerDirect.cpp
 create mode 100644 tests/validation/NEON/DepthConvert.cpp
 create mode 100644 tests/validation/NEON/FillBorder.cpp
 create mode 100644 tests/validation/NEON/Fixedpoint/Exp_QS8.cpp
 create mode 100644 tests/validation/NEON/Fixedpoint/Invsqrt_QS8.cpp
 create mode 100644 tests/validation/NEON/Fixedpoint/Log_QS8.cpp
 create mode 100644 tests/validation/NEON/Fixedpoint/Reciprocal_QS8.cpp
 create mode 100644 tests/validation/NEON/FullyConnectedLayer.cpp
 create mode 100644 tests/validation/NEON/GEMM.cpp
 create mode 100644 tests/validation/NEON/IntegralImage.cpp
 create mode 100644 tests/validation/NEON/NormalizationLayer.cpp
 create mode 100644 tests/validation/NEON/PixelWiseMultiplication.cpp
 create mode 100644 tests/validation/NEON/Pooling/PoolingLayer.cpp
 create mode 100644 tests/validation/NEON/SoftmaxLayer.cpp
 create mode 100644 tests/validation/NEON/Threshold.cpp
 create mode 100644 tests/validation/Reference.cpp
 create mode 100644 tests/validation/Reference.h
 create mode 100644 tests/validation/ReferenceCPP.cpp
 create mode 100644 tests/validation/ReferenceCPP.h
 create mode 100644 tests/validation/Tensor.h
 create mode 100644 tests/validation/TensorFactory.h
 create mode 100644 tests/validation/TensorOperations.h
 create mode 100644 tests/validation/TensorVisitors.h
 create mode 100644 tests/validation/UNIT/CMakeLists.txt
 create mode 100644 tests/validation/UNIT/FixedPoint.cpp
 create mode 100644 tests/validation/UNIT/TensorInfo.cpp
 create mode 100644 tests/validation/UNIT/TensorShape.cpp
 create mode 100644 tests/validation/UNIT/Utils.cpp
 create mode 100644 tests/validation/Validation.cpp
 create mode 100644 tests/validation/Validation.h
 create mode 100644 tests/validation/ValidationProgramOptions.cpp
 create mode 100644 tests/validation/ValidationProgramOptions.h
 create mode 100644 tests/validation/ValidationUserConfiguration.h
 create mode 100644 tests/validation/main.cpp
 create mode 100644 tests/validation/system_tests/CL/AlexNet.cpp
 create mode 100644 tests/validation/system_tests/CL/LeNet5.cpp
 create mode 100644 tests/validation/system_tests/NEON/AlexNet.cpp
 create mode 100644 tests/validation/system_tests/NEON/LeNet5.cpp

(limited to 'tests')

diff --git a/tests/CL/CLAccessor.h b/tests/CL/CLAccessor.h
new file mode 100644
index 0000000000..21db3ee23d
--- /dev/null
+++ b/tests/CL/CLAccessor.h
@@ -0,0 +1,136 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_TEST_CL_CLACCESSOR_H__
+#define __ARM_COMPUTE_TEST_CL_CLACCESSOR_H__
+
+#include "IAccessor.h"
+
+#include "arm_compute/runtime/CL/CLTensor.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace cl
+{
+/** Accessor implementation for @ref CLTensor objects. */
+class CLAccessor : public IAccessor
+{
+public:
+    /** Create an accessor for the given @p tensor.
+     *
+     * @param[in, out] tensor Tensor to be accessed.
+     *
+     * @note The CL memory is mapped by the constructor.
+     */
+    CLAccessor(CLTensor &tensor);
+
+    CLAccessor(const CLAccessor &) = delete;
+    CLAccessor &operator=(const CLAccessor &) = delete;
+    CLAccessor(CLAccessor &&) = default;
+    CLAccessor &operator=(CLAccessor &&) = default;
+
+    /** Destructor that unmaps the CL memory. */
+    ~CLAccessor();
+
+    TensorShape shape() const override;
+    size_t element_size() const override;
+    size_t size() const override;
+    Format format() const override;
+    DataType data_type() const override;
+    int num_channels() const override;
+    int num_elements() const override;
+    int fixed_point_position() const override;
+    const void *operator()(const Coordinates &coord) const override;
+    void *operator()(const Coordinates &coord) override;
+
+private:
+    CLTensor &_tensor;
+};
+
+inline CLAccessor::CLAccessor(CLTensor &tensor)
+    : _tensor{ tensor }
+{
+    _tensor.map();
+}
+
+inline CLAccessor::~CLAccessor()
+{
+    _tensor.unmap();
+}
+
+inline TensorShape CLAccessor::shape() const
+{
+    return _tensor.info()->tensor_shape();
+}
+
+inline size_t CLAccessor::element_size() const
+{
+    return _tensor.info()->element_size();
+}
+
+inline size_t CLAccessor::size() const
+{
+    return _tensor.info()->total_size();
+}
+
+inline Format CLAccessor::format() const
+{
+    return _tensor.info()->format();
+}
+
+inline DataType CLAccessor::data_type() const
+{
+    return _tensor.info()->data_type();
+}
+
+inline int CLAccessor::num_channels() const
+{
+    return _tensor.info()->num_channels();
+}
+
+inline int CLAccessor::num_elements() const
+{
+    return _tensor.info()->tensor_shape().total_size();
+}
+
+inline int CLAccessor::fixed_point_position() const
+{
+    return _tensor.info()->fixed_point_position();
+}
+
+inline const void *CLAccessor::operator()(const Coordinates &coord) const
+{
+    return _tensor.ptr_to_element(coord);
+}
+
+inline void *CLAccessor::operator()(const Coordinates &coord)
+{
+    return _tensor.ptr_to_element(coord);
+}
+} // namespace cl
+} // namespace test
+} // namespace arm_compute
+#endif
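A minimal usage sketch for the accessor above (not part of the patch; the tensor name, shape and U8 element type are illustrative assumptions):

    // The constructor maps the CL memory, so host-side reads are valid for the
    // accessor's lifetime; the destructor unmaps it again.
    void inspect(arm_compute::CLTensor &src)
    {
        arm_compute::test::cl::CLAccessor accessor(src);
        const arm_compute::Coordinates first_element(0, 0);
        const auto *value = static_cast<const uint8_t *>(accessor(first_element));
        (void)value; // compare against reference data here
    } // CL memory is unmapped here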
diff --git a/tests/CL/Helper.h b/tests/CL/Helper.h
new file mode 100644
index 0000000000..a6063e95ae
--- /dev/null
+++ b/tests/CL/Helper.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_TEST_CL_HELPER_H__
+#define __ARM_COMPUTE_TEST_CL_HELPER_H__
+
+#include "Globals.h"
+#include "TensorLibrary.h"
+
+#include "arm_compute/runtime/CL/CLTensor.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace cl
+{
+/** Helper to create an empty tensor.
+ *
+ * @param[in] shape                Desired shape.
+ * @param[in] data_type            Desired data type.
+ * @param[in] num_channels         (Optional) Number of channels for each tensor element.
+ * @param[in] fixed_point_position (Optional) Fixed point position that expresses the number of bits for the fractional part of the number when the tensor's data type is QS8 or QS16.
+ *
+ * @return Empty @ref CLTensor with the specified shape and data type.
+ */
+inline CLTensor create_tensor(const TensorShape &shape, DataType data_type, int num_channels = 1, int fixed_point_position = 0)
+{
+    CLTensor tensor;
+    tensor.allocator()->init(TensorInfo(shape, num_channels, data_type, fixed_point_position));
+
+    return tensor;
+}
+
+/** Helper to create an empty tensor.
+ *
+ * @param[in] name      File name from which to get the dimensions.
+ * @param[in] data_type Desired data type.
+ *
+ * @return Empty @ref CLTensor with the specified shape and data type.
+ */
+inline CLTensor create_tensor(const std::string &name, DataType data_type)
+{
+    constexpr unsigned int num_channels = 1;
+
+    const RawTensor &raw = library->get(name);
+
+    CLTensor tensor;
+    tensor.allocator()->init(TensorInfo(raw.shape(), num_channels, data_type));
+
+    return tensor;
+}
+} // namespace cl
+} // namespace test
+} // namespace arm_compute
+#endif
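How the helper is meant to be combined with the runtime allocator (a sketch; the shape and data type are arbitrary, and the explicit allocate() call is an assumption based on the allocator()->init() pattern above, which only sets up the TensorInfo):

    using namespace arm_compute;
    CLTensor dst = test::cl::create_tensor(TensorShape(16U, 16U), DataType::U8);
    dst.allocator()->allocate(); // memory is allocated in a separate step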
+cmake_minimum_required (VERSION 3.1)
+project (arm_compute_test)
+
+set(CMAKE_CXX_STANDARD 11)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+
+add_library(boost_unit_test_framework STATIC IMPORTED)
+set_target_properties(boost_unit_test_framework PROPERTIES
+    IMPORTED_LOCATION "${CMAKE_SOURCE_DIR}/../3rdparty/linux/armv7a/libboost_unit_test_framework.a"
+)
+
+add_library(boost_program_options STATIC IMPORTED)
+set_target_properties(boost_program_options PROPERTIES
+    IMPORTED_LOCATION "${CMAKE_SOURCE_DIR}/../3rdparty/linux/armv7a/libboost_program_options.a"
+)
+
+add_library(arm_compute SHARED IMPORTED)
+set_target_properties(arm_compute PROPERTIES
+    IMPORTED_LOCATION "${CMAKE_SOURCE_DIR}/../build/libarm_compute.so"
+)
+
+include_directories("${CMAKE_SOURCE_DIR}")
+include_directories("${CMAKE_SOURCE_DIR}/..")
+include_directories("${CMAKE_SOURCE_DIR}/../3rdparty/include/")
+
+# TensorLibrary
+set(tensor_library_SOURCE_FILES
+    ${CMAKE_SOURCE_DIR}/RawTensor.h
+    ${CMAKE_SOURCE_DIR}/RawTensor.cpp
+    ${CMAKE_SOURCE_DIR}/TensorCache.h
+    ${CMAKE_SOURCE_DIR}/TensorLibrary.h
+    ${CMAKE_SOURCE_DIR}/TensorLibrary.cpp
+)
+
+add_library(tensor_library OBJECT
+    ${tensor_library_SOURCE_FILES}
+)
+
+set(arm_compute_test_SOURCE_FILES
+    ${CMAKE_SOURCE_DIR}/BorderModeDataset.h
+    ${CMAKE_SOURCE_DIR}/ConvertPolicyDataset.h
+    ${CMAKE_SOURCE_DIR}/Globals.h
+    ${CMAKE_SOURCE_DIR}/IAccessor.h
+    ${CMAKE_SOURCE_DIR}/ImageDatasets.h
+    ${CMAKE_SOURCE_DIR}/InterpolationPolicyDataset.h
+    ${CMAKE_SOURCE_DIR}/NormalizationTypeDataset.h
+    ${CMAKE_SOURCE_DIR}/ProgramOptions.h
+    ${CMAKE_SOURCE_DIR}/ProgramOptions.cpp
+    ${CMAKE_SOURCE_DIR}/RoundingPolicyDataset.h
+    ${CMAKE_SOURCE_DIR}/ShapeDatasets.h
+    ${CMAKE_SOURCE_DIR}/TypePrinter.h
+    ${CMAKE_SOURCE_DIR}/TypeReader.h
+    ${CMAKE_SOURCE_DIR}/UserConfiguration.h
+    ${CMAKE_SOURCE_DIR}/UserConfiguration.cpp
+    ${CMAKE_SOURCE_DIR}/Utils.h
+    ${CMAKE_SOURCE_DIR}/boost_wrapper.h
+)
+
+add_library(arm_compute_test OBJECT
+    ${arm_compute_test_SOURCE_FILES}
+)
+
+add_subdirectory(validation)
+add_subdirectory(benchmark)
diff --git a/tests/Globals.h b/tests/Globals.h
new file mode 100644
index 0000000000..a44d7ce83b
--- /dev/null
+++ b/tests/Globals.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_TEST_GLOBALS_H__
+#define __ARM_COMPUTE_TEST_GLOBALS_H__
+
+#include "TensorLibrary.h"
+
+#include <memory>
+
+namespace arm_compute
+{
+namespace test
+{
+extern std::unique_ptr<TensorLibrary> library;
+} // namespace test
+} // namespace arm_compute
+#endif
diff --git a/tests/IAccessor.h b/tests/IAccessor.h
new file mode 100644
index 0000000000..3c06dc36be
--- /dev/null
+++ b/tests/IAccessor.h
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_TEST_IACCESSOR_H__
+#define __ARM_COMPUTE_TEST_IACCESSOR_H__
+
+#include "arm_compute/core/Coordinates.h"
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+namespace test
+{
+/** Common interface to provide information and access to tensor-like
+ * structures.
+ */
+class IAccessor
+{
+public:
+    /** Pure virtual destructor. */
+    virtual ~IAccessor() = 0;
+
+    /** Shape of the tensor. */
+    virtual TensorShape shape() const = 0;
+
+    /** Size of each element in the tensor in bytes. */
+    virtual size_t element_size() const = 0;
+
+    /** Total size of the tensor in bytes. */
+    virtual size_t size() const = 0;
+
+    /** Image format of the tensor. */
+    virtual Format format() const = 0;
+
+    /** Data type of the tensor. */
+    virtual DataType data_type() const = 0;
+
+    /** Number of channels of the tensor. */
+    virtual int num_channels() const = 0;
+
+    /** Number of elements of the tensor. */
+    virtual int num_elements() const = 0;
+
+    /** Number of bits for the fractional part. */
+    virtual int fixed_point_position() const = 0;
+
+    /** Read only access to the specified element.
+     *
+     * @param[in] coord Coordinates of the desired element.
+     *
+     * @return A pointer to the desired element.
+     */
+    virtual const void *operator()(const Coordinates &coord) const = 0;
+
+    /** Access to the specified element.
+     *
+     * @param[in] coord Coordinates of the desired element.
+     *
+     * @return A pointer to the desired element.
+     */
+    virtual void *operator()(const Coordinates &coord) = 0;
+};
+
+inline IAccessor::~IAccessor()
+{
+}
+} // namespace test
+} // namespace arm_compute
+#endif
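Because validation code only sees this interface, the same check can run against NEON and CL tensors alike. A sketch of a backend-agnostic traversal (the U8 element type and the 2D shape are assumptions for illustration):

    // Sum the first row of a U8 tensor through the backend-agnostic interface.
    uint64_t sum_first_row(const arm_compute::test::IAccessor &accessor)
    {
        uint64_t sum = 0;
        // Walk along dimension 0; operator() returns a raw pointer to the element.
        for(int x = 0; x < static_cast<int>(accessor.shape()[0]); ++x)
        {
            sum += *static_cast<const uint8_t *>(accessor(arm_compute::Coordinates(x, 0)));
        }
        return sum;
    }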
diff --git a/tests/NEON/Helper.h b/tests/NEON/Helper.h
new file mode 100644
index 0000000000..c8f1c2e635
--- /dev/null
+++ b/tests/NEON/Helper.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_TEST_NEON_HELPER_H__
+#define __ARM_COMPUTE_TEST_NEON_HELPER_H__
+
+#include "Globals.h"
+#include "TensorLibrary.h"
+
+#include "arm_compute/runtime/Tensor.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace neon
+{
+/** Helper to create an empty tensor.
+ *
+ * @param[in] shape                Desired shape.
+ * @param[in] data_type            Desired data type.
+ * @param[in] num_channels         (Optional) Number of channels for each tensor element.
+ * @param[in] fixed_point_position (Optional) Fixed point position that expresses the number of bits for the fractional part of the number when the tensor's data type is QS8 or QS16.
+ *
+ * @return Empty @ref Tensor with the specified shape and data type.
+ */
+inline Tensor create_tensor(const TensorShape &shape, DataType data_type, int num_channels = 1, int fixed_point_position = 0)
+{
+    Tensor tensor;
+    tensor.allocator()->init(TensorInfo(shape, num_channels, data_type, fixed_point_position));
+
+    return tensor;
+}
+
+/** Helper to create an empty tensor.
+ *
+ * @param[in] name                 File name from which to get the dimensions.
+ * @param[in] data_type            Desired data type.
+ * @param[in] fixed_point_position (Optional) Number of bits for the fractional part of the fixed point numbers.
+ *
+ * @return Empty @ref Tensor with the specified shape and data type.
+ */
+inline Tensor create_tensor(const std::string &name, DataType data_type, int fixed_point_position = 0)
+{
+    constexpr unsigned int num_channels = 1;
+
+    const RawTensor &raw = library->get(name);
+
+    Tensor tensor;
+    tensor.allocator()->init(TensorInfo(raw.shape(), num_channels, data_type, fixed_point_position));
+
+    return tensor;
+}
+} // namespace neon
+} // namespace test
+} // namespace arm_compute
+#endif
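For fixed point types the last two parameters matter; a sketch of creating a QS8 tensor with three fractional bits (the shape and values are chosen for illustration, and the separate allocate() step mirrors the CL sketch above):

    using namespace arm_compute;
    Tensor t = test::neon::create_tensor(TensorShape(8U, 8U), DataType::QS8,
                                         1 /* num_channels */, 3 /* fixed_point_position */);
    t.allocator()->allocate();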
diff --git a/tests/NEON/NEAccessor.h b/tests/NEON/NEAccessor.h
new file mode 100644
index 0000000000..be28c27d98
--- /dev/null
+++ b/tests/NEON/NEAccessor.h
@@ -0,0 +1,124 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_TEST_NEON_NEACCESSOR_H__
+#define __ARM_COMPUTE_TEST_NEON_NEACCESSOR_H__
+
+#include "IAccessor.h"
+
+#include "arm_compute/runtime/Tensor.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace neon
+{
+/** Accessor implementation for @ref Tensor objects. */
+class NEAccessor : public IAccessor
+{
+public:
+    /** Create an accessor for the given @p tensor.
+     *
+     * @param[in, out] tensor Tensor to be accessed.
+     */
+    NEAccessor(Tensor &tensor);
+
+    NEAccessor(const NEAccessor &) = delete;
+    NEAccessor &operator=(const NEAccessor &) = delete;
+    NEAccessor(NEAccessor &&) = default;
+    NEAccessor &operator=(NEAccessor &&) = default;
+
+    TensorShape shape() const override;
+    size_t element_size() const override;
+    size_t size() const override;
+    Format format() const override;
+    DataType data_type() const override;
+    int num_channels() const override;
+    int num_elements() const override;
+    int fixed_point_position() const override;
+    const void *operator()(const Coordinates &coord) const override;
+    void *operator()(const Coordinates &coord) override;
+
+private:
+    Tensor &_tensor;
+};
+
+inline NEAccessor::NEAccessor(Tensor &tensor)
+    : _tensor{ tensor }
+{
+}
+
+inline TensorShape NEAccessor::shape() const
+{
+    return _tensor.info()->tensor_shape();
+}
+
+inline size_t NEAccessor::element_size() const
+{
+    return _tensor.info()->element_size();
+}
+
+inline size_t NEAccessor::size() const
+{
+    return _tensor.info()->total_size();
+}
+
+inline Format NEAccessor::format() const
+{
+    return _tensor.info()->format();
+}
+
+inline DataType NEAccessor::data_type() const
+{
+    return _tensor.info()->data_type();
+}
+
+inline int NEAccessor::num_channels() const
+{
+    return _tensor.info()->num_channels();
+}
+
+inline int NEAccessor::num_elements() const
+{
+    return _tensor.info()->tensor_shape().total_size();
+}
+
+inline int NEAccessor::fixed_point_position() const
+{
+    return _tensor.info()->fixed_point_position();
+}
+
+inline const void *NEAccessor::operator()(const Coordinates &coord) const
+{
+    return _tensor.ptr_to_element(coord);
+}
+
+inline void *NEAccessor::operator()(const Coordinates &coord)
+{
+    return _tensor.ptr_to_element(coord);
+}
+} // namespace neon
+} // namespace test
+} // namespace arm_compute
+#endif
diff --git a/tests/ProgramOptions.cpp b/tests/ProgramOptions.cpp
new file mode 100644
index 0000000000..0ae92f64e7
--- /dev/null
+++ b/tests/ProgramOptions.cpp
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "ProgramOptions.h"
+
+#include "TypePrinter.h"
+#include "TypeReader.h"
+
+#include "arm_compute/core/Types.h"
+
+#include <random>
+#include <sstream>
+
+namespace arm_compute
+{
+namespace test
+{
+ProgramOptions::ProgramOptions()
+{
+    boost::program_options::options_description generic("Generic options");
+    generic.add_options()("help", "Print help message")("seed", boost::program_options::value<std::random_device::result_type>(), "Seed for the tensor library");
+
+    _visible.add(generic);
+
+    _hidden.add_options()("path", boost::program_options::value<std::string>(), "Path from which to load the assets");
+
+    _positional.add("path", 1);
+}
+
+void ProgramOptions::add_options(const boost::program_options::options_description &options)
+{
+    _visible.add(options);
+}
+
+bool ProgramOptions::wants_help() const
+{
+    return (_vm.count("help") != 0);
+}
+
+std::string ProgramOptions::get_help() const
+{
+    std::stringstream help;
+    help << _visible;
+
+    return help.str();
+}
+
+void ProgramOptions::parse_commandline(int argc, char **argv)
+{
+    boost::program_options::options_description all;
+    all.add(_visible).add(_hidden);
+
+    boost::program_options::store(boost::program_options::command_line_parser(argc, argv)
+                                  .options(all)
+                                  .positional(_positional)
+                                  .allow_unregistered()
+                                  .run(),
+                                  _vm);
+
+    if(_vm.count("help") == 0 && _vm.count("path") == 0)
+    {
+        throw boost::program_options::required_option("PATH");
+    }
+
+    boost::program_options::notify(_vm);
+}
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/ProgramOptions.h b/tests/ProgramOptions.h
new file mode 100644
index 0000000000..b61ae01b30
--- /dev/null
+++ b/tests/ProgramOptions.h
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_TEST_PROGRAM_OPTIONS_H__
+#define __ARM_COMPUTE_TEST_PROGRAM_OPTIONS_H__
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Weffc++"
+#pragma GCC diagnostic ignored "-Wnon-virtual-dtor"
+#pragma GCC diagnostic ignored "-Wctor-dtor-privacy"
+#include "boost/program_options.hpp"
+#pragma GCC diagnostic pop
+
+#include <sstream>
+#include <string>
+
+namespace arm_compute
+{
+namespace test
+{
+/** Defines the available command line arguments and allows parsing them. */
+class ProgramOptions
+{
+public:
+    /** Defines available options. */
+    ProgramOptions();
+
+    /** Signals if the --help flag has been passed on the command line. */
+    bool wants_help() const;
+
+    /** Returns a string describing all available options. */
+    std::string get_help() const;
+
+    /** Parses the given arguments and makes them available via @ref get.
+     *
+     * @param[in] argc Number of command line arguments.
+     * @param[in] argv Pointer to the command line arguments.
+     */
+    void parse_commandline(int argc, char **argv);
+
+    /** Sets @p value if it has been specified on the command line.
+     *
+     * @note The type T has to match the type that has been specified for the
+     *       command line option.
+     *
+     * @param[in]  name  Name of the option to query.
+     * @param[out] value Variable to which the value will be assigned.
+     *
+     * @return True if the value is assigned, false otherwise.
+     */
+    template <typename T>
+    bool get(const std::string &name, T &value) const;
+
+protected:
+    /** Allows subclasses to add more specific options.
+     *
+     * @param[in] options Boost object containing options and their descriptions.
+     */
+    void add_options(const boost::program_options::options_description &options);
+
+private:
+    boost::program_options::options_description _hidden{};
+    boost::program_options::options_description _visible{ "Configuration options" };
+    boost::program_options::positional_options_description _positional{};
+    boost::program_options::variables_map _vm{};
+};
+
+template <typename T>
+bool ProgramOptions::get(const std::string &name, T &value) const
+{
+    if(_vm.count(name) != 0)
+    {
+        value = _vm[name].as<T>();
+        return true;
+    }
+
+    return false;
+}
+} // namespace test
+} // namespace arm_compute
+#endif
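A sketch of the intended call sequence (hypothetical driver code; the seed type mirrors the value<std::random_device::result_type>() declaration in ProgramOptions.cpp):

    #include <iostream>
    #include <random>

    int main(int argc, char **argv)
    {
        arm_compute::test::ProgramOptions options;
        options.parse_commandline(argc, argv); // throws if neither --help nor PATH is given

        if(options.wants_help())
        {
            std::cout << options.get_help() << "\n";
            return 0;
        }

        std::random_device::result_type seed = 0;
        if(options.get("seed", seed)) // T must match the declared option type
        {
            // seed the tensor library here
        }
        return 0;
    }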
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "RawTensor.h"
+
+#include "Utils.h"
+
+#include "arm_compute/core/Utils.h"
+
+#include <algorithm>
+#include <cstddef>
+#include <cstdint>
+#include <functional>
+#include <numeric>
+
+namespace arm_compute
+{
+namespace test
+{
+RawTensor::RawTensor(TensorShape shape, Format format, int fixed_point_position)
+    : _buffer(nullptr),
+      _shape(shape),
+      _format(format),
+      _fixed_point_position(fixed_point_position)
+{
+    _buffer = ::arm_compute::test::cpp14::make_unique<BufferType[]>(size());
+}
+
+RawTensor::RawTensor(TensorShape shape, DataType data_type, int num_channels, int fixed_point_position)
+    : _buffer(nullptr),
+      _shape(shape),
+      _data_type(data_type),
+      _num_channels(num_channels),
+      _fixed_point_position(fixed_point_position)
+{
+    _buffer = ::arm_compute::test::cpp14::make_unique<BufferType[]>(size());
+}
+
+RawTensor::RawTensor(const RawTensor &tensor)
+    : _buffer(nullptr),
+      _shape(tensor.shape()),
+      _format(tensor.format()),
+      _data_type(tensor._data_type),
+      _num_channels(tensor._num_channels),
+      _fixed_point_position(tensor.fixed_point_position())
+{
+    _buffer = ::arm_compute::test::cpp14::make_unique<BufferType[]>(tensor.size());
+    std::copy(tensor.data(), tensor.data() + size(), _buffer.get());
+}
+
+RawTensor &RawTensor::operator=(RawTensor tensor)
+{
+    swap(*this, tensor);
+
+    return *this;
+}
+
+RawTensor::BufferType &RawTensor::operator[](size_t offset)
+{
+    return _buffer[offset];
+}
+
+const RawTensor::BufferType &RawTensor::operator[](size_t offset) const
+{
+    return _buffer[offset];
+}
+
+TensorShape RawTensor::shape() const
+{
+    return _shape;
+}
+
+size_t RawTensor::element_size() const
+{
+    return num_channels() * element_size_from_data_type(data_type());
+}
+
+int RawTensor::fixed_point_position() const
+{
+    return _fixed_point_position;
+}
+
+size_t RawTensor::size() const
+{
+    const size_t size = std::accumulate(_shape.cbegin(), _shape.cend(), 1, std::multiplies<size_t>());
+    return size * element_size();
+}
+
+Format RawTensor::format() const
+{
+    return _format;
+}
+
+DataType RawTensor::data_type() const
+{
+    if(_format != Format::UNKNOWN)
+    {
+        return data_type_from_format(_format);
+    }
+    else
+    {
+        return _data_type;
+    }
+}
+
+int RawTensor::num_channels() const
+{
+    switch(_format)
+    {
+        case Format::U8:
+        case Format::S16:
+        case Format::U16:
+        case Format::S32:
+        case Format::U32:
+            return 1;
+        case Format::RGB888:
+            return 3;
+        case Format::UNKNOWN:
+            return _num_channels;
+        default:
+            ARM_COMPUTE_ERROR("NOT SUPPORTED!");
+    }
+}
+
+int RawTensor::num_elements() const
+{
+    return _shape.total_size();
+}
+
+const RawTensor::BufferType *RawTensor::data() const
+{
+    return _buffer.get();
+}
+
+RawTensor::BufferType *RawTensor::data()
+{
+    return _buffer.get();
+}
+
+const RawTensor::BufferType *RawTensor::operator()(const Coordinates &coord) const
+{
+    return _buffer.get() + coord2index(_shape, coord) * element_size();
+}
+
+RawTensor::BufferType *RawTensor::operator()(const Coordinates &coord)
+{
+    return _buffer.get() + coord2index(_shape, coord) * element_size();
+}
+
+void swap(RawTensor &tensor1, RawTensor &tensor2)
+{
+    // Use unqualified call to swap to enable ADL. But make std::swap available
+    // as backup.
+    using std::swap;
+    swap(tensor1._shape, tensor2._shape);
+    swap(tensor1._format, tensor2._format);
+    swap(tensor1._data_type, tensor2._data_type);
+    swap(tensor1._num_channels, tensor2._num_channels);
+    swap(tensor1._fixed_point_position, tensor2._fixed_point_position);
+    swap(tensor1._buffer, tensor2._buffer);
+}
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/RawTensor.h b/tests/RawTensor.h
new file mode 100644
index 0000000000..87ceb438e8
--- /dev/null
+++ b/tests/RawTensor.h
@@ -0,0 +1,159 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_TEST_RAW_TENSOR_H__
+#define __ARM_COMPUTE_TEST_RAW_TENSOR_H__
+
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+
+namespace arm_compute
+{
+namespace test
+{
+/** Simple tensor object that stores elements in a consecutive chunk of memory.
+ *
+ * It can be created by either loading an image from a file, which also
+ * initialises the content of the tensor, or by explicitly specifying the size.
+ * The latter leaves the content uninitialised.
+ *
+ * Furthermore, the class provides methods to convert the tensor's values into
+ * different image formats.
+ */
+class RawTensor final
+{
+public:
+    /** Create an uninitialised tensor of the given @p shape and @p format.
+     *
+     * @param[in] shape                Shape of the new raw tensor.
+     * @param[in] format               Format of the new raw tensor.
+     * @param[in] fixed_point_position (Optional) Number of bits for the fractional part of the fixed point numbers.
+     */
+    RawTensor(TensorShape shape, Format format, int fixed_point_position = 0);
+
+    /** Create an uninitialised tensor of the given @p shape and @p data type.
+     *
+     * @param[in] shape                Shape of the new raw tensor.
+     * @param[in] data_type            Data type of the new raw tensor.
+     * @param[in] num_channels         (Optional) Number of channels (default = 1).
+     * @param[in] fixed_point_position (Optional) Number of bits for the fractional part of the fixed point numbers (default = 0).
+     */
+    RawTensor(TensorShape shape, DataType data_type, int num_channels = 1, int fixed_point_position = 0);
+
+    /** Create a deep copy of the given @p tensor.
+     *
+     * @param[in] tensor Tensor to be copied.
+     */
+    RawTensor(const RawTensor &tensor);
+
+    /** Assign a deep copy of the given @p tensor.
+     *
+     * @param[in] tensor Tensor to be copied.
+     */
+    RawTensor &operator=(RawTensor tensor);
+    RawTensor(RawTensor &&) = default;
+    ~RawTensor() = default;
+
+    using BufferType = uint8_t;
+    using Buffer     = std::unique_ptr<BufferType[]>;
+
+    /** Return value at @p offset in the buffer.
+     *
+     * @param[in] offset Offset within the buffer.
+     */
+    BufferType &operator[](size_t offset);
+
+    /** Return constant value at @p offset in the buffer.
+     *
+     * @param[in] offset Offset within the buffer.
+     */
+    const BufferType &operator[](size_t offset) const;
+
+    /** Shape of the tensor. */
+    TensorShape shape() const;
+
+    /** Size of each element in the tensor in bytes. */
+    size_t element_size() const;
+
+    /** Total size of the tensor in bytes. */
+    size_t size() const;
+
+    /** Image format of the tensor. */
+    Format format() const;
+
+    /** Data type of the tensor. */
+    DataType data_type() const;
+
+    /** Number of channels of the tensor. */
+    int num_channels() const;
+
+    /** Number of elements of the tensor. */
+    int num_elements() const;
+
+    /** The number of bits for the fractional part of the fixed point numbers. */
+    int fixed_point_position() const;
+
+    /** Constant pointer to the underlying buffer. */
+    const BufferType *data() const;
+
+    /** Pointer to the underlying buffer. */
+    BufferType *data();
+
+    /** Read only access to the specified element.
+     *
+     * @param[in] coord Coordinates of the desired element.
+     *
+     * @return A pointer to the desired element.
+     */
+    const BufferType *operator()(const Coordinates &coord) const;
+
+    /** Access to the specified element.
+     *
+     * @param[in] coord Coordinates of the desired element.
+     *
+     * @return A pointer to the desired element.
+     */
+    BufferType *operator()(const Coordinates &coord);
+
+    /** Swaps the content of the provided tensors.
+     *
+     * @param[in, out] tensor1 Tensor to be swapped.
+     * @param[in, out] tensor2 Tensor to be swapped.
+     */
+    friend void swap(RawTensor &tensor1, RawTensor &tensor2);
+
+private:
+    Buffer      _buffer{ nullptr };
+    TensorShape _shape{};
+    Format      _format{ Format::UNKNOWN };
+    DataType    _data_type{ DataType::UNKNOWN };
+    int         _num_channels{ 0 };
+    int         _fixed_point_position{ 0 };
+};
+} // namespace test
+} // namespace arm_compute
+#endif
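Note that operator=(RawTensor tensor) takes its argument by value: together with the friend swap this is the copy-and-swap idiom, so assignment reuses the deep-copying copy constructor and provides the strong exception guarantee. A sketch of the resulting semantics (names are illustrative):

    arm_compute::test::RawTensor a(arm_compute::TensorShape(4U, 4U), arm_compute::Format::U8);
    arm_compute::test::RawTensor b(a); // deep copy into a fresh buffer
    b = a;                             // copies by value, then swaps buffers into *this
    // a.data() != b.data(): the two tensors never share storage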
diff --git a/tests/SConscript b/tests/SConscript
new file mode 100644
index 0000000000..049113aba2
--- /dev/null
+++ b/tests/SConscript
@@ -0,0 +1,150 @@
+# Copyright (c) 2017 ARM Limited.
+#
+# SPDX-License-Identifier: MIT
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to
+# deal in the Software without restriction, including without limitation the
+# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+# sell copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+import SCons
+import os.path
+
+Import('env')
+Import('vars')
+Import('arm_compute_a')
+Import('arm_compute_so')
+
+# vars is imported from arm_compute:
+variables = [
+    #FIXME Remove before release (And remove all references to INTERNAL_ONLY)
+    BoolVariable("internal_only", "Enable ARM internal only tests", True),
+    BoolVariable("pmu", "Enable PMU counters", False),
+    BoolVariable("validation_tests", "Build validation test programs", True),
+    BoolVariable("benchmark_tests", "Build benchmark test programs", True)
+]
+
+# We need a separate set of Variables for the Help message (Otherwise the global variables will get displayed twice)
+new_options = Variables('scons')
+
+for v in variables:
+    new_options.Add(v)
+    vars.Add(v)
+
+# Clone the environment to make sure we're not polluting the arm_compute one:
+common_env = env.Clone()
+vars.Update(common_env)
+
+Help(new_options.GenerateHelpText(common_env))
+
+if env['os'] in ['android', 'bare_metal']:
+    common_env.Append(LIBS = [arm_compute_a])
+    arm_compute_lib = arm_compute_a
+else:
+    common_env.Append(LIBS = ["arm_compute"])
+    arm_compute_lib = arm_compute_so
+
+if env['arch'] == 'arm64-v8.2-a' and (common_env['validation_tests'] or common_env['benchmark_tests']):
+    print("validation_tests=1 and benchmark_tests=1 are not currently supported for arch=arm64-v8.2-a")
+    Exit(1)
+
+#FIXME Delete before release
+if common_env['internal_only']:
+    common_env.Append(CPPDEFINES=['INTERNAL_ONLY'])
+
+common_env.Append(CPPPATH = [".", "#3rdparty/include"])
+common_env.Append(LIBPATH = ["#3rdparty/%s/%s" % (env['os'], env['arch'])])
+common_env.Append(LIBPATH = ["#build/%s" % env['build_dir']])
+common_env.Append(LIBPATH = ["#build/%s/opencl-1.2-stubs" % env['build_dir']])
+common_env.Append(LIBS = ['boost_program_options'])
+common_env.Append(CXXFLAGS = ['-Wno-missing-field-initializers'])
+
+validation_env = common_env.Clone()
+benchmark_env = common_env.Clone()
+
+validation_env.Append(CPPDEFINES=['BOOST'])
+# overloaded virtual function "benchmark::Fixture::SetUp" is only partially overridden
+benchmark_env.Append(CPPFLAGS=['-Wno-overloaded-virtual'])
+
+files = Glob('*.cpp')
+
+common_objects = [common_env.StaticObject(f) for f in files]
+
+validation_env.Append(LIBS = ['boost_unit_test_framework'])
+benchmark_env.Append(LIBS = ['benchmark'])
+
+files_validation = Glob('validation/*.cpp')
+files_benchmark = Glob('benchmark/*.cpp')
+
+if env['os'] == 'android' or not common_env['pmu']:
+    if env['os'] == 'android' and common_env['pmu']:
+        if env['Werror']:
+            print("pmu=1 is not supported for os=android")
+            Exit(1)
+        else:
+            print("pmu=1 is not supported for os=android")
+
+    files_benchmark = [f for f in files_benchmark if "PMU" not in os.path.basename(str(f))]
+
+# Add unit tests
+files_validation += Glob('validation/UNIT/*.cpp')
+files_validation += Glob('validation/UNIT/*/*.cpp')
+
+if env['opencl']:
+    Import('opencl')
+
+    benchmark_env.Append(CPPDEFINES=['OPENCL'])
+
+    files_validation += Glob('validation/CL/*.cpp')
+    files_validation += Glob('validation/CL/*/*.cpp')
+    files_validation += Glob('validation/system_tests/CL/*.cpp')
+    files_benchmark += Glob('benchmark/CL/*/*.cpp')
+    files_benchmark += Glob('benchmark/CL/*.cpp')
+    files_benchmark += Glob('benchmark/system_tests/CL/*.cpp')
+
+    validation_env.Append(LIBS = "OpenCL")
+    benchmark_env.Append(LIBS = "OpenCL")
+
+if env['neon']:
+    files_validation += Glob('validation/NEON/*.cpp')
+    files_validation += Glob('validation/NEON/*/*.cpp')
+    files_validation += Glob('validation/system_tests/NEON/*.cpp')
+    files_benchmark += Glob('benchmark/NEON/*/*.cpp')
+    files_benchmark += Glob('benchmark/NEON/*.cpp')
+    files_benchmark += Glob('benchmark/system_tests/NEON/*.cpp')
+
+if env['os'] == 'android':
+    validation_env.Append(LIBS = ["log"])
+    benchmark_env.Append(LIBS = ["log"])
+else:
+    benchmark_env.Append(LIBS = ["rt"])
+
+if common_env['validation_tests']:
+    arm_compute_validation = validation_env.Program('arm_compute_validation',
+                                                    files_validation + common_objects)
+    Depends(arm_compute_validation, arm_compute_lib)
+    if env['opencl']:
+        Depends(arm_compute_validation, opencl)
+    Default(arm_compute_validation)
+    Export('arm_compute_validation')
+if common_env['benchmark_tests']:
+    arm_compute_benchmark = benchmark_env.Program('arm_compute_benchmark',
+                                                  files_benchmark + common_objects)
+    Depends(arm_compute_benchmark, arm_compute_lib)
+    if env['opencl']:
+        Depends(arm_compute_benchmark, opencl)
+    Default(arm_compute_benchmark)
+    Export('arm_compute_benchmark')
+
diff --git a/tests/TensorCache.h b/tests/TensorCache.h
new file mode 100644
index 0000000000..b4f7b59e15
--- /dev/null
+++ b/tests/TensorCache.h
@@ -0,0 +1,118 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_TEST_TENSOR_CACHE_H__
+#define __ARM_COMPUTE_TEST_TENSOR_CACHE_H__
+
+#include "RawTensor.h"
+
+#include <map>
+#include <mutex>
+#include <tuple>
+
+namespace arm_compute
+{
+namespace test
+{
+/** Stores @ref RawTensor objects, categorised by the image they have been
+ * created from, including name, format and channel.
+ */
+class TensorCache
+{
+public:
+    /** Search the cache for a tensor created from the specified image and
+     * format.
+     *
+     * @param[in] key Key to look up the tensor. Consists of image name and format.
+     *
+     * @return The cached tensor matching the image name and format if found. A
+     *         nullptr otherwise.
+     */
+    RawTensor *find(std::tuple<std::string, Format> key);
+
+    /** Search the cache for a tensor created from the specified image,
+     * format and channel.
+     *
+     * @param[in] key Key to look up the tensor. Consists of image name, format and channel.
+     *
+     * @return The cached tensor matching the image name, format and channel if
+     *         found. A nullptr otherwise.
+     */
+    RawTensor *find(std::tuple<std::string, Format, Channel> key);
+
+    /** Add the given tensor to the cache. It can later be found under the given
+     * image name and format.
+     *
+     * @param[in] key Key under which to store the tensor. Consists of image name and format.
+     * @param[in] raw Raw tensor to be stored.
+     *
+     * @return A reference to the cached tensor.
+     */
+    RawTensor &add(std::tuple<std::string, Format> key, RawTensor raw);
+
+    /** Add the given tensor to the cache. It can later be found under the given
+     * image name, format and channel.
+     *
+     * @param[in] key Key under which to store the tensor. Consists of image name, format and channel.
+     * @param[in] raw Raw tensor to be stored.
+     *
+     * @return A reference to the cached tensor.
+     */
+    RawTensor &add(std::tuple<std::string, Format, Channel> key, RawTensor raw);
+
+private:
+    using FormatMap  = std::map<std::tuple<std::string, Format>, RawTensor>;
+    using ChannelMap = std::map<std::tuple<std::string, Format, Channel>, RawTensor>;
+
+    FormatMap  _raw_tensor_cache{};
+    ChannelMap _raw_tensor_channel_cache{};
+    std::mutex _raw_tensor_cache_mutex{};
+    std::mutex _raw_tensor_channel_cache_mutex{};
+};
+
+inline RawTensor *TensorCache::find(std::tuple<std::string, Format> key)
+{
+    const auto it = _raw_tensor_cache.find(key);
+    return it == _raw_tensor_cache.end() ? nullptr : &it->second;
+}
+
+inline RawTensor *TensorCache::find(std::tuple<std::string, Format, Channel> key)
+{
+    const auto it = _raw_tensor_channel_cache.find(key);
+    return it == _raw_tensor_channel_cache.end() ? nullptr : &it->second;
+}
+
+inline RawTensor &TensorCache::add(std::tuple<std::string, Format> key, RawTensor raw)
+{
+    std::lock_guard<std::mutex> lock(_raw_tensor_cache_mutex);
+    return std::get<0>(_raw_tensor_cache.emplace(std::move(key), std::move(raw)))->second;
+}
+
+inline RawTensor &TensorCache::add(std::tuple<std::string, Format, Channel> key, RawTensor raw)
+{
+    std::lock_guard<std::mutex> lock(_raw_tensor_channel_cache_mutex);
+    return std::get<0>(_raw_tensor_channel_cache.emplace(std::move(key), std::move(raw)))->second;
+}
+} // namespace test
+} // namespace arm_compute
+#endif
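The look-up/insert split above is easiest to see in use. A minimal sketch of the intended pattern, not part of the patch (file name and shape are invented for illustration):

```cpp
#include "RawTensor.h"
#include "TensorCache.h"

#include <string>
#include <tuple>

using namespace arm_compute;
using namespace arm_compute::test;

RawTensor &get_or_create(TensorCache &cache)
{
    auto key = std::make_tuple(std::string("lena.ppm"), Format::RGB888);

    // Serve repeated requests for the same (name, format) pair from the cache.
    if(RawTensor *cached = cache.find(key))
    {
        return *cached;
    }

    // First request: create the tensor and hand ownership to the cache.
    RawTensor raw(TensorShape(64U, 64U), Format::RGB888);
    return cache.add(std::move(key), std::move(raw));
}
```

Note that find() itself takes no lock; in this patch the TensorLibrary below serialises the whole find/add sequence with its own mutexes.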
diff --git a/tests/TensorLibrary.cpp b/tests/TensorLibrary.cpp
new file mode 100644
index 0000000000..0c85136a38
--- /dev/null
+++ b/tests/TensorLibrary.cpp
@@ -0,0 +1,475 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */ +#include "TensorLibrary.h" + +#include "TypePrinter.h" +#include "UserConfiguration.h" +#include "Utils.h" + +#include "arm_compute/core/ITensor.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace arm_compute +{ +namespace test +{ +namespace +{ +void convert_rgb_to_u8(const RawTensor &src, RawTensor &dst) +{ + const size_t min_size = std::min(src.size(), dst.size()); + + for(size_t i = 0, j = 0; i < min_size; i += 3, ++j) + { + dst.data()[j] = 0.2126f * src.data()[i + 0] + 0.7152f * src.data()[i + 1] + 0.0722f * src.data()[i + 2]; + } +} + +void convert_rgb_to_u16(const RawTensor &src, RawTensor &dst) +{ + const size_t min_size = std::min(src.size(), dst.size()); + + for(size_t i = 0, j = 0; i < min_size; i += 3, ++j) + { + reinterpret_cast(dst.data())[j] = 0.2126f * src.data()[i + 0] + 0.7152f * src.data()[i + 1] + 0.0722f * src.data()[i + 2]; + } +} + +void convert_rgb_to_s16(const RawTensor &src, RawTensor &dst) +{ + const size_t min_size = std::min(src.size(), dst.size()); + + for(size_t i = 0, j = 0; i < min_size; i += 3, ++j) + { + reinterpret_cast(dst.data())[j] = 0.2126f * src.data()[i + 0] + 0.7152f * src.data()[i + 1] + 0.0722f * src.data()[i + 2]; + } +} + +void extract_r_from_rgb(const RawTensor &src, RawTensor &dst) +{ + const size_t min_size = std::min(src.size(), dst.size()); + + for(size_t i = 0, j = 0; i < min_size; i += 3, ++j) + { + dst.data()[j] = src.data()[i]; + } +} + +void extract_g_from_rgb(const RawTensor &src, RawTensor &dst) +{ + const size_t min_size = std::min(src.size(), dst.size()); + + for(size_t i = 1, j = 0; i < min_size; i += 3, ++j) + { + dst.data()[j] = src.data()[i]; + } +} + +void discard_comments(std::ifstream &fs) +{ + while(fs.peek() == '#') + { + fs.ignore(std::numeric_limits::max(), '\n'); + } +} + +void discard_comments_and_spaces(std::ifstream &fs) +{ + while(true) + { + discard_comments(fs); + + if(isspace(fs.peek()) == 0) + { + break; + } + + fs.ignore(1); + } +} + +std::tuple parse_ppm_header(std::ifstream &fs) +{ + // Check the PPM magic number is valid + std::array magic_number{ { 0 } }; + fs >> magic_number[0] >> magic_number[1]; + + if(magic_number[0] != 'P' || magic_number[1] != '6') + { + throw std::runtime_error("Only raw PPM format is suported"); + } + + discard_comments_and_spaces(fs); + + unsigned int width = 0; + fs >> width; + + discard_comments_and_spaces(fs); + + unsigned int height = 0; + fs >> height; + + discard_comments_and_spaces(fs); + + int max_value = 0; + fs >> max_value; + + if(!fs.good()) + { + throw std::runtime_error("Cannot read image dimensions"); + } + + if(max_value != 255) + { + throw std::runtime_error("RawTensor doesn't have 8-bit values"); + } + + discard_comments(fs); + + if(isspace(fs.peek()) == 0) + { + throw std::runtime_error("Invalid PPM header"); + } + + fs.ignore(1); + + return std::make_tuple(width, height, max_value); +} + +RawTensor load_ppm(const std::string &path) +{ + std::ifstream file(path, std::ios::in | std::ios::binary); + + if(!file.good()) + { + throw std::runtime_error("Could not load PPM image: " + path); + } + + unsigned int width = 0; + unsigned int height = 0; + + std::tie(width, height, std::ignore) = parse_ppm_header(file); + + RawTensor raw(TensorShape(width, height), Format::RGB888); + + // Check if the file is large enough to fill the image + const size_t current_position = file.tellg(); + file.seekg(0, std::ios_base::end); + const size_t end_position = file.tellg(); + file.seekg(current_position, 
std::ios_base::beg); + + if((end_position - current_position) < raw.size()) + { + throw std::runtime_error("Not enough data in file"); + } + + file.read(reinterpret_cast(raw.data()), raw.size()); + + if(!file.good()) + { + throw std::runtime_error("Failure while reading image buffer"); + } + + return raw; +} +} // namespace + +TensorLibrary::TensorLibrary(std::string path) + : _library_path(std::move(path)), _seed{ std::random_device()() } +{ +} + +TensorLibrary::TensorLibrary(std::string path, std::random_device::result_type seed) + : _library_path(std::move(path)), _seed{ seed } +{ +} + +std::random_device::result_type TensorLibrary::seed() const +{ + return _seed; +} + +void TensorLibrary::fill(RawTensor &raw, const std::string &name, Format format) const +{ + //FIXME: Should be done by swapping cached buffers + const RawTensor &src = get(name, format); + std::copy_n(src.data(), raw.size(), raw.data()); +} + +void TensorLibrary::fill(RawTensor &raw, const std::string &name, Channel channel) const +{ + fill(raw, name, get_format_for_channel(channel), channel); +} + +void TensorLibrary::fill(RawTensor &raw, const std::string &name, Format format, Channel channel) const +{ + const RawTensor &src = get(name, format, channel); + std::copy_n(src.data(), raw.size(), raw.data()); +} + +const TensorLibrary::Loader &TensorLibrary::get_loader(const std::string &extension) const +{ + static std::unordered_map loaders = + { + { "ppm", load_ppm } + }; + + const auto it = loaders.find(extension); + + if(it != loaders.end()) + { + return it->second; + } + else + { + throw std::invalid_argument("Cannot load image with extension '" + extension + "'"); + } +} + +const TensorLibrary::Converter &TensorLibrary::get_converter(Format src, Format dst) const +{ + static std::map, Converter> converters = + { + { std::make_pair(Format::RGB888, Format::U8), convert_rgb_to_u8 }, + { std::make_pair(Format::RGB888, Format::U16), convert_rgb_to_u16 }, + { std::make_pair(Format::RGB888, Format::S16), convert_rgb_to_s16 } + }; + + const auto it = converters.find(std::make_pair(src, dst)); + + if(it != converters.end()) + { + return it->second; + } + else + { + std::stringstream msg; + msg << "Cannot convert from format '" << src << "' to format '" << dst << "'\n"; + throw std::invalid_argument(msg.str()); + } +} + +const TensorLibrary::Converter &TensorLibrary::get_converter(DataType src, Format dst) const +{ + static std::map, Converter> converters = {}; + + const auto it = converters.find(std::make_pair(src, dst)); + + if(it != converters.end()) + { + return it->second; + } + else + { + std::stringstream msg; + msg << "Cannot convert from data type '" << src << "' to format '" << dst << "'\n"; + throw std::invalid_argument(msg.str()); + } +} + +const TensorLibrary::Converter &TensorLibrary::get_converter(DataType src, DataType dst) const +{ + static std::map, Converter> converters = {}; + + const auto it = converters.find(std::make_pair(src, dst)); + + if(it != converters.end()) + { + return it->second; + } + else + { + std::stringstream msg; + msg << "Cannot convert from data type '" << src << "' to data type '" << dst << "'\n"; + throw std::invalid_argument(msg.str()); + } +} + +const TensorLibrary::Converter &TensorLibrary::get_converter(Format src, DataType dst) const +{ + static std::map, Converter> converters = {}; + + const auto it = converters.find(std::make_pair(src, dst)); + + if(it != converters.end()) + { + return it->second; + } + else + { + std::stringstream msg; + msg << "Cannot convert from format '" << 
src << "' to data type '" << dst << "'\n"; + throw std::invalid_argument(msg.str()); + } +} + +const TensorLibrary::Extractor &TensorLibrary::get_extractor(Format format, Channel channel) const +{ + static std::map, Extractor> extractors = + { + { std::make_pair(Format::RGB888, Channel::R), extract_r_from_rgb }, + { std::make_pair(Format::RGB888, Channel::G), extract_g_from_rgb } + }; + + const auto it = extractors.find(std::make_pair(format, channel)); + + if(it != extractors.end()) + { + return it->second; + } + else + { + std::stringstream msg; + msg << "Cannot extract channel '" << channel << "' from format '" << format << "'\n"; + throw std::invalid_argument(msg.str()); + } +} + +RawTensor TensorLibrary::load_image(const std::string &name) const +{ +#ifdef _WIN32 + const std::string image_path = ("\\images\\"); +#else + const std::string image_path = ("/images/"); +#endif + + const std::string path = _library_path + image_path + name; + const std::string extension = path.substr(path.find_last_of('.') + 1); + return (*get_loader(extension))(path); +} + +const RawTensor &TensorLibrary::find_or_create_raw_tensor(const std::string &name, Format format) const +{ + std::lock_guard guard(_format_lock); + + const RawTensor *ptr = _cache.find(std::make_tuple(name, format)); + + if(ptr != nullptr) + { + return *ptr; + } + + RawTensor raw = load_image(name); + + if(raw.format() != format) + { + //FIXME: Remove unnecessary copy + RawTensor dst(raw.shape(), format); + (*get_converter(raw.format(), format))(raw, dst); + raw = std::move(dst); + } + + return _cache.add(std::make_tuple(name, format), std::move(raw)); +} + +const RawTensor &TensorLibrary::find_or_create_raw_tensor(const std::string &name, Format format, Channel channel) const +{ + std::lock_guard guard(_channel_lock); + + const RawTensor *ptr = _cache.find(std::make_tuple(name, format, channel)); + + if(ptr != nullptr) + { + return *ptr; + } + + const RawTensor &src = get(name, format); + //FIXME: Need to change shape to match channel + RawTensor dst(src.shape(), get_channel_format(channel)); + + (*get_extractor(format, channel))(src, dst); + + return _cache.add(std::make_tuple(name, format, channel), std::move(dst)); +} + +RawTensor TensorLibrary::get(const TensorShape &shape, DataType data_type, int num_channels, int fixed_point_position) +{ + return RawTensor(shape, data_type, num_channels, fixed_point_position); +} + +RawTensor TensorLibrary::get(const TensorShape &shape, Format format) +{ + return RawTensor(shape, format); +} + +const RawTensor &TensorLibrary::get(const std::string &name) const +{ + //FIXME: Format should be derived from the image name. Not be fixed to RGB. + return find_or_create_raw_tensor(name, Format::RGB888); +} + +RawTensor TensorLibrary::get(const std::string &name) +{ + //FIXME: Format should be derived from the image name. Not be fixed to RGB. 
+    return RawTensor(find_or_create_raw_tensor(name, Format::RGB888));
+}
+
+RawTensor TensorLibrary::get(const std::string &name, DataType data_type, int num_channels) const
+{
+    const RawTensor &raw = get(name);
+
+    return RawTensor(raw.shape(), data_type, num_channels);
+}
+
+const RawTensor &TensorLibrary::get(const std::string &name, Format format) const
+{
+    return find_or_create_raw_tensor(name, format);
+}
+
+RawTensor TensorLibrary::get(const std::string &name, Format format)
+{
+    return RawTensor(find_or_create_raw_tensor(name, format));
+}
+
+const RawTensor &TensorLibrary::get(const std::string &name, Channel channel) const
+{
+    return get(name, get_format_for_channel(channel), channel);
+}
+
+RawTensor TensorLibrary::get(const std::string &name, Channel channel)
+{
+    return RawTensor(get(name, get_format_for_channel(channel), channel));
+}
+
+const RawTensor &TensorLibrary::get(const std::string &name, Format format, Channel channel) const
+{
+    return find_or_create_raw_tensor(name, format, channel);
+}
+
+RawTensor TensorLibrary::get(const std::string &name, Format format, Channel channel)
+{
+    return RawTensor(find_or_create_raw_tensor(name, format, channel));
+}
+} // namespace test
+} // namespace arm_compute
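With the implementation in place, a short sketch of how a test might drive the library; this is illustrative only and not part of the patch (directory name, file name and seed are invented):

```cpp
#include "TensorLibrary.h"

using namespace arm_compute;
using namespace arm_compute::test;

int main()
{
    // Root directory of the test images; fixed seed for reproducible fills.
    TensorLibrary library("test_assets", 42);

    // The first call loads lena.ppm from disk and caches it; later requests
    // for the same (name, format) key are served from the cache.
    RawTensor rgb  = library.get("lena.ppm", Format::RGB888);
    RawTensor gray = library.get("lena.ppm", Format::U8);

    // Extract a single channel; the source format (RGB888) is inferred from R.
    RawTensor red = library.get("lena.ppm", Channel::R);

    // RGB888 elements are three bytes, U8 elements one byte.
    return rgb.size() == 3 * gray.size() ? 0 : 1;
}
```

Because every overload resolves through find_or_create_raw_tensor(), the image is read from disk once and converted formats and channels are derived from the cached copy.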
diff --git a/tests/TensorLibrary.h b/tests/TensorLibrary.h
new file mode 100644
index 0000000000..b3974836ae
--- /dev/null
+++ b/tests/TensorLibrary.h
@@ -0,0 +1,656 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_TEST_TENSOR_LIBRARY_H__
+#define __ARM_COMPUTE_TEST_TENSOR_LIBRARY_H__
+
+#include "RawTensor.h"
+#include "TensorCache.h"
+#include "Utils.h"
+
+#include "arm_compute/core/Coordinates.h"
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/core/Window.h"
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace arm_compute
+{
+namespace test
+{
+/** Factory class to create and fill tensors.
+ *
+ * Allows initialising tensors from loaded images or by specifying the shape
+ * explicitly. Furthermore, provides methods to fill tensors with the content of
+ * loaded images or with random values.
+ */
+class TensorLibrary final
+{
+public:
+    /** Initialises the library with a @p path to the image directory.
+     *
+     * @param[in] path Path to load images from.
+     */
+    TensorLibrary(std::string path);
+
+    /** Initialises the library with a @p path to the image directory.
+     * Furthermore, sets the seed for the random generator to @p seed.
+     *
+     * @param[in] path Path to load images from.
+     * @param[in] seed Seed used to initialise the random number generator.
+     */
+    TensorLibrary(std::string path, std::random_device::result_type seed);
+
+    /** Seed that is used to fill tensors with random values. */
+    std::random_device::result_type seed() const;
+
+    /** Creates an uninitialised raw tensor with the given @p shape, @p
+     * data_type and @p num_channels.
+     *
+     * @param[in] shape                Shape used to initialise the tensor.
+     * @param[in] data_type            Data type used to initialise the tensor.
+     * @param[in] num_channels         (Optional) Number of channels used to initialise the tensor.
+     * @param[in] fixed_point_position (Optional) Number of bits for the fractional part of the fixed point numbers.
+     */
+    static RawTensor get(const TensorShape &shape, DataType data_type, int num_channels = 1, int fixed_point_position = 0);
+
+    /** Creates an uninitialised raw tensor with the given @p shape and @p format.
+     *
+     * @param[in] shape  Shape used to initialise the tensor.
+     * @param[in] format Format used to initialise the tensor.
+     */
+    static RawTensor get(const TensorShape &shape, Format format);
+
+    /** Provides a constant raw tensor for the specified image.
+     *
+     * @param[in] name Image file used to look up the raw tensor.
+     */
+    const RawTensor &get(const std::string &name) const;
+
+    /** Provides a raw tensor for the specified image.
+     *
+     * @param[in] name Image file used to look up the raw tensor.
+     */
+    RawTensor get(const std::string &name);
+
+    /** Creates an uninitialised raw tensor with the given @p data_type and @p
+     * num_channels. The shape is derived from the specified image.
+     *
+     * @param[in] name         Image file used to initialise the tensor.
+     * @param[in] data_type    Data type used to initialise the tensor.
+     * @param[in] num_channels Number of channels used to initialise the tensor.
+     */
+    RawTensor get(const std::string &name, DataType data_type, int num_channels = 1) const;
+
+    /** Provides a constant raw tensor for the specified image after it has been
+     * converted to @p format.
+     *
+     * @param[in] name   Image file used to look up the raw tensor.
+     * @param[in] format Format used to look up the raw tensor.
+     */
+    const RawTensor &get(const std::string &name, Format format) const;
+
+    /** Provides a raw tensor for the specified image after it has been
+     * converted to @p format.
+     *
+     * @param[in] name   Image file used to look up the raw tensor.
+     * @param[in] format Format used to look up the raw tensor.
+     */
+    RawTensor get(const std::string &name, Format format);
+
+    /** Provides a constant raw tensor for the specified channel after it has
+     * been extracted from the given image.
+     *
+     * @param[in] name    Image file used to look up the raw tensor.
+     * @param[in] channel Channel used to look up the raw tensor.
+     *
+     * @note The channel has to be unambiguous so that the format can be
+     *       inferred automatically.
+     */
+    const RawTensor &get(const std::string &name, Channel channel) const;
+
+    /** Provides a raw tensor for the specified channel after it has been
+     * extracted from the given image.
+     *
+     * @param[in] name    Image file used to look up the raw tensor.
+     * @param[in] channel Channel used to look up the raw tensor.
+     *
+     * @note The channel has to be unambiguous so that the format can be
+     *       inferred automatically.
+     */
+    RawTensor get(const std::string &name, Channel channel);
+
+    /** Provides a constant raw tensor for the specified channel after it has
+     * been extracted from the given image formatted to @p format.
+     *
+     * @param[in] name    Image file used to look up the raw tensor.
+     * @param[in] format  Format used to look up the raw tensor.
+     * @param[in] channel Channel used to look up the raw tensor.
+     */
+    const RawTensor &get(const std::string &name, Format format, Channel channel) const;
+
+    /** Provides a raw tensor for the specified channel after it has been
+     * extracted from the given image formatted to @p format.
+     *
+     * @param[in] name    Image file used to look up the raw tensor.
+     * @param[in] format  Format used to look up the raw tensor.
+     * @param[in] channel Channel used to look up the raw tensor.
+     */
+    RawTensor get(const std::string &name, Format format, Channel channel);
+
+    /** Fills the specified @p tensor with random values drawn from @p
+     * distribution.
+     *
+     * @param[in, out] tensor       To be filled tensor.
+     * @param[in]      distribution Distribution used to fill the tensor.
+     * @param[in]      seed_offset  The offset will be added to the global seed before initialising the random generator.
+     *
+     * @note The @p distribution has to provide operator(Generator &) which
+     *       will be used to draw samples.
+     */
+    template <typename T, typename D>
+    void fill(T &&tensor, D &&distribution, std::random_device::result_type seed_offset) const;
+
+    /** Fills the specified @p raw tensor with random values drawn from @p
+     * distribution.
+     *
+     * @param[in, out] raw          To be filled raw.
+     * @param[in]      distribution Distribution used to fill the tensor.
+     * @param[in]      seed_offset  The offset will be added to the global seed before initialising the random generator.
+     *
+     * @note The @p distribution has to provide operator(Generator &) which
+     *       will be used to draw samples.
+     */
+    template <typename D>
+    void fill(RawTensor &raw, D &&distribution, std::random_device::result_type seed_offset) const;
+
+    /** Fills the specified @p tensor with the content of the specified image
+     * converted to the given format.
+     *
+     * @param[in, out] tensor To be filled tensor.
+     * @param[in]      name   Image file used to fill the tensor.
+     * @param[in]      format Format of the image used to fill the tensor.
+     *
+     * @warning No check is performed that the specified format actually
+     *          matches the format of the tensor.
+     */
+    template <typename T>
+    void fill(T &&tensor, const std::string &name, Format format) const;
+
+    /** Fills the raw tensor with the content of the specified image
+     * converted to the given format.
+     *
+     * @param[in, out] raw    To be filled raw tensor.
+     * @param[in]      name   Image file used to fill the tensor.
+     * @param[in]      format Format of the image used to fill the tensor.
+     *
+     * @warning No check is performed that the specified format actually
+     *          matches the format of the tensor.
+     */
+    void fill(RawTensor &raw, const std::string &name, Format format) const;
+
+    /** Fills the specified @p tensor with the content of the specified channel
+     * extracted from the given image.
+     *
+     * @param[in, out] tensor  To be filled tensor.
+     * @param[in]      name    Image file used to fill the tensor.
+     * @param[in]      channel Channel of the image used to fill the tensor.
+     *
+     * @note The channel has to be unambiguous so that the format can be
+     *       inferred automatically.
+     *
+     * @warning No check is performed that the specified format actually
+     *          matches the format of the tensor.
+ */ + template + void fill(T &&tensor, const std::string &name, Channel channel) const; + + /** Fills the raw tensor with the content of the specified channel + * extracted from the given image. + * + * @param[in, out] raw To be filled raw tensor. + * @param[in] name Image file used to fill the tensor. + * @param[in] channel Channel of the image used to fill the tensor. + * + * @note The channel has to be unambiguous so that the format can be + * inferred automatically. + * + * @warning No check is performed that the specified format actually + * matches the format of the tensor. + */ + void fill(RawTensor &raw, const std::string &name, Channel channel) const; + + /** Fills the specified @p tensor with the content of the specified channel + * extracted from the given image after it has been converted to the given + * format. + * + * @param[in, out] tensor To be filled tensor. + * @param[in] name Image file used to fill the tensor. + * @param[in] format Format of the image used to fill the tensor. + * @param[in] channel Channel of the image used to fill the tensor. + * + * @warning No check is performed that the specified format actually + * matches the format of the tensor. + */ + template + void fill(T &&tensor, const std::string &name, Format format, Channel channel) const; + + /** Fills the raw tensor with the content of the specified channel + * extracted from the given image after it has been converted to the given + * format. + * + * @param[in, out] raw To be filled raw tensor. + * @param[in] name Image file used to fill the tensor. + * @param[in] format Format of the image used to fill the tensor. + * @param[in] channel Channel of the image used to fill the tensor. + * + * @warning No check is performed that the specified format actually + * matches the format of the tensor. + */ + void fill(RawTensor &raw, const std::string &name, Format format, Channel channel) const; + + /** Fill a tensor with uniform distribution across the range of its type + * + * @param[in, out] tensor To be filled tensor. + * @param[in] seed_offset The offset will be added to the global seed before initialising the random generator. + */ + template + void fill_tensor_uniform(T &&tensor, std::random_device::result_type seed_offset) const; + + /** Fill a tensor with uniform distribution across the a specified range + * + * @param[in, out] tensor To be filled tensor. + * @param[in] seed_offset The offset will be added to the global seed before initialising the random generator. + * @param[in] low lowest value in the range (inclusive) + * @param[in] high highest value in the range (inclusive) + * + * @note @p low and @p high must be of the same type as the data type of @p tensor + */ + template + void fill_tensor_uniform(T &&tensor, std::random_device::result_type seed_offset, D low, D high) const; + + /** Fills the specified @p tensor with data loaded from binary in specified path. + * + * @param[in, out] tensor To be filled tensor. + * @param[in] name Data file. + */ + template + void fill_layer_data(T &&tensor, std::string name) const; + +private: + // Function prototype to convert between image formats. + using Converter = void (*)(const RawTensor &src, RawTensor &dst); + // Function prototype to extract a channel from an image. + using Extractor = void (*)(const RawTensor &src, RawTensor &dst); + // Function prototype to load an image file. 
+ using Loader = RawTensor (*)(const std::string &path); + + const Converter &get_converter(Format src, Format dst) const; + const Converter &get_converter(DataType src, Format dst) const; + const Converter &get_converter(Format src, DataType dst) const; + const Converter &get_converter(DataType src, DataType dst) const; + const Extractor &get_extractor(Format format, Channel) const; + const Loader &get_loader(const std::string &extension) const; + + /** Creates a raw tensor from the specified image. + * + * @param[in] name To be loaded image file. + * + * @note If use_single_image is true @p name is ignored and the user image + * is loaded instead. + */ + RawTensor load_image(const std::string &name) const; + + /** Provides a raw tensor for the specified image and format. + * + * @param[in] name Image file used to look up the raw tensor. + * @param[in] format Format used to look up the raw tensor. + * + * If the tensor has already been requested before the cached version will + * be returned. Otherwise the tensor will be added to the cache. + * + * @note If use_single_image is true @p name is ignored and the user image + * is loaded instead. + */ + const RawTensor &find_or_create_raw_tensor(const std::string &name, Format format) const; + + /** Provides a raw tensor for the specified image, format and channel. + * + * @param[in] name Image file used to look up the raw tensor. + * @param[in] format Format used to look up the raw tensor. + * @param[in] channel Channel used to look up the raw tensor. + * + * If the tensor has already been requested before the cached version will + * be returned. Otherwise the tensor will be added to the cache. + * + * @note If use_single_image is true @p name is ignored and the user image + * is loaded instead. + */ + const RawTensor &find_or_create_raw_tensor(const std::string &name, Format format, Channel channel) const; + + mutable TensorCache _cache{}; + mutable std::mutex _format_lock{}; + mutable std::mutex _channel_lock{}; + std::string _library_path; + std::random_device::result_type _seed; +}; + +template +void TensorLibrary::fill(T &&tensor, D &&distribution, std::random_device::result_type seed_offset) const +{ + Window window; + for(unsigned int d = 0; d < tensor.shape().num_dimensions(); ++d) + { + window.set(d, Window::Dimension(0, tensor.shape()[d], 1)); + } + + std::mt19937 gen(_seed + seed_offset); + + //FIXME: Replace with normal loop + execute_window_loop(window, [&](const Coordinates & id) + { + using ResultType = typename std::remove_reference::type::result_type; + const ResultType value = distribution(gen); + void *const out_ptr = tensor(id); + store_value_with_data_type(out_ptr, value, tensor.data_type()); + }); +} + +template +void TensorLibrary::fill(RawTensor &raw, D &&distribution, std::random_device::result_type seed_offset) const +{ + std::mt19937 gen(_seed + seed_offset); + + for(size_t offset = 0; offset < raw.size(); offset += raw.element_size()) + { + using ResultType = typename std::remove_reference::type::result_type; + const ResultType value = distribution(gen); + store_value_with_data_type(raw.data() + offset, value, raw.data_type()); + } +} + +template +void TensorLibrary::fill(T &&tensor, const std::string &name, Format format) const +{ + const RawTensor &raw = get(name, format); + + for(size_t offset = 0; offset < raw.size(); offset += raw.element_size()) + { + const Coordinates id = index2coord(raw.shape(), offset / raw.element_size()); + + const RawTensor::BufferType *const raw_ptr = raw.data() + offset; + const auto 
out_ptr = static_cast(tensor(id)); + std::copy_n(raw_ptr, raw.element_size(), out_ptr); + } +} + +template +void TensorLibrary::fill(T &&tensor, const std::string &name, Channel channel) const +{ + fill(std::forward(tensor), name, get_format_for_channel(channel), channel); +} + +template +void TensorLibrary::fill(T &&tensor, const std::string &name, Format format, Channel channel) const +{ + const RawTensor &raw = get(name, format, channel); + + for(size_t offset = 0; offset < raw.size(); offset += raw.element_size()) + { + const Coordinates id = index2coord(raw.shape(), offset / raw.element_size()); + + const RawTensor::BufferType *const raw_ptr = raw.data() + offset; + const auto out_ptr = static_cast(tensor(id)); + std::copy_n(raw_ptr, raw.element_size(), out_ptr); + } +} + +template +void TensorLibrary::fill_tensor_uniform(T &&tensor, std::random_device::result_type seed_offset) const +{ + switch(tensor.data_type()) + { + case DataType::U8: + { + std::uniform_int_distribution distribution_u8(std::numeric_limits::lowest(), std::numeric_limits::max()); + fill(tensor, distribution_u8, seed_offset); + break; + } + case DataType::S8: + case DataType::QS8: + { + std::uniform_int_distribution distribution_s8(std::numeric_limits::lowest(), std::numeric_limits::max()); + fill(tensor, distribution_s8, seed_offset); + break; + } + case DataType::U16: + { + std::uniform_int_distribution distribution_u16(std::numeric_limits::lowest(), std::numeric_limits::max()); + fill(tensor, distribution_u16, seed_offset); + break; + } + case DataType::S16: + { + std::uniform_int_distribution distribution_s16(std::numeric_limits::lowest(), std::numeric_limits::max()); + fill(tensor, distribution_s16, seed_offset); + break; + } + case DataType::U32: + { + std::uniform_int_distribution distribution_u32(std::numeric_limits::lowest(), std::numeric_limits::max()); + fill(tensor, distribution_u32, seed_offset); + break; + } + case DataType::S32: + { + std::uniform_int_distribution distribution_s32(std::numeric_limits::lowest(), std::numeric_limits::max()); + fill(tensor, distribution_s32, seed_offset); + break; + } + case DataType::U64: + { + std::uniform_int_distribution distribution_u64(std::numeric_limits::lowest(), std::numeric_limits::max()); + fill(tensor, distribution_u64, seed_offset); + break; + } + case DataType::S64: + { + std::uniform_int_distribution distribution_s64(std::numeric_limits::lowest(), std::numeric_limits::max()); + fill(tensor, distribution_s64, seed_offset); + break; + } +#ifdef ENABLE_FP16 + case DataType::F16: + { + std::uniform_real_distribution distribution_f16(std::numeric_limits::lowest(), std::numeric_limits::max()); + fill(tensor, distribution_f16, seed_offset); + break; + } +#endif + case DataType::F32: + { + // It doesn't make sense to check [-inf, inf], so hard code it to a big number + std::uniform_real_distribution distribution_f32(-1000.f, 1000.f); + fill(tensor, distribution_f32, seed_offset); + break; + } + case DataType::F64: + { + // It doesn't make sense to check [-inf, inf], so hard code it to a big number + std::uniform_real_distribution distribution_f64(-1000.f, 1000.f); + fill(tensor, distribution_f64, seed_offset); + break; + } + case DataType::SIZET: + { + std::uniform_int_distribution distribution_sizet(std::numeric_limits::lowest(), std::numeric_limits::max()); + fill(tensor, distribution_sizet, seed_offset); + break; + } + default: + ARM_COMPUTE_ERROR("NOT SUPPORTED!"); + } +} + +template +void TensorLibrary::fill_tensor_uniform(T &&tensor, 
std::random_device::result_type seed_offset, D low, D high) const +{ + switch(tensor.data_type()) + { + case DataType::U8: + { + ARM_COMPUTE_ERROR_ON(!(std::is_same::value)); + std::uniform_int_distribution distribution_u8(low, high); + fill(tensor, distribution_u8, seed_offset); + break; + } + case DataType::S8: + case DataType::QS8: + { + ARM_COMPUTE_ERROR_ON(!(std::is_same::value)); + std::uniform_int_distribution distribution_s8(low, high); + fill(tensor, distribution_s8, seed_offset); + break; + } + case DataType::U16: + { + ARM_COMPUTE_ERROR_ON(!(std::is_same::value)); + std::uniform_int_distribution distribution_u16(low, high); + fill(tensor, distribution_u16, seed_offset); + break; + } + case DataType::S16: + { + ARM_COMPUTE_ERROR_ON(!(std::is_same::value)); + std::uniform_int_distribution distribution_s16(low, high); + fill(tensor, distribution_s16, seed_offset); + break; + } + case DataType::U32: + { + ARM_COMPUTE_ERROR_ON(!(std::is_same::value)); + std::uniform_int_distribution distribution_u32(low, high); + fill(tensor, distribution_u32, seed_offset); + break; + } + case DataType::S32: + { + ARM_COMPUTE_ERROR_ON(!(std::is_same::value)); + std::uniform_int_distribution distribution_s32(low, high); + fill(tensor, distribution_s32, seed_offset); + break; + } + case DataType::U64: + { + ARM_COMPUTE_ERROR_ON(!(std::is_same::value)); + std::uniform_int_distribution distribution_u64(low, high); + fill(tensor, distribution_u64, seed_offset); + break; + } + case DataType::S64: + { + ARM_COMPUTE_ERROR_ON(!(std::is_same::value)); + std::uniform_int_distribution distribution_s64(low, high); + fill(tensor, distribution_s64, seed_offset); + break; + } +#if ENABLE_FP16 + case DataType::F16: + { + ARM_COMPUTE_ERROR_ON(!(std::is_same::value)); + std::uniform_real_distribution distribution_f16(low, high); + fill(tensor, distribution_f16, seed_offset); + break; + } +#endif + case DataType::F32: + { + ARM_COMPUTE_ERROR_ON(!(std::is_same::value)); + std::uniform_real_distribution distribution_f32(low, high); + fill(tensor, distribution_f32, seed_offset); + break; + } + case DataType::F64: + { + ARM_COMPUTE_ERROR_ON(!(std::is_same::value)); + std::uniform_real_distribution distribution_f64(low, high); + fill(tensor, distribution_f64, seed_offset); + break; + } + case DataType::SIZET: + { + ARM_COMPUTE_ERROR_ON(!(std::is_same::value)); + std::uniform_int_distribution distribution_sizet(low, high); + fill(tensor, distribution_sizet, seed_offset); + break; + } + default: + ARM_COMPUTE_ERROR("NOT SUPPORTED!"); + } +} + +template +void TensorLibrary::fill_layer_data(T &&tensor, std::string name) const +{ +#ifdef _WIN32 + const std::string path_separator("\\"); +#else + const std::string path_separator("/"); +#endif + + const std::string path = _library_path + path_separator + name; + + // Open file + std::ifstream file(path, std::ios::in | std::ios::binary); + if(!file.good()) + { + throw std::runtime_error("Could not load binary data: " + path); + } + + Window window; + for(unsigned int d = 0; d < tensor.shape().num_dimensions(); ++d) + { + window.set(d, Window::Dimension(0, tensor.shape()[d], 1)); + } + + //FIXME : Replace with normal loop + execute_window_loop(window, [&](const Coordinates & id) + { + float val; + file.read(reinterpret_cast(&val), sizeof(float)); + void *const out_ptr = tensor(id); + store_value_with_data_type(out_ptr, val, tensor.data_type()); + }); +} +} // namespace test +} // namespace arm_compute +#endif diff --git a/tests/TypePrinter.h b/tests/TypePrinter.h new file mode 100644 
index 0000000000..3d5a19981f --- /dev/null +++ b/tests/TypePrinter.h @@ -0,0 +1,403 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_TEST_TYPE_PRINTER_H__ +#define __ARM_COMPUTE_TEST_TYPE_PRINTER_H__ + +#include "arm_compute/core/Dimensions.h" +#include "arm_compute/core/Error.h" +#include "arm_compute/core/Types.h" + +#include + +namespace arm_compute +{ +/** Formatted output of the Dimensions type. */ +template +inline ::std::ostream &operator<<(::std::ostream &os, const Dimensions &dimensions) +{ + os << "("; + + if(dimensions.num_dimensions() > 0) + { + os << dimensions[0]; + + for(unsigned int d = 1; d < dimensions.num_dimensions(); ++d) + { + os << ", " << dimensions[d]; + } + } + + os << ")"; + + return os; +} + +/** Formatted output of the PadStridInfo type. */ +inline ::std::ostream &operator<<(::std::ostream &os, const PadStrideInfo &pad_stride_info) +{ + os << "("; + os << pad_stride_info.stride().first << ", " << pad_stride_info.stride().second; + os << ", "; + os << pad_stride_info.pad().first << ", " << pad_stride_info.pad().second; + os << ")"; + + return os; +} + +/** Formatted output of the BorderMode type. */ +inline ::std::ostream &operator<<(::std::ostream &os, const BorderMode &mode) +{ + switch(mode) + { + case BorderMode::UNDEFINED: + os << "UNDEFINED"; + break; + case BorderMode::CONSTANT: + os << "CONSTANT"; + break; + case BorderMode::REPLICATE: + os << "REPLICATE"; + break; + default: + ARM_COMPUTE_ERROR("NOT_SUPPORTED!"); + } + + return os; +} + +/** Formatted output of the InterpolationPolicy type. */ +inline ::std::ostream &operator<<(::std::ostream &os, const InterpolationPolicy &policy) +{ + switch(policy) + { + case InterpolationPolicy::NEAREST_NEIGHBOR: + os << "NEAREST_NEIGHBOR"; + break; + case InterpolationPolicy::BILINEAR: + os << "BILINEAR"; + break; + case InterpolationPolicy::AREA: + os << "AREA"; + break; + default: + ARM_COMPUTE_ERROR("NOT_SUPPORTED!"); + } + + return os; +} + +/** Formatted output of the ConversionPolicy type. */ +inline ::std::ostream &operator<<(::std::ostream &os, const ConvertPolicy &policy) +{ + switch(policy) + { + case ConvertPolicy::WRAP: + os << "WRAP"; + break; + case ConvertPolicy::SATURATE: + os << "SATURATE"; + break; + default: + ARM_COMPUTE_ERROR("NOT_SUPPORTED!"); + } + + return os; +} + +/** Formatted output of the activation function type. 
*/ +inline ::std::ostream &operator<<(::std::ostream &os, const ActivationLayerInfo::ActivationFunction &act_function) +{ + switch(act_function) + { + case ActivationLayerInfo::ActivationFunction::ABS: + os << "ABS"; + break; + case ActivationLayerInfo::ActivationFunction::BOUNDED_RELU: + os << "BOUNDED_RELU"; + break; + case ActivationLayerInfo::ActivationFunction::LINEAR: + os << "LINEAR"; + break; + case ActivationLayerInfo::ActivationFunction::LOGISTIC: + os << "LOGISTIC"; + break; + case ActivationLayerInfo::ActivationFunction::RELU: + os << "RELU"; + break; + case ActivationLayerInfo::ActivationFunction::SOFT_RELU: + os << "SOFT_RELU"; + break; + case ActivationLayerInfo::ActivationFunction::SQRT: + os << "SQRT"; + break; + case ActivationLayerInfo::ActivationFunction::SQUARE: + os << "SQUARE"; + break; + case ActivationLayerInfo::ActivationFunction::TANH: + os << "TANH"; + break; + default: + ARM_COMPUTE_ERROR("NOT_SUPPORTED!"); + } + + return os; +} + +/** Formatted output of the NormType type. */ +inline ::std::ostream &operator<<(::std::ostream &os, const NormType &norm_type) +{ + switch(norm_type) + { + case NormType::CROSS_MAP: + os << "CROSS_MAP"; + break; + case NormType::IN_MAP_1D: + os << "IN_MAP_1D"; + break; + case NormType::IN_MAP_2D: + os << "IN_MAP_2D"; + break; + default: + ARM_COMPUTE_ERROR("NOT_SUPPORTED!"); + } + + return os; +} + +/** Formatted output of the PoolingType type. */ +inline ::std::ostream &operator<<(::std::ostream &os, const PoolingType &pool_type) +{ + switch(pool_type) + { + case PoolingType::AVG: + os << "AVG"; + break; + case PoolingType::MAX: + os << "MAX"; + break; + default: + ARM_COMPUTE_ERROR("NOT_SUPPORTED!"); + } + + return os; +} + +/** Formatted output of the RoundingPolicy type. */ +inline ::std::ostream &operator<<(::std::ostream &os, const RoundingPolicy &rounding_policy) +{ + switch(rounding_policy) + { + case RoundingPolicy::TO_ZERO: + os << "TO_ZERO"; + break; + case RoundingPolicy::TO_NEAREST_UP: + os << "TO_NEAREST_UP"; + break; + case RoundingPolicy::TO_NEAREST_EVEN: + os << "TO_NEAREST_EVEN"; + break; + default: + ARM_COMPUTE_ERROR("NOT_SUPPORTED!"); + } + + return os; +} + +/** Formatted output of the DataType type. */ +inline ::std::ostream &operator<<(::std::ostream &os, const DataType &data_type) +{ + switch(data_type) + { + case DataType::UNKNOWN: + os << "UNKNOWN"; + break; + case DataType::U8: + os << "U8"; + break; + case DataType::QS8: + os << "QS8"; + break; + case DataType::S8: + os << "S8"; + break; + case DataType::U16: + os << "U16"; + break; + case DataType::S16: + os << "S16"; + break; + case DataType::U32: + os << "U32"; + break; + case DataType::S32: + os << "S32"; + break; + case DataType::U64: + os << "U64"; + break; + case DataType::S64: + os << "S64"; + break; + case DataType::F16: + os << "F16"; + break; + case DataType::F32: + os << "F32"; + break; + case DataType::F64: + os << "F64"; + break; + case DataType::SIZET: + os << "SIZET"; + break; + default: + ARM_COMPUTE_ERROR("NOT_SUPPORTED!"); + } + + return os; +} + +/** Formatted output of the Format type. 
*/ +inline ::std::ostream &operator<<(::std::ostream &os, const Format &format) +{ + switch(format) + { + case Format::UNKNOWN: + os << "UNKNOWN"; + break; + case Format::U8: + os << "U8"; + break; + case Format::S16: + os << "S16"; + break; + case Format::U16: + os << "U16"; + break; + case Format::S32: + os << "S32"; + break; + case Format::U32: + os << "U32"; + break; + case Format::F16: + os << "F16"; + break; + case Format::F32: + os << "F32"; + break; + case Format::UV88: + os << "UV88"; + break; + case Format::RGB888: + os << "RGB888"; + break; + case Format::RGBA8888: + os << "RGBA8888"; + break; + case Format::YUV444: + os << "YUV444"; + break; + case Format::YUYV422: + os << "YUYV422"; + break; + case Format::NV12: + os << "NV12"; + break; + case Format::NV21: + os << "NV21"; + break; + case Format::IYUV: + os << "IYUV"; + break; + case Format::UYVY422: + os << "UYVY422"; + break; + default: + ARM_COMPUTE_ERROR("NOT_SUPPORTED!"); + } + + return os; +} + +/** Formatted output of the Channel type. */ +inline ::std::ostream &operator<<(::std::ostream &os, const Channel &channel) +{ + switch(channel) + { + case Channel::UNKNOWN: + os << "UNKNOWN"; + break; + case Channel::C0: + os << "C0"; + break; + case Channel::C1: + os << "C1"; + break; + case Channel::C2: + os << "C2"; + break; + case Channel::C3: + os << "C3"; + break; + case Channel::R: + os << "R"; + break; + case Channel::G: + os << "G"; + break; + case Channel::B: + os << "B"; + break; + case Channel::A: + os << "A"; + break; + case Channel::Y: + os << "Y"; + break; + case Channel::U: + os << "U"; + break; + case Channel::V: + os << "V"; + break; + default: + ARM_COMPUTE_ERROR("NOT_SUPPORTED!"); + } + + return os; +} + +/** Formatted output of the BorderSize type. */ +inline ::std::ostream &operator<<(::std::ostream &os, const BorderSize &border) +{ + os << "{" << border.top << ", " + << border.right << ", " + << border.bottom << ", " + << border.left << "}"; + + return os; +} +} // namespace arm_compute +#endif diff --git a/tests/TypeReader.h b/tests/TypeReader.h new file mode 100644 index 0000000000..82eb9e42cf --- /dev/null +++ b/tests/TypeReader.h @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_TEST_TYPE_READER_H__ +#define __ARM_COMPUTE_TEST_TYPE_READER_H__ + +#include "arm_compute/core/Types.h" + +#include +#include +#include + +namespace arm_compute +{ +/** Formatted input of the BorderMode type. */ +inline ::std::istream &operator>>(::std::istream &is, BorderMode &mode) +{ + std::string value; + + is >> value; + + std::transform(value.begin(), value.end(), value.begin(), [](unsigned char c) + { + return std::toupper(c); + }); + + if(value == "UNDEFINED") + { + mode = BorderMode::UNDEFINED; + } + else if(value == "CONSTANT") + { + mode = BorderMode::CONSTANT; + } + else if(value == "REPLICATE") + { + mode = BorderMode::REPLICATE; + } + else + { + throw std::invalid_argument("Unsupported value '" + value + "' for border mode"); + } + + return is; +} +} // namespace arm_compute +#endif diff --git a/tests/Types.h b/tests/Types.h new file mode 100644 index 0000000000..2cb69ff04e --- /dev/null +++ b/tests/Types.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_TEST_TYPES_H__ +#define __ARM_COMPUTE_TEST_TYPES_H__ +namespace arm_compute +{ +/** Fixed point operation */ +enum class FixedPointOp +{ + EXP, /**< Exponential */ + LOG, /**< Logarithm */ + INV_SQRT, /**< Inverse square root */ + RECIPROCAL /**< Reciprocal */ +}; +} // namespace arm_compute +#endif diff --git a/tests/UserConfiguration.cpp b/tests/UserConfiguration.cpp new file mode 100644 index 0000000000..a24de90468 --- /dev/null +++ b/tests/UserConfiguration.cpp @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "UserConfiguration.h" + +#include "ProgramOptions.h" + +#include + +namespace arm_compute +{ +namespace test +{ +UserConfiguration::UserConfiguration(const ProgramOptions &options) +{ + std::random_device::result_type tmp_seed = 0; + if(options.get("seed", tmp_seed)) + { + seed = tmp_seed; + } + + std::string tmp_path; + if(options.get("path", tmp_path)) + { + path = tmp_path; + } + + unsigned int tmp_threads = 0; + if(options.get("threads", tmp_threads)) + { + threads = tmp_threads; + } +} +} // namespace test +} // namespace arm_compute diff --git a/tests/UserConfiguration.h b/tests/UserConfiguration.h new file mode 100644 index 0000000000..dad0960bdb --- /dev/null +++ b/tests/UserConfiguration.h @@ -0,0 +1,136 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_TEST_USER_CONFIGURATION_H__ +#define __ARM_COMPUTE_TEST_USER_CONFIGURATION_H__ + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/Types.h" + +#include +#include + +namespace arm_compute +{ +namespace test +{ +class ProgramOptions; + +/** Container providing easy access to runtime options provided by the user. */ +struct UserConfiguration +{ +protected: + /** Wrapper around options to store if an option has been set. */ + template + class Option + { + public: + /** Initialise the option to its default (C++) value and mark it as 'not set'. */ + Option(); + + /** Initialise the option to the given @p value and mark it as 'set'. */ + Option(const T &value); + + /** Assign the given @p value and mark it as 'set'. */ + Option &operator=(const T &value); + + /** Query if the option has been set. */ + constexpr bool is_set() const; + + /** Return the underlying value as constant. */ + T get() const; + + /** Return the underlying value. */ + T &get(); + + /** Implicitly return the underlying value. 
diff --git a/tests/UserConfiguration.h b/tests/UserConfiguration.h
new file mode 100644
index 0000000000..dad0960bdb
--- /dev/null
+++ b/tests/UserConfiguration.h
@@ -0,0 +1,136 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_TEST_USER_CONFIGURATION_H__
+#define __ARM_COMPUTE_TEST_USER_CONFIGURATION_H__
+
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/Types.h"
+
+#include <random>
+#include <string>
+
+namespace arm_compute
+{
+namespace test
+{
+class ProgramOptions;
+
+/** Container providing easy access to runtime options provided by the user. */
+struct UserConfiguration
+{
+protected:
+    /** Wrapper around options to store if an option has been set. */
+    template <typename T>
+    class Option
+    {
+    public:
+        /** Initialise the option to its default (C++) value and mark it as 'not set'. */
+        Option();
+
+        /** Initialise the option to the given @p value and mark it as 'set'. */
+        Option(const T &value);
+
+        /** Assign the given @p value and mark it as 'set'. */
+        Option &operator=(const T &value);
+
+        /** Query if the option has been set. */
+        constexpr bool is_set() const;
+
+        /** Return the underlying value as constant. */
+        T get() const;
+
+        /** Return the underlying value. */
+        T &get();
+
+        /** Implicitly return the underlying value. */
+        operator T() const;
+
+    private:
+        T    _value;
+        bool _is_set;
+    };
+
+public:
+    UserConfiguration() = default;
+
+    /** Initialise the configuration according to the program options.
+     *
+     * @param[in] options Parsed command line options.
+     */
+    UserConfiguration(const ProgramOptions &options);
+
+    Option<std::string>                     path{};
+    Option<std::random_device::result_type> seed{};
+    Option<unsigned int>                    threads{};
+};
+
+template <typename T>
+UserConfiguration::Option<T>::Option()
+    : _value{}, _is_set{ false }
+{
+}
+
+template <typename T>
+UserConfiguration::Option<T>::Option(const T &value)
+    : _value{ value }, _is_set{ true }
+{
+}
+
+template <typename T>
+UserConfiguration::Option<T> &UserConfiguration::Option<T>::operator=(const T &value)
+{
+    _value  = value;
+    _is_set = true;
+
+    return *this;
+}
+
+template <typename T>
+constexpr bool UserConfiguration::Option<T>::is_set() const
+{
+    return _is_set;
+}
+
+template <typename T>
+T UserConfiguration::Option<T>::get() const
+{
+    ARM_COMPUTE_ERROR_ON(!is_set());
+    return _value;
+}
+
+template <typename T>
+T &UserConfiguration::Option<T>::get()
+{
+    return _value;
+}
+
+template <typename T>
+UserConfiguration::Option<T>::operator T() const
+{
+    ARM_COMPUTE_ERROR_ON(!is_set());
+    return _value;
+}
+} // namespace test
+} // namespace arm_compute
+#endif
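Option is in effect a pre-C++17 stand-in for std::optional: reading an unset option trips ARM_COMPUTE_ERROR_ON in debug builds, while assignment flips the 'set' flag. A short illustrative sketch of the semantics (values are made up):

    arm_compute::test::UserConfiguration config;

    // config.seed.get();                     // would assert: the option is not set yet
    config.threads = 4U;                      // operator= stores the value and marks it as set
    unsigned int n = config.threads;          // implicit operator T() returns the value
    bool known     = config.threads.is_set(); // true
    (void)n; (void)known;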
diff --git a/tests/Utils.h b/tests/Utils.h
new file mode 100644
index 0000000000..420890442a
--- /dev/null
+++ b/tests/Utils.h
@@ -0,0 +1,672 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_TEST_UTILS_H__
+#define __ARM_COMPUTE_TEST_UTILS_H__
+
+#include "arm_compute/core/Coordinates.h"
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/FixedPoint.h"
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+
+#include <cmath>
+#include <cstddef>
+#include <limits>
+#include <memory>
+#include <sstream>
+#include <string>
+#include <type_traits>
+
+namespace arm_compute
+{
+namespace test
+{
+namespace cpp11
+{
+#ifdef __ANDROID__
+/** Convert integer and float values to string.
+ *
+ * @note This function implements the same behaviour as std::to_string. The
+ * latter is missing in some Android toolchains.
+ *
+ * @param[in] value Value to be converted to string.
+ *
+ * @return String representation of @p value.
+ */
+template <typename T, typename std::enable_if<std::is_arithmetic<typename std::decay<T>::type>::value, int>::type = 0>
+std::string to_string(T &&value)
+{
+    std::stringstream stream;
+    stream << std::forward<T>(value);
+    return stream.str();
+}
+
+/** Convert string values to integer.
+ *
+ * @note This function implements the same behaviour as std::stoi. The latter
+ * is missing in some Android toolchains.
+ *
+ * @param[in] str String to be converted to int.
+ *
+ * @return Integer representation of @p str.
+ */
+inline int stoi(const std::string &str)
+{
+    std::stringstream stream(str);
+    int value = 0;
+    stream >> value;
+    return value;
+}
+
+/** Convert string values to unsigned long.
+ *
+ * @note This function implements the same behaviour as std::stoul. The latter
+ * is missing in some Android toolchains.
+ *
+ * @param[in] str String to be converted to unsigned long.
+ *
+ * @return Unsigned long representation of @p str.
+ */
+inline unsigned long stoul(const std::string &str)
+{
+    std::stringstream stream(str);
+    unsigned long value = 0;
+    stream >> value;
+    return value;
+}
+
+/** Convert string values to float.
+ *
+ * @note This function implements the same behaviour as std::stof. The latter
+ * is missing in some Android toolchains.
+ *
+ * @param[in] str String to be converted to float.
+ *
+ * @return Float representation of @p str.
+ */
+inline float stof(const std::string &str)
+{
+    std::stringstream stream(str);
+    float value = 0.f;
+    stream >> value;
+    return value;
+}
+
+/** Round floating-point value with half value rounding away from zero.
+ *
+ * @note This function implements the same behaviour as std::round except that it doesn't
+ * support integral types. std::round is not in namespace std in some Android toolchains.
+ *
+ * @param[in] value Floating-point value to be rounded.
+ *
+ * @return Floating-point value of rounded @p value.
+ */
+template <typename T, typename = typename std::enable_if<std::is_floating_point<T>::value>::type>
+inline T round(T value)
+{
+    return ::round(value);
+}
+
+/** Truncate floating-point value.
+ *
+ * @note This function implements the same behaviour as std::trunc except that it doesn't
+ * support integral types. std::trunc is not in namespace std in some Android toolchains.
+ *
+ * @param[in] value Floating-point value to be truncated.
+ *
+ * @return Floating-point value of truncated @p value.
+ */
+template <typename T, typename = typename std::enable_if<std::is_floating_point<T>::value>::type>
+inline T trunc(T value)
+{
+    return ::trunc(value);
+}
+
+/** Composes a floating point value with the magnitude of @p x and the sign of @p y.
+ *
+ * @note This function implements the same behaviour as std::copysign except that it doesn't
+ * support integral types. std::copysign is not in namespace std in some Android toolchains.
+ *
+ * @param[in] x Value that contains the magnitude to be used in constructing the result.
+ * @param[in] y Value that contains the sign to be used in constructing the result.
+ *
+ * @return Floating-point value with magnitude of @p x and sign of @p y.
+ */
+template <typename T, typename = typename std::enable_if<std::is_floating_point<T>::value>::type>
+inline T copysign(T x, T y)
+{
+    return ::copysign(x, y);
+}
+#else
+/** Convert integer and float values to string.
+ *
+ * @note This function acts as a convenience wrapper around std::to_string. The
+ * latter is missing in some Android toolchains.
+ *
+ * @param[in] value Value to be converted to string.
+ *
+ * @return String representation of @p value.
+ */
+template <typename T>
+std::string to_string(T &&value)
+{
+    return ::std::to_string(std::forward<T>(value));
+}
+
+/** Convert string values to integer.
+ *
+ * @note This function acts as a convenience wrapper around std::stoi. The
+ * latter is missing in some Android toolchains.
+ *
+ * @param[in] args Arguments forwarded to std::stoi.
+ *
+ * @return Integer representation of input string.
+ */
+template <typename... Ts>
+int stoi(Ts &&... args)
+{
+    return ::std::stoi(std::forward<Ts>(args)...);
+}
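Both branches of this #ifdef expose the same names with the same meaning, so test code can call the wrappers unconditionally regardless of toolchain. Illustrative calls (not part of the patch):

    using namespace arm_compute::test;

    std::string s = cpp11::to_string(42);  // "42", even where std::to_string is absent
    int         i = cpp11::stoi("123");    // 123
    float       f = cpp11::stof("2.5");    // 2.5f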
+/** Convert string values to unsigned long.
+ *
+ * @note This function acts as a convenience wrapper around std::stoul. The
+ * latter is missing in some Android toolchains.
+ *
+ * @param[in] args Arguments forwarded to std::stoul.
+ *
+ * @return Unsigned long representation of input string.
+ */
+template <typename... Ts>
+unsigned long stoul(Ts &&... args)
+{
+    return ::std::stoul(std::forward<Ts>(args)...);
+}
+
+/** Convert string values to float.
+ *
+ * @note This function acts as a convenience wrapper around std::stof. The
+ * latter is missing in some Android toolchains.
+ *
+ * @param[in] args Arguments forwarded to std::stof.
+ *
+ * @return Float representation of input string.
+ */
+template <typename... Ts>
+float stof(Ts &&... args)
+{
+    return ::std::stof(std::forward<Ts>(args)...);
+}
+
+/** Round floating-point value with half value rounding away from zero.
+ *
+ * @note This function implements the same behaviour as std::round except that it doesn't
+ * support integral types. std::round is not in namespace std in some Android toolchains.
+ *
+ * @param[in] value Floating-point value to be rounded.
+ *
+ * @return Floating-point value of rounded @p value.
+ */
+template <typename T, typename = typename std::enable_if<std::is_floating_point<T>::value>::type>
+inline T round(T value)
+{
+    return std::round(value);
+}
+
+/** Truncate floating-point value.
+ *
+ * @note This function implements the same behaviour as std::trunc except that it doesn't
+ * support integral types. std::trunc is not in namespace std in some Android toolchains.
+ *
+ * @param[in] value Floating-point value to be truncated.
+ *
+ * @return Floating-point value of truncated @p value.
+ */
+template <typename T, typename = typename std::enable_if<std::is_floating_point<T>::value>::type>
+inline T trunc(T value)
+{
+    return std::trunc(value);
+}
+
+/** Composes a floating point value with the magnitude of @p x and the sign of @p y.
+ *
+ * @note This function implements the same behaviour as std::copysign except that it doesn't
+ * support integral types. std::copysign is not in namespace std in some Android toolchains.
+ *
+ * @param[in] x Value that contains the magnitude to be used in constructing the result.
+ * @param[in] y Value that contains the sign to be used in constructing the result.
+ *
+ * @return Floating-point value with magnitude of @p x and sign of @p y.
+ */
+template <typename T, typename = typename std::enable_if<std::is_floating_point<T>::value>::type>
+inline T copysign(T x, T y)
+{
+    return std::copysign(x, y);
+}
+#endif
+
+/** Round floating-point value with half value rounding to positive infinity.
+ *
+ * @param[in] value Floating-point value to be rounded.
+ *
+ * @return Floating-point value of rounded @p value.
+ */
+template <typename T, typename = typename std::enable_if<std::is_floating_point<T>::value>::type>
+inline T round_half_up(T value)
+{
+    return std::floor(value + 0.5f);
+}
+
+/** Round floating-point value with half value rounding to nearest even.
+ *
+ * @param[in] value   Floating-point value to be rounded.
+ * @param[in] epsilon Precision.
+ *
+ * @return Floating-point value of rounded @p value.
+ */
+template <typename T, typename = typename std::enable_if<std::is_floating_point<T>::value>::type>
+inline T round_half_even(T value, T epsilon = std::numeric_limits<T>::epsilon())
+{
+    T positive_value = std::abs(value);
+    T ipart          = 0;
+    std::modf(positive_value, &ipart);
+    // If 'value' is exactly halfway between two integers
+    if(std::abs(positive_value - (ipart + 0.5f)) < epsilon)
+    {
+        // If 'ipart' is even then return 'ipart'
+        if(std::fmod(ipart, 2.f) < epsilon)
+        {
+            return cpp11::copysign(ipart, value);
+        }
+        // Else return the nearest even integer
+        return cpp11::copysign(std::ceil(ipart + 0.5f), value);
+    }
+    // Otherwise use the usual round to closest
+    return cpp11::copysign(cpp11::round(positive_value), value);
+}
+} // namespace cpp11
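The two tie-breaking helpers above only disagree on exact .5 inputs. A few spot values (illustrative, derived from the definitions):

    using namespace arm_compute::test;

    cpp11::round_half_up(2.5f);    // 3.0f: ties move toward +infinity
    cpp11::round_half_up(-2.5f);   // -2.0f: floor(-2.5f + 0.5f)
    cpp11::round_half_even(2.5f);  // 2.0f: ties go to the nearest even integer
    cpp11::round_half_even(3.5f);  // 4.0f
    cpp11::round_half_even(-2.5f); // -2.0f: the sign is restored via copysign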
+
+namespace cpp14
+{
+/** make_unique is missing in CPP11. Reimplement it according to the standard
+ * proposal.
+ */
+template <class T>
+struct _Unique_if
+{
+    typedef std::unique_ptr<T> _Single_object;
+};
+
+template <class T>
+struct _Unique_if<T[]>
+{
+    typedef std::unique_ptr<T[]> _Unknown_bound;
+};
+
+template <class T, size_t N>
+struct _Unique_if<T[N]>
+{
+    typedef void _Known_bound;
+};
+
+template <class T, class... Args>
+typename _Unique_if<T>::_Single_object
+make_unique(Args &&... args)
+{
+    return std::unique_ptr<T>(new T(std::forward<Args>(args)...));
+}
+
+template <class T>
+typename _Unique_if<T>::_Unknown_bound
+make_unique(size_t n)
+{
+    typedef typename std::remove_extent<T>::type U;
+    return std::unique_ptr<T>(new U[n]());
+}
+
+template <class T, class... Args>
+typename _Unique_if<T>::_Known_bound
+make_unique(Args &&...) = delete;
+} // namespace cpp14
+
+namespace traits
+{
+// *INDENT-OFF*
+// clang-format off
+template <typename T> struct promote { };
+template <> struct promote<uint8_t> { using type = uint16_t; };
+template <> struct promote<int8_t> { using type = int16_t; };
+template <> struct promote<uint16_t> { using type = uint32_t; };
+template <> struct promote<int16_t> { using type = int32_t; };
+template <> struct promote<uint32_t> { using type = uint64_t; };
+template <> struct promote<int32_t> { using type = int64_t; };
+template <> struct promote<float> { using type = float; };
+
+template <typename T>
+using promote_t = typename promote<T>::type;
+
+template <typename T>
+using make_signed_conditional_t = typename std::conditional<std::is_integral<T>::value, std::make_signed<T>, std::common_type<T>>::type;
+// clang-format on
+// *INDENT-ON*
+}
+
+/** Look up the format corresponding to a channel.
+ *
+ * @param[in] channel Channel type.
+ *
+ * @return Format that contains the given channel.
+ */
+inline Format get_format_for_channel(Channel channel)
+{
+    switch(channel)
+    {
+        case Channel::R:
+        case Channel::G:
+        case Channel::B:
+            return Format::RGB888;
+        default:
+            throw std::runtime_error("Unsupported channel");
+    }
+}
+
+/** Return the format of a channel.
+ *
+ * @param[in] channel Channel type.
+ *
+ * @return Format of the given channel.
+ */
+inline Format get_channel_format(Channel channel)
+{
+    switch(channel)
+    {
+        case Channel::R:
+        case Channel::G:
+        case Channel::B:
+            return Format::U8;
+        default:
+            throw std::runtime_error("Unsupported channel");
+    }
+}
+
+/** Base case of foldl.
+ *
+ * @return value.
+ */
+template <typename F, typename T>
+inline T foldl(F &&, const T &value)
+{
+    return value;
+}
+
+/** Base case of foldl.
+ *
+ * @return func(value1, value2).
+ */
+template <typename F, typename T, typename U>
+inline auto foldl(F &&func, T &&value1, U &&value2) -> decltype(func(value1, value2))
+{
+    return func(value1, value2);
+}
+
+/** Fold left.
+ *
+ * @param[in] func    Binary function to be called.
+ * @param[in] initial Initial value.
+ * @param[in] value   Argument passed to the function.
+ * @param[in] values  Remaining arguments.
+ */
+template <typename F, typename I, typename T, typename... Vs>
+inline I foldl(F &&func, I &&initial, T &&value, Vs &&... values)
+{
+    return foldl(std::forward<F>(func), func(std::forward<I>(initial), std::forward<T>(value)), std::forward<Vs>(values)...);
+}
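foldl gives the helpers a variadic left fold: the first two overloads terminate the recursion, the third peels off one argument per step. An illustrative use (not part of the patch):

    using arm_compute::test::foldl;

    // (((10 + 1) + 2) + 3) == 16
    int sum = foldl([](int a, int b) { return a + b; }, 10, 1, 2, 3);

    // Any binary callable works, e.g. joining dimensions into a string:
    auto join = [](std::string acc, int d) { return acc + "x" + arm_compute::test::cpp11::to_string(d); };
    std::string dims = foldl(join, std::string("dims"), 2, 3, 4); // "dimsx2x3x4"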
+
+/** Create a valid region covering the entire tensor shape.
+ *
+ * @param[in] shape Shape used as size of the valid region.
+ *
+ * @return A valid region starting at (0, 0, ...) with size of @p shape.
+ */
+inline ValidRegion shape_to_valid_region(TensorShape shape)
+{
+    Coordinates anchor;
+    anchor.set(std::max<int>(0, shape.num_dimensions() - 1), 0);
+    return ValidRegion(std::move(anchor), std::move(shape));
+}
+
+/** Create a valid region covering the tensor shape with UNDEFINED border mode and specified border size.
+ *
+ * @param[in] shape       Shape used as size of the valid region.
+ * @param[in] border_size Border size used to specify the region to exclude.
+ *
+ * @return A valid region starting at (@p border_size.left, @p border_size.top, ...) with reduced size of @p shape.
+ */
+inline ValidRegion shape_to_valid_region_undefined_border(TensorShape shape, BorderSize border_size)
+{
+    ARM_COMPUTE_ERROR_ON(shape.num_dimensions() < 2);
+    Coordinates anchor;
+    anchor.set(std::max<int>(0, shape.num_dimensions() - 1), 0);
+    anchor.set(0, border_size.left);
+    anchor.set(1, border_size.top);
+    shape.set(0, shape.x() - border_size.left - border_size.right);
+    shape.set(1, shape.y() - border_size.top - border_size.bottom);
+    return ValidRegion(std::move(anchor), shape);
+}
+
+/** Calculate the required padding given the available @p size and the required
+ * @p step.
+ *
+ * @param[in] size Available size.
+ * @param[in] step Required step size.
+ *
+ * @return Difference between next greater multiple of @p step and @p size.
+ */
+inline int required_padding(int size, int step)
+{
+    return ((size + step - 1) / step) * step - size;
+}
+
+/** Calculate the required padding for writing operation with UNDEFINED border mode.
+ *
+ * @param[in] size        Available size.
+ * @param[in] step        Required step size; number of elements to write at each iteration.
+ * @param[in] border_size Border size.
+ *
+ * @return Required padding size plus border size.
+ */
+inline int required_padding_undefined_border_write(int size, int step, int border_size)
+{
+    return required_padding(size, step) + border_size;
+}
+
+/** Calculate the required padding for reading operation with UNDEFINED border mode.
+ *
+ * @param[in] size         Available size.
+ * @param[in] read_step    Required step size; number of elements to read at each iteration.
+ * @param[in] process_step Required step size; number of elements to process at each iteration.
+ *
+ * @return Required padding size.
+ */
+inline int required_padding_undefined_border_read(int size, int read_step, int process_step)
+{
+    return required_padding(size, process_step) + read_step - process_step;
+}
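required_padding rounds the available size up to the next multiple of the step and returns the difference; the two border variants then add what the border itself consumes. Worked values (illustrative):

    using namespace arm_compute::test;

    required_padding(7, 4); // ((7 + 3) / 4) * 4 - 7 == 1
    required_padding(8, 4); // 0: size is already a multiple of the step
    required_padding_undefined_border_write(7, 4, 2); // 1 + 2 == 3
    required_padding_undefined_border_read(7, 6, 4);  // required_padding(7, 4) + 6 - 4 == 3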
+
+/** Write the value after casting the pointer according to @p data_type.
+ *
+ * @warning The type of the value must match the specified data type.
+ *
+ * @param[out] ptr       Pointer to memory where the @p value will be written.
+ * @param[in]  value     Value that will be written.
+ * @param[in]  data_type Data type that will be written.
+ */
+template <typename T>
+void store_value_with_data_type(void *ptr, T value, DataType data_type)
+{
+    switch(data_type)
+    {
+        case DataType::U8:
+            *reinterpret_cast<uint8_t *>(ptr) = value;
+            break;
+        case DataType::S8:
+        case DataType::QS8:
+            *reinterpret_cast<int8_t *>(ptr) = value;
+            break;
+        case DataType::U16:
+            *reinterpret_cast<uint16_t *>(ptr) = value;
+            break;
+        case DataType::S16:
+            *reinterpret_cast<int16_t *>(ptr) = value;
+            break;
+        case DataType::U32:
+            *reinterpret_cast<uint32_t *>(ptr) = value;
+            break;
+        case DataType::S32:
+            *reinterpret_cast<int32_t *>(ptr) = value;
+            break;
+        case DataType::U64:
+            *reinterpret_cast<uint64_t *>(ptr) = value;
+            break;
+        case DataType::S64:
+            *reinterpret_cast<int64_t *>(ptr) = value;
+            break;
+#ifdef ENABLE_FP16
+        case DataType::F16:
+            *reinterpret_cast<float16_t *>(ptr) = value;
+            break;
+#endif /* ENABLE_FP16 */
+        case DataType::F32:
+            *reinterpret_cast<float *>(ptr) = value;
+            break;
+        case DataType::F64:
+            *reinterpret_cast<double *>(ptr) = value;
+            break;
+        case DataType::SIZET:
+            *reinterpret_cast<size_t *>(ptr) = value;
+            break;
+        default:
+            ARM_COMPUTE_ERROR("NOT SUPPORTED!");
+    }
+}
+
+/** Saturate a value of type T against the numeric limits of type U.
+ *
+ * @param[in] val Value to be saturated.
+ *
+ * @return Saturated value.
+ */
+template <typename U, typename T>
+T saturate_cast(T val)
+{
+    if(val > static_cast<T>(std::numeric_limits<U>::max()))
+    {
+        val = static_cast<T>(std::numeric_limits<U>::max());
+    }
+    if(val < static_cast<T>(std::numeric_limits<U>::lowest()))
+    {
+        val = static_cast<T>(std::numeric_limits<U>::lowest());
+    }
+    return val;
+}
+
+/** Find the signed promoted common type.
+ */
+template <typename T1, typename T2 = T1, typename T3 = T1>
+struct common_promoted_signed_type
+{
+    using common_type       = typename std::common_type<T1, T2, T3>::type;
+    using promoted_type     = traits::promote_t<common_type>;
+    using intermediate_type = typename traits::make_signed_conditional_t<promoted_type>::type;
+};
+
+/** Convert a linear index into n-dimensional coordinates.
+ *
+ * @param[in] shape Shape of the n-dimensional tensor.
+ * @param[in] index Linear index specifying the i-th element.
+ *
+ * @return n-dimensional coordinates.
+ */
+inline Coordinates index2coord(const TensorShape &shape, int index)
+{
+    int num_elements = shape.total_size();
+
+    ARM_COMPUTE_ERROR_ON_MSG(index < 0 || index >= num_elements, "Index has to be in [0, num_elements)");
+    ARM_COMPUTE_ERROR_ON_MSG(num_elements == 0, "Cannot create coordinate from empty shape");
+
+    Coordinates coord{ 0 };
+
+    for(int d = shape.num_dimensions() - 1; d >= 0; --d)
+    {
+        num_elements /= shape[d];
+        coord.set(d, index / num_elements);
+        index %= num_elements;
+    }
+
+    return coord;
+}
+
+/** Linearise the given coordinate.
+ *
+ * Transforms the given coordinate into a linear offset in terms of
+ * elements.
+ *
+ * @param[in] shape Shape of the n-dimensional tensor.
+ * @param[in] coord Coordinate to be converted.
+ *
+ * @return Linear offset to the element.
+ */
+inline int coord2index(const TensorShape &shape, const Coordinates &coord)
+{
+    ARM_COMPUTE_ERROR_ON_MSG(shape.total_size() == 0, "Cannot get index from empty shape");
+    ARM_COMPUTE_ERROR_ON_MSG(coord.num_dimensions() == 0, "Cannot get index of empty coordinate");
+
+    int index    = 0;
+    int dim_size = 1;
+
+    for(unsigned int i = 0; i < coord.num_dimensions(); ++i)
+    {
+        index += coord[i] * dim_size;
+        dim_size *= shape[i];
+    }
+
+    return index;
+}
+
+/** Check if a coordinate is within a valid region */
+inline bool is_in_valid_region(const ValidRegion &valid_region, const Coordinates &coord)
+{
+    ARM_COMPUTE_ERROR_ON_MSG(valid_region.shape.num_dimensions() != coord.num_dimensions(), "Shapes of valid region and coordinates do not agree");
+    for(int d = 0; static_cast<unsigned int>(d) < coord.num_dimensions(); ++d)
+    {
+        if(coord[d] < valid_region.start(d) || coord[d] >= valid_region.end(d))
+        {
+            return false;
+        }
+    }
+    return true;
+}
+} // namespace test
+} // namespace arm_compute
+#endif
diff --git a/tests/benchmark/CL/ActivationLayer.cpp b/tests/benchmark/CL/ActivationLayer.cpp
new file mode 100644
index 0000000000..5180d3d900
--- /dev/null
+++ b/tests/benchmark/CL/ActivationLayer.cpp
@@ -0,0 +1,212 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "CL/CLAccessor.h" +#include "CL/Helper.h" +#include "Globals.h" +#include "TensorLibrary.h" +#include "benchmark/Datasets.h" +#include "benchmark/Profiler.h" +#include "benchmark/WallClockTimer.h" + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/CLScheduler.h" +#include "arm_compute/runtime/CL/CLTensor.h" +#include "arm_compute/runtime/CL/CLTensorAllocator.h" +#include "arm_compute/runtime/CL/functions/CLActivationLayer.h" + +#include "benchmark/benchmark_api.h" + +using namespace arm_compute; +using namespace arm_compute::test; +using namespace arm_compute::test::benchmark; +using namespace arm_compute::test::cl; + +#include "benchmark/common/ActivationLayer.h" + +namespace +{ +using ActivationLayerAlexNet = ActivationLayer; +using ActivationLayerLeNet5 = ActivationLayer; +using ActivationLayerGoogLeNet = ActivationLayer; +} // namespace + +BENCHMARK_DEFINE_F(ActivationLayerAlexNet, cl_alexnet) +(::benchmark::State &state) +{ + while(state.KeepRunning()) + { + // Run function + profiler.start(); + act_layer.run(); + CLScheduler::get().sync(); + profiler.stop(); + } +} + +BENCHMARK_REGISTER_F(ActivationLayerAlexNet, cl_alexnet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ActivationLayerAlexNet, cl_alexnet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ActivationLayerAlexNet, cl_alexnet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ActivationLayerAlexNet, cl_alexnet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ActivationLayerAlexNet, cl_alexnet) +->Threads(1) +->Apply(DataSetArgBatched); + +BENCHMARK_DEFINE_F(ActivationLayerLeNet5, cl_lenet5) +(::benchmark::State &state) +{ + while(state.KeepRunning()) + { + // Run function + profiler.start(); + act_layer.run(); + CLScheduler::get().sync(); + profiler.stop(); + } +} + +BENCHMARK_REGISTER_F(ActivationLayerLeNet5, cl_lenet5) +->Threads(1) +->Apply(DataSetArgBatched); + +BENCHMARK_DEFINE_F(ActivationLayerGoogLeNet, cl_googlenet) +(::benchmark::State &state) +{ + while(state.KeepRunning()) + { + // Run function + profiler.start(); + act_layer.run(); + CLScheduler::get().sync(); + profiler.stop(); + } +} + +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) +->Threads(1) 
+->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); diff --git a/tests/benchmark/CL/BitwiseAnd.cpp b/tests/benchmark/CL/BitwiseAnd.cpp new file mode 100644 index 0000000000..a3deb3eb5b --- /dev/null +++ b/tests/benchmark/CL/BitwiseAnd.cpp @@ -0,0 +1,133 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "CL/CLAccessor.h" +#include "CL/Helper.h" +#include "Globals.h" +#include "TensorLibrary.h" +#include "benchmark/Datasets.h" +#include "benchmark/Profiler.h" +#include "benchmark/WallClockTimer.h" + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/CLScheduler.h" +#include "arm_compute/runtime/CL/CLTensor.h" +#include "arm_compute/runtime/CL/CLTensorAllocator.h" +#include "arm_compute/runtime/CL/functions/CLBitwiseAnd.h" + +#include "benchmark/benchmark_api.h" + +#include +#include + +using namespace arm_compute; +using namespace arm_compute::test; +using namespace arm_compute::test::benchmark; +using namespace arm_compute::test::cl; + +namespace +{ +template +class BitwiseAnd : public ::benchmark::Fixture +{ +public: + void SetUp(::benchmark::State &state) override + { + ::benchmark::Fixture::SetUp(state); + + profiler.add(std::make_shared()); + + const std::string image_name = *(DataSet().begin() + state.range(0)); + + // Create tensors + src1 = create_tensor(image_name, DataType::U8); + src2 = create_tensor(image_name, DataType::U8); + dst = create_tensor(image_name, DataType::U8); + + // Create and configure function + band.configure(&src1, &src2, &dst); + + // Allocate tensors + src1.allocator()->allocate(); + src2.allocator()->allocate(); + dst.allocator()->allocate(); + + // Fill source tensors + library->fill(CLAccessor(src1), image_name, Channel::R); + library->fill(CLAccessor(src2), image_name, Channel::G); + } + + void TearDown(::benchmark::State &state) override + { + profiler.submit(state); + + ::benchmark::Fixture::TearDown(state); + } + + CLBitwiseAnd band{}; + Profiler profiler{}; + +private: + CLTensor src1{}; + CLTensor src2{}; + CLTensor dst{}; +}; + +using BitwiseAndSmall = BitwiseAnd; +using BitwiseAndLarge = BitwiseAnd; +} // namespace + +BENCHMARK_DEFINE_F(BitwiseAndSmall, cl_bitwise_and) +(::benchmark::State &state) +{ + while(state.KeepRunning()) + { + // Run function + profiler.start(); + band.run(); + CLScheduler::get().sync(); + profiler.stop(); + } +} + +BENCHMARK_REGISTER_F(BitwiseAndSmall, cl_bitwise_and) +->Threads(1) +->Apply(DataSetArgs); + +BENCHMARK_DEFINE_F(BitwiseAndLarge, cl_bitwise_and) +(::benchmark::State &state) +{ + while(state.KeepRunning()) + { + // Run function + profiler.start(); + band.run(); + CLScheduler::get().sync(); + profiler.stop(); + } +} + +BENCHMARK_REGISTER_F(BitwiseAndLarge, cl_bitwise_and) +->Threads(1) +->Apply(DataSetArgs); diff --git a/tests/benchmark/CL/CMakeLists.txt b/tests/benchmark/CL/CMakeLists.txt new file mode 100644 index 0000000000..8493309f40 --- /dev/null +++ b/tests/benchmark/CL/CMakeLists.txt @@ -0,0 +1,57 @@ +# Copyright (c) 2017 ARM Limited. 
+# +# SPDX-License-Identifier: MIT +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to +# deal in the Software without restriction, including without limitation the +# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +# sell copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +cmake_minimum_required (VERSION 3.1) + +include_directories(${CMAKE_SOURCE_DIR}/../include) + +set(arm_compute_test_benchmark_TARGET_DEFINITIONS + ${arm_compute_test_benchmark_TARGET_DEFINITIONS} + -DOPENCL + PARENT_SCOPE +) + +set(arm_compute_test_benchmark_TARGET_INCLUDES + ${arm_compute_test_benchmark_TARGET_INCLUDES} + ${CMAKE_SOURCE_DIR}/../include + PARENT_SCOPE +) + +set(arm_compute_test_benchmark_OPENCL_SOURCE_FILES + ${CMAKE_SOURCE_DIR}/CL/CLAccessor.h + ${CMAKE_CURRENT_SOURCE_DIR}/Bitwise/BitwiseAnd.cpp +) + +add_library(arm_compute_test_benchmark_OPENCL OBJECT + ${arm_compute_test_benchmark_OPENCL_SOURCE_FILES} +) + +set(arm_compute_test_benchmark_TARGET_OBJECTS + ${arm_compute_test_benchmark_TARGET_OBJECTS} + $ + PARENT_SCOPE +) + +set(arm_compute_test_benchmark_TARGET_LIBRARIES + ${arm_compute_test_benchmark_TARGET_LIBRARIES} + OpenCL + PARENT_SCOPE +) diff --git a/tests/benchmark/CL/ConvolutionLayer.cpp b/tests/benchmark/CL/ConvolutionLayer.cpp new file mode 100644 index 0000000000..e1f4fabdc3 --- /dev/null +++ b/tests/benchmark/CL/ConvolutionLayer.cpp @@ -0,0 +1,277 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "CL/CLAccessor.h" +#include "CL/Helper.h" +#include "Globals.h" +#include "TensorLibrary.h" +#include "benchmark/Datasets.h" +#include "benchmark/Profiler.h" +#include "benchmark/WallClockTimer.h" + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/CLScheduler.h" +#include "arm_compute/runtime/CL/CLTensor.h" +#include "arm_compute/runtime/CL/CLTensorAllocator.h" +#include "arm_compute/runtime/CL/functions/CLConvolutionLayer.h" + +#include "benchmark/benchmark_api.h" + +using namespace arm_compute; +using namespace arm_compute::test; +using namespace arm_compute::test::benchmark; +using namespace arm_compute::test::cl; + +#include "benchmark/common/ConvolutionLayer.h" + +namespace +{ +using ConvolutionLayerAlexNet = ConvolutionLayer; +using ConvolutionLayerLeNet5 = ConvolutionLayer; +using ConvolutionLayerGoogLeNet1 = ConvolutionLayer; +using ConvolutionLayerGoogLeNet2 = ConvolutionLayer; +} // namespace + +BENCHMARK_DEFINE_F(ConvolutionLayerAlexNet, cl_alexnet) +(::benchmark::State &state) +{ + while(state.KeepRunning()) + { + // Run function + profiler.start(); + conv_layer->run(); + CLScheduler::get().sync(); + profiler.stop(); + } +} + +BENCHMARK_REGISTER_F(ConvolutionLayerAlexNet, cl_alexnet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ConvolutionLayerAlexNet, cl_alexnet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ConvolutionLayerAlexNet, cl_alexnet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ConvolutionLayerAlexNet, cl_alexnet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ConvolutionLayerAlexNet, cl_alexnet) +->Threads(1) +->Apply(DataSetArgBatched); + +BENCHMARK_DEFINE_F(ConvolutionLayerLeNet5, cl_lenet5) +(::benchmark::State &state) +{ + while(state.KeepRunning()) + { + // Run function + profiler.start(); + conv_layer->run(); + CLScheduler::get().sync(); + profiler.stop(); + } +} + +BENCHMARK_REGISTER_F(ConvolutionLayerLeNet5, cl_lenet5) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ConvolutionLayerLeNet5, cl_lenet5) +->Threads(1) +->Apply(DataSetArgBatched); + +BENCHMARK_DEFINE_F(ConvolutionLayerGoogLeNet1, cl_googlenet) +(::benchmark::State &state) +{ + while(state.KeepRunning()) + { + // Run function + profiler.start(); + conv_layer->run(); + CLScheduler::get().sync(); + profiler.stop(); + } +} + +BENCHMARK_DEFINE_F(ConvolutionLayerGoogLeNet2, cl_googlenet) +(::benchmark::State &state) +{ + while(state.KeepRunning()) + { + // Run function + profiler.start(); + conv_layer->run(); + CLScheduler::get().sync(); + profiler.stop(); + } +} + +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); 
+BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); 
+BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); diff --git a/tests/benchmark/CL/FullyConnectedLayer.cpp b/tests/benchmark/CL/FullyConnectedLayer.cpp new file mode 100644 index 0000000000..6e8c89fa0b --- /dev/null +++ b/tests/benchmark/CL/FullyConnectedLayer.cpp @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "CL/CLAccessor.h" +#include "CL/Helper.h" +#include "Globals.h" +#include "TensorLibrary.h" +#include "benchmark/Datasets.h" +#include "benchmark/Profiler.h" +#include "benchmark/WallClockTimer.h" + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/CLScheduler.h" +#include "arm_compute/runtime/CL/CLTensor.h" +#include "arm_compute/runtime/CL/CLTensorAllocator.h" +#include "arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h" + +#include "benchmark/benchmark_api.h" + +#include +#include + +using namespace arm_compute; +using namespace arm_compute::test; +using namespace arm_compute::test::benchmark; +using namespace arm_compute::test::cl; + +#include "benchmark/common/FullyConnectedLayer.h" + +namespace +{ +using FullyConnectedLayerAlexNet = FullyConnectedLayer; +using FullyConnectedLayerLeNet5 = FullyConnectedLayer; +using FullyConnectedLayerGoogLeNet = FullyConnectedLayer; +} // namespace + +BENCHMARK_DEFINE_F(FullyConnectedLayerAlexNet, cl_alexnet) +(::benchmark::State &state) +{ + while(state.KeepRunning()) + { + // Run function + profiler.start(); + fc_layer->run(); + CLScheduler::get().sync(); + profiler.stop(); + } +} + +BENCHMARK_REGISTER_F(FullyConnectedLayerAlexNet, cl_alexnet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(FullyConnectedLayerAlexNet, cl_alexnet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(FullyConnectedLayerAlexNet, cl_alexnet) +->Threads(1) +->Apply(DataSetArgBatched); + +BENCHMARK_DEFINE_F(FullyConnectedLayerLeNet5, cl_lenet5) +(::benchmark::State &state) +{ + while(state.KeepRunning()) + { + // Run function + profiler.start(); + fc_layer->run(); + CLScheduler::get().sync(); + profiler.stop(); + } +} + +BENCHMARK_REGISTER_F(FullyConnectedLayerLeNet5, cl_lenet5) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(FullyConnectedLayerLeNet5, cl_lenet5) +->Threads(1) +->Apply(DataSetArgBatched); + +BENCHMARK_DEFINE_F(FullyConnectedLayerGoogLeNet, cl_googlenet) +(::benchmark::State &state) +{ + while(state.KeepRunning()) + { + // Run function + profiler.start(); + fc_layer->run(); + CLScheduler::get().sync(); + profiler.stop(); + } +} + +BENCHMARK_REGISTER_F(FullyConnectedLayerGoogLeNet, cl_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); diff --git a/tests/benchmark/CL/GEMM.cpp b/tests/benchmark/CL/GEMM.cpp new file mode 100644 index 0000000000..b90556df48 --- /dev/null +++ b/tests/benchmark/CL/GEMM.cpp @@ -0,0 +1,492 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "CL/CLAccessor.h" +#include "CL/Helper.h" +#include "Globals.h" +#include "TensorLibrary.h" +#include "benchmark/Datasets.h" +#include "benchmark/Profiler.h" +#include "benchmark/WallClockTimer.h" + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/CLScheduler.h" +#include "arm_compute/runtime/CL/CLTensor.h" +#include "arm_compute/runtime/CL/CLTensorAllocator.h" +#include "arm_compute/runtime/CL/functions/CLGEMM.h" + +#include "benchmark/benchmark_api.h" + +using namespace arm_compute; +using namespace arm_compute::test; +using namespace arm_compute::test::benchmark; +using namespace arm_compute::test::cl; + +#include "benchmark/CL/GEMM.h" + +namespace +{ +using GEMMFP16GoogLeNet1 = GEMM; +using GEMMFP16GoogLeNet2 = GEMM; +using GEMMFP32GoogLeNet1 = GEMM; +using GEMMFP32GoogLeNet2 = GEMM; +} // namespace + +BENCHMARK_DEFINE_F(GEMMFP16GoogLeNet1, cl_googlenet) +(::benchmark::State &state) +{ + while(state.KeepRunning()) + { + // Run function + profiler.start(); + gemm_layer->run(); + CLScheduler::get().sync(); + profiler.stop(); + } +} + +BENCHMARK_DEFINE_F(GEMMFP16GoogLeNet2, cl_googlenet) +(::benchmark::State &state) +{ + while(state.KeepRunning()) + { + // Run function + profiler.start(); + gemm_layer->run(); + CLScheduler::get().sync(); + profiler.stop(); + } +} + +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) +->Threads(1) 
+->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); 
+BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); + +BENCHMARK_DEFINE_F(GEMMFP32GoogLeNet1, cl_googlenet) +(::benchmark::State &state) +{ + while(state.KeepRunning()) + { + // Run function + profiler.start(); + gemm_layer->run(); + CLScheduler::get().sync(); + profiler.stop(); + } +} + +BENCHMARK_DEFINE_F(GEMMFP32GoogLeNet2, cl_googlenet) +(::benchmark::State &state) +{ + while(state.KeepRunning()) + { + // Run function + profiler.start(); + gemm_layer->run(); + CLScheduler::get().sync(); + profiler.stop(); + } +} + +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, 
cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, cl_googlenet) +->Threads(1) +->Apply(DataSetArg); diff --git a/tests/benchmark/CL/GEMM.h b/tests/benchmark/CL/GEMM.h new file mode 100644 index 0000000000..02a339609c --- /dev/null +++ b/tests/benchmark/CL/GEMM.h @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_TEST_BENCHMARK_CL_GEMM_H__
+#define __ARM_COMPUTE_TEST_BENCHMARK_CL_GEMM_H__
+
+#include "TensorLibrary.h"
+#include "Utils.h"
+#include "dataset/GEMMDataset.h"
+
+#include
+
+using namespace arm_compute;
+using namespace arm_compute::test;
+using namespace arm_compute::test::benchmark;
+
+namespace arm_compute
+{
+namespace test
+{
+namespace benchmark
+{
+// FIXME: Merge with NEON/GEMM.h into common/GEMM.h after adding F16 support to NEON GEMM and QS8 support to CL GEMM
+template
+class GEMM : public ::benchmark::Fixture
+{
+public:
+    void SetUp(::benchmark::State &state) override
+    {
+        ARM_COMPUTE_ERROR_ON_MSG(data_type != DataType::F16 && data_type != DataType::F32, "Unsupported data type for GEMM operation");
+
+        profiler.add(std::make_shared());
+
+        const GEMMDataObject gemm_obj = *(DataSet().begin() + state.range(0));
+
+        TensorShape shape_a = gemm_obj.shape_a;
+        TensorShape shape_b = gemm_obj.shape_b;
+        TensorShape shape_c = gemm_obj.shape_c;
+        TensorShape shape_d = gemm_obj.shape_d;
+
+        // Create tensors
+        a = create_tensor(shape_a, data_type);
+        b = create_tensor(shape_b, data_type);
+        c = create_tensor(shape_c, data_type);
+        d = create_tensor(shape_d, data_type);
+
+        // Create and configure function
+        gemm_layer = std::unique_ptr(new Function());
+        gemm_layer->configure(&a, &b, &c, &d, gemm_obj.alpha, gemm_obj.beta);
+
+        // Allocate tensors
+        a.allocator()->allocate();
+        b.allocator()->allocate();
+        c.allocator()->allocate();
+        d.allocator()->allocate();
+    }
+
+    void TearDown(::benchmark::State &state) override
+    {
+        gemm_layer.reset();
+
+        a.allocator()->free();
+        b.allocator()->free();
+        c.allocator()->free();
+        d.allocator()->free();
+
+        profiler.submit(state);
+    }
+
+    std::unique_ptr gemm_layer{ nullptr };
+    Profiler profiler{};
+
+private:
+    TensorType a{};
+    TensorType b{};
+    TensorType c{};
+    TensorType d{};
+};
+} // namespace benchmark
+} // namespace test
+} // namespace arm_compute
+#endif //__ARM_COMPUTE_TEST_BENCHMARK_CL_GEMM_H__
diff --git a/tests/benchmark/CL/NormalizationLayer.cpp b/tests/benchmark/CL/NormalizationLayer.cpp
new file mode 100644
index 0000000000..81d3c65912
--- /dev/null
+++ b/tests/benchmark/CL/NormalizationLayer.cpp
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "CL/CLAccessor.h"
+#include "CL/Helper.h"
+#include "Globals.h"
+#include "TensorLibrary.h"
+#include "benchmark/Datasets.h"
+#include "benchmark/Profiler.h"
+#include "benchmark/WallClockTimer.h"
+
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/CL/CLTensorAllocator.h"
+#include "arm_compute/runtime/CL/functions/CLNormalizationLayer.h"
+
+#include "benchmark/benchmark_api.h"
+
+using namespace arm_compute;
+using namespace arm_compute::test;
+using namespace arm_compute::test::benchmark;
+using namespace arm_compute::test::cl;
+
+#include "benchmark/common/NormalizationLayer.h"
+
+namespace
+{
+using NormalizationLayerAlexNet = NormalizationLayer;
+using NormalizationLayerGoogLeNet = NormalizationLayer;
+
+} // namespace
+
+BENCHMARK_DEFINE_F(NormalizationLayerAlexNet, cl_alexnet)
+(::benchmark::State &state)
+{
+    while(state.KeepRunning())
+    {
+        // Run function
+        profiler.start();
+        norm_layer->run();
+        CLScheduler::get().sync();
+        profiler.stop();
+    }
+}
+
+BENCHMARK_REGISTER_F(NormalizationLayerAlexNet, cl_alexnet)
+->Threads(1)
+->Apply(DataSetArgBatched);
+BENCHMARK_REGISTER_F(NormalizationLayerAlexNet, cl_alexnet)
+->Threads(1)
+->Apply(DataSetArgBatched);
+
+BENCHMARK_DEFINE_F(NormalizationLayerGoogLeNet, cl_googlenet)
+(::benchmark::State &state)
+{
+    while(state.KeepRunning())
+    {
+        // Run function
+        profiler.start();
+        norm_layer->run();
+        CLScheduler::get().sync();
+        profiler.stop();
+    }
+}
+
+BENCHMARK_REGISTER_F(NormalizationLayerGoogLeNet, cl_googlenet)
+->Threads(1)
+->Apply(DataSetArgBatched);
+BENCHMARK_REGISTER_F(NormalizationLayerGoogLeNet, cl_googlenet)
+->Threads(1)
+->Apply(DataSetArgBatched);
diff --git a/tests/benchmark/CL/PoolingLayer.cpp b/tests/benchmark/CL/PoolingLayer.cpp
new file mode 100644
index 0000000000..5285f279e7
--- /dev/null
+++ b/tests/benchmark/CL/PoolingLayer.cpp
@@ -0,0 +1,141 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "CL/CLAccessor.h"
+#include "CL/Helper.h"
+#include "Globals.h"
+#include "TensorLibrary.h"
+#include "benchmark/Datasets.h"
+#include "benchmark/Profiler.h"
+#include "benchmark/WallClockTimer.h"
+
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/CL/CLTensorAllocator.h"
+#include "arm_compute/runtime/CL/functions/CLPoolingLayer.h"
+
+#include "benchmark/benchmark_api.h"
+
+using namespace arm_compute;
+using namespace arm_compute::test;
+using namespace arm_compute::test::benchmark;
+using namespace arm_compute::test::cl;
+
+#include "benchmark/common/PoolingLayer.h"
+
+namespace
+{
+using PoolingLayerAlexNet = PoolingLayer;
+using PoolingLayerLeNet5 = PoolingLayer;
+using PoolingLayerGoogLeNet = PoolingLayer;
+} // namespace
+
+BENCHMARK_DEFINE_F(PoolingLayerAlexNet, cl_alexnet)
+(::benchmark::State &state)
+{
+    while(state.KeepRunning())
+    {
+        // Run function
+        profiler.start();
+        pool_layer.run();
+        CLScheduler::get().sync();
+        profiler.stop();
+    }
+}
+
+BENCHMARK_REGISTER_F(PoolingLayerAlexNet, cl_alexnet)
+->Threads(1)
+->Apply(DataSetArgBatched);
+BENCHMARK_REGISTER_F(PoolingLayerAlexNet, cl_alexnet)
+->Threads(1)
+->Apply(DataSetArgBatched);
+BENCHMARK_REGISTER_F(PoolingLayerAlexNet, cl_alexnet)
+->Threads(1)
+->Apply(DataSetArgBatched);
+
+BENCHMARK_DEFINE_F(PoolingLayerLeNet5, cl_lenet5)
+(::benchmark::State &state)
+{
+    while(state.KeepRunning())
+    {
+        // Run function
+        profiler.start();
+        pool_layer.run();
+        CLScheduler::get().sync();
+        profiler.stop();
+    }
+}
+
+BENCHMARK_REGISTER_F(PoolingLayerLeNet5, cl_lenet5)
+->Threads(1)
+->Apply(DataSetArgBatched);
+BENCHMARK_REGISTER_F(PoolingLayerLeNet5, cl_lenet5)
+->Threads(1)
+->Apply(DataSetArgBatched);
+
+BENCHMARK_DEFINE_F(PoolingLayerGoogLeNet, cl_googlenet)
+(::benchmark::State &state)
+{
+    while(state.KeepRunning())
+    {
+        // Run function
+        profiler.start();
+        pool_layer.run();
+        CLScheduler::get().sync();
+        profiler.stop();
+    }
+}
+
+// FIXME: Add support for 7x7 pooling layer pool5/7x7_s1
+BENCHMARK_REGISTER_F(PoolingLayerGoogLeNet, cl_googlenet)
+->Threads(1)
+->Apply(DataSetArgBatched);
+BENCHMARK_REGISTER_F(PoolingLayerGoogLeNet, cl_googlenet)
+->Threads(1)
+->Apply(DataSetArgBatched);
+BENCHMARK_REGISTER_F(PoolingLayerGoogLeNet, cl_googlenet)
+->Threads(1)
+->Apply(DataSetArgBatched);
+BENCHMARK_REGISTER_F(PoolingLayerGoogLeNet, cl_googlenet)
+->Threads(1)
+->Apply(DataSetArgBatched);
+BENCHMARK_REGISTER_F(PoolingLayerGoogLeNet, cl_googlenet)
+->Threads(1)
+->Apply(DataSetArgBatched);
+BENCHMARK_REGISTER_F(PoolingLayerGoogLeNet, cl_googlenet)
+->Threads(1)
+->Apply(DataSetArgBatched);
+BENCHMARK_REGISTER_F(PoolingLayerGoogLeNet, cl_googlenet)
+->Threads(1)
+->Apply(DataSetArgBatched);
+BENCHMARK_REGISTER_F(PoolingLayerGoogLeNet, cl_googlenet)
+->Threads(1)
+->Apply(DataSetArgBatched);
+BENCHMARK_REGISTER_F(PoolingLayerGoogLeNet, cl_googlenet)
+->Threads(1)
+->Apply(DataSetArgBatched);
+BENCHMARK_REGISTER_F(PoolingLayerGoogLeNet, cl_googlenet)
+->Threads(1)
+->Apply(DataSetArgBatched);
+BENCHMARK_REGISTER_F(PoolingLayerGoogLeNet, cl_googlenet)
+->Threads(1)
+->Apply(DataSetArgBatched);
diff --git a/tests/benchmark/CMakeLists.txt b/tests/benchmark/CMakeLists.txt
new file mode 100644
index 0000000000..115333a1b0
--- /dev/null
+++ b/tests/benchmark/CMakeLists.txt
@@ -0,0 +1,100 @@
+# Copyright (c) 2017 ARM Limited.
+#
+# SPDX-License-Identifier: MIT
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to
+# deal in the Software without restriction, including without limitation the
+# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+# sell copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+cmake_minimum_required (VERSION 3.1)
+
+add_library(benchmark STATIC IMPORTED)
+set_target_properties(benchmark PROPERTIES
+    IMPORTED_LOCATION "${CMAKE_SOURCE_DIR}/../3rdparty/linux/armv7a/libbenchmark.a"
+)
+
+add_library(OpenCL SHARED IMPORTED)
+set_target_properties(OpenCL PROPERTIES
+    IMPORTED_LOCATION "${CMAKE_SOURCE_DIR}/../build/opencl-1.2-stubs/libOpenCL.so"
+    IMPORTED_NO_SONAME 1
+)
+
+option(ENABLE_PMU_COUNTER "Compile with PMU counter support")
+
+set(ARM_COMPUTE_TARGETS_TO_MEASURE "all" CACHE STRING "Semicolon-separated list of targets to include in validation.")
+
+set(ARM_COMPUTE_ALL_TARGETS
+    NEON
+    CL
+)
+
+if(ARM_COMPUTE_TARGETS_TO_MEASURE STREQUAL "all")
+    set(ARM_COMPUTE_TARGETS_TO_MEASURE ${ARM_COMPUTE_ALL_TARGETS})
+endif()
+
+list(REMOVE_DUPLICATES ARM_COMPUTE_TARGETS_TO_MEASURE)
+
+foreach(TARGET ${ARM_COMPUTE_TARGETS_TO_MEASURE})
+    list(FIND ARM_COMPUTE_ALL_TARGETS ${TARGET} idx)
+
+    if(${idx} LESS 0)
+        message(FATAL_ERROR "The target '${TARGET}' does not exist. It should be one of\n${ARM_COMPUTE_ALL_TARGETS}")
+    else()
+        add_subdirectory(${TARGET})
+    endif()
+endforeach()
+
+set(arm_compute_test_benchmark_SOURCE_FILES
+    ${CMAKE_CURRENT_SOURCE_DIR}/main.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/Datasets.h
+    ${CMAKE_CURRENT_SOURCE_DIR}/Instrument.h
+    ${CMAKE_CURRENT_SOURCE_DIR}/Profiler.h
+    ${CMAKE_CURRENT_SOURCE_DIR}/Profiler.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/PerformanceProgramOptions.h
+    ${CMAKE_CURRENT_SOURCE_DIR}/PerformanceProgramOptions.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/PerformanceUserConfiguration.h
+    ${CMAKE_CURRENT_SOURCE_DIR}/PerformanceUserConfiguration.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/WallClockTimer.h
+    ${CMAKE_CURRENT_SOURCE_DIR}/WallClockTimer.cpp
+)
+
+if(${ENABLE_PMU_COUNTER})
+    list(APPEND arm_compute_test_benchmark_SOURCE_FILES
+        ${CMAKE_CURRENT_SOURCE_DIR}/PMUCounter.h
+        ${CMAKE_CURRENT_SOURCE_DIR}/PMUCounter.cpp
+    )
+endif()
+
+add_library(arm_compute_test_benchmark OBJECT
+    ${arm_compute_test_benchmark_SOURCE_FILES}
+)
+
+add_definitions(${arm_compute_test_benchmark_TARGET_DEFINITIONS})
+include_directories(${arm_compute_test_benchmark_TARGET_INCLUDES})
+
+add_executable(arm_compute_benchmark
+    $
+    ${arm_compute_test_benchmark_TARGET_OBJECTS}
+    $
+    $
+)
+
+target_link_libraries(arm_compute_benchmark
+    benchmark
+    boost_program_options
+    arm_compute
+    ${arm_compute_test_benchmark_TARGET_LIBRARIES}
+)
diff --git a/tests/benchmark/Datasets.h b/tests/benchmark/Datasets.h
new file mode 100644
index 0000000000..e7bfb6f10f
--- /dev/null
+++ b/tests/benchmark/Datasets.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_TEST_BENCHMARK_DATASETS_H__
+#define __ARM_COMPUTE_TEST_BENCHMARK_DATASETS_H__
+
+#include "dataset/ActivationLayerDataset.h"
+#include "dataset/BorderModeDataset.h"
+#include "dataset/ConvolutionLayerDataset.h"
+#include "dataset/DataTypeDatasets.h"
+#include "dataset/FullyConnectedLayerDataset.h"
+#include "dataset/GEMMDataset.h"
+#include "dataset/ImageDatasets.h"
+#include "dataset/InterpolationPolicyDataset.h"
+#include "dataset/NormalizationLayerDataset.h"
+#include "dataset/PoolingLayerDataset.h"
+#include "dataset/ShapeDatasets.h"
+
+#include "benchmark/benchmark_api.h"
+
+#include
+
+namespace arm_compute
+{
+namespace test
+{
+namespace benchmark
+{
+template
+void DataSetArg(::benchmark::internal::Benchmark *b)
+{
+    b->Arg(N);
+    b->ArgName(std::string(*(DataSet().begin() + N)));
+}
+
+template
+void DataSetArgBatched(::benchmark::internal::Benchmark *b)
+{
+    constexpr std::array batches{ { Args... } };
+    for(const auto &el : batches)
+    {
+        b->Args({ N, static_cast(el) });
+    }
+    b->ArgNames({ std::string(*(DataSet().begin() + N)), "batch_size" });
+}
+
+template
+void DataSetArgs(::benchmark::internal::Benchmark *b)
+{
+    for(size_t i = 0; i < DataSet().size(); ++i)
+    {
+        b->Arg(i);
+        b->ArgName(*(DataSet().begin() + i));
+    }
+}
+} // namespace benchmark
+} // namespace test
+} // namespace arm_compute
+#endif //__ARM_COMPUTE_TEST_BENCHMARK_DATASETS_H__
diff --git a/tests/benchmark/Instrument.h b/tests/benchmark/Instrument.h
new file mode 100644
index 0000000000..39b0088670
--- /dev/null
+++ b/tests/benchmark/Instrument.h
@@ -0,0 +1,107 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_TEST_BENCHMARK_INSTRUMENT_H__
+#define __ARM_COMPUTE_TEST_BENCHMARK_INSTRUMENT_H__
+
+#include "Utils.h"
+
+#include
+#include
+
+namespace arm_compute
+{
+namespace test
+{
+namespace benchmark
+{
+/** Interface for classes that can be used to measure performance. */
+class Instrument
+{
+public:
+    /** Interface defining a measurement, e.g. time, cycles, ... */
+    class IMeasurement
+    {
+    public:
+        IMeasurement() = default;
+        IMeasurement(const IMeasurement &) = default;
+        IMeasurement(IMeasurement &&) = default;
+        IMeasurement &operator=(const IMeasurement &) = default;
+        IMeasurement &operator=(IMeasurement &&) = default;
+        virtual ~IMeasurement() = default;
+
+        virtual operator double() const = 0;
+    };
+
+    /** Implementation of a Measurement class for arithmetic types. */
+    template
+    class Measurement : public IMeasurement
+    {
+    public:
+        /** Store the given value as measurement.
+         *
+         * @param[in] value Measured value.
+         */
+        Measurement(T value);
+
+        operator double() const override;
+
+    private:
+        T _value;
+    };
+
+    Instrument() = default;
+    Instrument(const Instrument &) = default;
+    Instrument(Instrument &&) = default;
+    Instrument &operator=(const Instrument &) = default;
+    Instrument &operator=(Instrument &&) = default;
+    virtual ~Instrument() = default;
+
+    /** Identifier for the instrument */
+    virtual std::string id() const = 0;
+
+    /** Start measuring. */
+    virtual void start() = 0;
+
+    /** Stop measuring. */
+    virtual void stop() = 0;
+
+    /** Return the latest measurement. */
+    virtual std::unique_ptr get_measurement() const = 0;
+};
+
+template
+Instrument::Measurement::Measurement(T value)
+    : _value{ value }
+{
+}
+
+template
+Instrument::Measurement::operator double() const
+{
+    return _value;
+}
+} // namespace benchmark
+} // namespace test
+} // namespace arm_compute
+#endif //__ARM_COMPUTE_TEST_BENCHMARK_INSTRUMENT_H__
diff --git a/tests/benchmark/NEON/ActivationLayer.cpp b/tests/benchmark/NEON/ActivationLayer.cpp
new file mode 100644
index 0000000000..8faed9f831
--- /dev/null
+++ b/tests/benchmark/NEON/ActivationLayer.cpp
@@ -0,0 +1,239 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */ +#include "Globals.h" +#include "NEON/Helper.h" +#include "NEON/NEAccessor.h" +#include "TensorLibrary.h" +#include "benchmark/Datasets.h" +#include "benchmark/Profiler.h" +#include "benchmark/WallClockTimer.h" + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h" +#include "arm_compute/runtime/Tensor.h" +#include "arm_compute/runtime/TensorAllocator.h" + +#include "benchmark/benchmark_api.h" + +using namespace arm_compute; +using namespace arm_compute::test; +using namespace arm_compute::test::benchmark; +using namespace arm_compute::test::neon; + +#include "benchmark/common/ActivationLayer.h" + +namespace +{ +using ActivationLayerAlexNetF32 = ActivationLayer; +using ActivationLayerAlexNetQS8 = ActivationLayer; +using ActivationLayerLeNet5 = ActivationLayer; +using ActivationLayerGoogLeNet = ActivationLayer; +} // namespace + +// F32 +BENCHMARK_DEFINE_F(ActivationLayerAlexNetF32, neon_alexnet) +(::benchmark::State &state) +{ + while(state.KeepRunning()) + { + // Run function + profiler.start(); + act_layer.run(); + profiler.stop(); + } +} + +BENCHMARK_REGISTER_F(ActivationLayerAlexNetF32, neon_alexnet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ActivationLayerAlexNetF32, neon_alexnet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ActivationLayerAlexNetF32, neon_alexnet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ActivationLayerAlexNetF32, neon_alexnet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ActivationLayerAlexNetF32, neon_alexnet) +->Threads(1) +->Apply(DataSetArgBatched); + +// QS8 +BENCHMARK_DEFINE_F(ActivationLayerAlexNetQS8, neon_alexnet) +(::benchmark::State &state) +{ + while(state.KeepRunning()) + { + // Run function + profiler.start(); + act_layer.run(); + profiler.stop(); + } +} + +BENCHMARK_REGISTER_F(ActivationLayerAlexNetQS8, neon_alexnet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ActivationLayerAlexNetQS8, neon_alexnet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ActivationLayerAlexNetQS8, neon_alexnet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ActivationLayerAlexNetQS8, neon_alexnet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ActivationLayerAlexNetQS8, neon_alexnet) +->Threads(1) +->Apply(DataSetArgBatched); + +BENCHMARK_DEFINE_F(ActivationLayerLeNet5, neon_lenet5) +(::benchmark::State &state) +{ + while(state.KeepRunning()) + { + // Run function + profiler.start(); + act_layer.run(); + profiler.stop(); + } +} + +BENCHMARK_REGISTER_F(ActivationLayerLeNet5, neon_lenet5) +->Threads(1) +->Apply(DataSetArgBatched); + +BENCHMARK_DEFINE_F(ActivationLayerGoogLeNet, neon_googlenet) +(::benchmark::State &state) +{ + while(state.KeepRunning()) + { + // Run function + profiler.start(); + act_layer.run(); + profiler.stop(); + } +} + +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, 
neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ActivationLayerGoogLeNet, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); diff --git a/tests/benchmark/NEON/BitwiseAnd.cpp b/tests/benchmark/NEON/BitwiseAnd.cpp new file mode 100644 index 0000000000..dba3d1ebea --- /dev/null +++ b/tests/benchmark/NEON/BitwiseAnd.cpp @@ -0,0 +1,126 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "Globals.h"
+#include "NEON/Helper.h"
+#include "NEON/NEAccessor.h"
+#include "TensorLibrary.h"
+#include "benchmark/Datasets.h"
+#include "benchmark/Profiler.h"
+#include "benchmark/WallClockTimer.h"
+
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/functions/NEBitwiseAnd.h"
+#include "arm_compute/runtime/Tensor.h"
+#include "arm_compute/runtime/TensorAllocator.h"
+
+#include "benchmark/benchmark_api.h"
+
+#include
+#include
+
+using namespace arm_compute;
+using namespace arm_compute::test;
+using namespace arm_compute::test::benchmark;
+using namespace arm_compute::test::neon;
+
+namespace
+{
+template
+class BitwiseAnd : public ::benchmark::Fixture
+{
+public:
+    void SetUp(::benchmark::State &state) override
+    {
+        profiler.add(std::make_shared());
+
+        const std::string image_name = *(DataSet().begin() + state.range(0));
+
+        // Create tensors
+        src1 = create_tensor(image_name, DataType::U8);
+        src2 = create_tensor(image_name, DataType::U8);
+        dst = create_tensor(image_name, DataType::U8);
+
+        // Create and configure function
+        band.configure(&src1, &src2, &dst);
+
+        // Allocate tensors
+        src1.allocator()->allocate();
+        src2.allocator()->allocate();
+        dst.allocator()->allocate();
+
+        // Fill source tensors
+        library->fill(NEAccessor(src1), image_name, Channel::R);
+        library->fill(NEAccessor(src2), image_name, Channel::G);
+    }
+
+    void TearDown(::benchmark::State &state) override
+    {
+        profiler.submit(state);
+    }
+
+    NEBitwiseAnd band{};
+    Profiler profiler{};
+
+private:
+    Tensor src1{};
+    Tensor src2{};
+    Tensor dst{};
+};
+
+using BitwiseAndSmall = BitwiseAnd;
+using BitwiseAndLarge = BitwiseAnd;
+} // namespace
+
+BENCHMARK_DEFINE_F(BitwiseAndSmall, neon_bitwise_and)
+(::benchmark::State &state)
+{
+    while(state.KeepRunning())
+    {
+        // Run function
+        profiler.start();
+        band.run();
+        profiler.stop();
+    }
+}
+
+BENCHMARK_REGISTER_F(BitwiseAndSmall, neon_bitwise_and)
+->Threads(1)
+->Apply(DataSetArgs);
+
+BENCHMARK_DEFINE_F(BitwiseAndLarge, neon_bitwise_and)
+(::benchmark::State &state)
+{
+    while(state.KeepRunning())
+    {
+        // Run function
+        profiler.start();
+        band.run();
+        profiler.stop();
+    }
+}
+
+BENCHMARK_REGISTER_F(BitwiseAndLarge, neon_bitwise_and)
+->Threads(1)
+->Apply(DataSetArgs);
diff --git a/tests/benchmark/NEON/CMakeLists.txt b/tests/benchmark/NEON/CMakeLists.txt
new file mode 100644
index 0000000000..2cb3eb36c9
--- /dev/null
+++ b/tests/benchmark/NEON/CMakeLists.txt
@@ -0,0 +1,37 @@
+# Copyright (c) 2017 ARM Limited.
+#
+# SPDX-License-Identifier: MIT
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to
+# deal in the Software without restriction, including without limitation the
+# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+# sell copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+cmake_minimum_required (VERSION 3.1)
+
+set(arm_compute_test_benchmark_NEON_SOURCE_FILES
+    ${CMAKE_SOURCE_DIR}/NEON/NEAccessor.h
+    ${CMAKE_CURRENT_SOURCE_DIR}/Bitwise/BitwiseAnd.cpp
+)
+
+add_library(arm_compute_test_benchmark_NEON OBJECT
+    ${arm_compute_test_benchmark_NEON_SOURCE_FILES}
+)
+
+SET(arm_compute_test_benchmark_TARGET_OBJECTS
+    ${arm_compute_test_benchmark_TARGET_OBJECTS}
+    $
+    PARENT_SCOPE
+)
diff --git a/tests/benchmark/NEON/ConvolutionLayer.cpp b/tests/benchmark/NEON/ConvolutionLayer.cpp
new file mode 100644
index 0000000000..0cfff8494b
--- /dev/null
+++ b/tests/benchmark/NEON/ConvolutionLayer.cpp
@@ -0,0 +1,303 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "Globals.h"
+#include "NEON/Helper.h"
+#include "NEON/NEAccessor.h"
+#include "TensorLibrary.h"
+#include "benchmark/Datasets.h"
+#include "benchmark/Profiler.h"
+#include "benchmark/WallClockTimer.h"
+
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/functions/NEConvolutionLayer.h"
+#include "arm_compute/runtime/Tensor.h"
+#include "arm_compute/runtime/TensorAllocator.h"
+
+#include "benchmark/benchmark_api.h"
+
+using namespace arm_compute;
+using namespace arm_compute::test;
+using namespace arm_compute::test::benchmark;
+using namespace arm_compute::test::neon;
+
+#include "benchmark/common/ConvolutionLayer.h"
+
+namespace
+{
+using ConvolutionLayerAlexNetF32 = ConvolutionLayer;
+using ConvolutionLayerAlexNetQS8 = ConvolutionLayer;
+using ConvolutionLayerLeNet5 = ConvolutionLayer;
+using ConvolutionLayerGoogLeNet1 = ConvolutionLayer;
+using ConvolutionLayerGoogLeNet2 = ConvolutionLayer;
+} // namespace
+
+// F32
+BENCHMARK_DEFINE_F(ConvolutionLayerAlexNetF32, neon_alexnet)
+(::benchmark::State &state)
+{
+    while(state.KeepRunning())
+    {
+        // Run function
+        profiler.start();
+        conv_layer->run();
+        profiler.stop();
+    }
+}
+
+BENCHMARK_REGISTER_F(ConvolutionLayerAlexNetF32, neon_alexnet)
+->Threads(1)
+->Apply(DataSetArgBatched);
+BENCHMARK_REGISTER_F(ConvolutionLayerAlexNetF32, neon_alexnet)
+->Threads(1)
+->Apply(DataSetArgBatched);
+BENCHMARK_REGISTER_F(ConvolutionLayerAlexNetF32, neon_alexnet)
+->Threads(1)
+->Apply(DataSetArgBatched);
+BENCHMARK_REGISTER_F(ConvolutionLayerAlexNetF32, neon_alexnet)
+->Threads(1)
+->Apply(DataSetArgBatched);
+BENCHMARK_REGISTER_F(ConvolutionLayerAlexNetF32, neon_alexnet)
+->Threads(1)
+->Apply(DataSetArgBatched);
+
+// QS8
+BENCHMARK_DEFINE_F(ConvolutionLayerAlexNetQS8, neon_alexnet)
+(::benchmark::State &state)
+{
+    while(state.KeepRunning())
+    {
+        // Run function
+        profiler.start();
+        conv_layer->run();
+        profiler.stop();
+    }
+}
+
+BENCHMARK_REGISTER_F(ConvolutionLayerAlexNetQS8, neon_alexnet)
+->Threads(1)
+->Apply(DataSetArgBatched);
+BENCHMARK_REGISTER_F(ConvolutionLayerAlexNetQS8, neon_alexnet)
+->Threads(1)
+->Apply(DataSetArgBatched);
+BENCHMARK_REGISTER_F(ConvolutionLayerAlexNetQS8, neon_alexnet)
+->Threads(1)
+->Apply(DataSetArgBatched);
+BENCHMARK_REGISTER_F(ConvolutionLayerAlexNetQS8, neon_alexnet)
+->Threads(1)
+->Apply(DataSetArgBatched);
+BENCHMARK_REGISTER_F(ConvolutionLayerAlexNetQS8, neon_alexnet)
+->Threads(1)
+->Apply(DataSetArgBatched);
+
+BENCHMARK_DEFINE_F(ConvolutionLayerLeNet5, neon_lenet5)
+(::benchmark::State &state)
+{
+    while(state.KeepRunning())
+    {
+        // Run function
+        profiler.start();
+        conv_layer->run();
+        profiler.stop();
+    }
+}
+
+BENCHMARK_REGISTER_F(ConvolutionLayerLeNet5, neon_lenet5)
+->Threads(1)
+->Apply(DataSetArgBatched);
+BENCHMARK_REGISTER_F(ConvolutionLayerLeNet5, neon_lenet5)
+->Threads(1)
+->Apply(DataSetArgBatched);
+
+BENCHMARK_DEFINE_F(ConvolutionLayerGoogLeNet1, neon_googlenet)
+(::benchmark::State &state)
+{
+    while(state.KeepRunning())
+    {
+        // Run function
+        profiler.start();
+        conv_layer->run();
+        profiler.stop();
+    }
+}
+
+BENCHMARK_DEFINE_F(ConvolutionLayerGoogLeNet2, neon_googlenet)
+(::benchmark::State &state)
+{
+    while(state.KeepRunning())
+    {
+        // Run function
+        profiler.start();
+        conv_layer->run();
+        profiler.stop();
+    }
+}
+
+BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet)
+->Threads(1)
+->Apply(DataSetArgBatched);
+BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet)
+->Threads(1)
+->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); 
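The batched registrations in this file go through DataSetArgBatched (tests/benchmark/Datasets.h), which queues one benchmark run per batch size for a chosen dataset entry and labels the arguments accordingly. A compile-checkable sketch, assuming the template parameters are a dataset type, an entry index and a pack of batch sizes; TinyConvDataSet and the batch values 1, 4 and 8 are hypothetical:

    // Hedged sketch of the batched argument generator; names are hypothetical.
    #include "benchmark/benchmark_api.h"

    #include <array>
    #include <string>

    namespace
    {
    // Hypothetical stand-in for a convolution dataset with two named entries.
    struct TinyConvDataSet
    {
        const char *const *begin() const
        {
            static const char *const names[] = { "conv1", "conv2" };
            return names;
        }
    };

    template <typename DataSet, int N, int... Batches>
    void TinyDataSetArgBatched(::benchmark::internal::Benchmark *b)
    {
        // First argument selects the dataset entry, second is the batch size;
        // one run is queued per batch size in the pack.
        constexpr std::array<int, sizeof...(Batches)> batches{ { Batches... } };
        for(const auto &el : batches)
        {
            b->Args({ N, el });
        }
        b->ArgNames({ std::string(*(DataSet().begin() + N)), "batch_size" });
    }

    // Taking the address forces an instantiation so the sketch is
    // compile-checked: entry 0 at assumed batch sizes 1, 4 and 8.
    void (*const apply_entry0)(::benchmark::internal::Benchmark *) = &TinyDataSetArgBatched<TinyConvDataSet, 0, 1, 4, 8>;
    } // namespace

Passing the entry index and batch size as the two benchmark arguments is what lets a single fixture SetUp pick both the layer configuration and the batch dimension from state.range().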
+BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(ConvolutionLayerGoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); diff --git a/tests/benchmark/NEON/ConvolutionLayerDirect.cpp b/tests/benchmark/NEON/ConvolutionLayerDirect.cpp new file mode 100644 index 0000000000..bc56e844d8 --- /dev/null +++ b/tests/benchmark/NEON/ConvolutionLayerDirect.cpp @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */
+#include "Globals.h"
+#include "NEON/Helper.h"
+#include "NEON/NEAccessor.h"
+#include "TensorLibrary.h"
+#include "benchmark/Datasets.h"
+#include "benchmark/Profiler.h"
+#include "benchmark/WallClockTimer.h"
+#include "dataset/ConvolutionLayerDataset.h"
+
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h"
+#include "arm_compute/runtime/Tensor.h"
+#include "arm_compute/runtime/TensorAllocator.h"
+
+#include "benchmark/benchmark_api.h"
+
+using namespace arm_compute;
+using namespace arm_compute::test;
+using namespace arm_compute::test::benchmark;
+using namespace arm_compute::test::neon;
+
+#include "benchmark/common/ConvolutionLayer.h"
+
+namespace
+{
+using ConvolutionLayerDirectAlexNet = ConvolutionLayer;
+} // namespace
+
+BENCHMARK_DEFINE_F(ConvolutionLayerDirectAlexNet, neon_alexnet)
+(::benchmark::State &state)
+{
+    while(state.KeepRunning())
+    {
+        // Run function
+        profiler.start();
+        conv_layer->run();
+        profiler.stop();
+    }
+}
+
+// Register only the 3x3 convolution layers
+BENCHMARK_REGISTER_F(ConvolutionLayerDirectAlexNet, neon_alexnet)
+->Threads(1)
+->Apply(DataSetArgBatched);
+BENCHMARK_REGISTER_F(ConvolutionLayerDirectAlexNet, neon_alexnet)
+->Threads(1)
+->Apply(DataSetArgBatched);
+BENCHMARK_REGISTER_F(ConvolutionLayerDirectAlexNet, neon_alexnet)
+->Threads(1)
+->Apply(DataSetArgBatched);
diff --git a/tests/benchmark/NEON/FullyConnectedLayer.cpp b/tests/benchmark/NEON/FullyConnectedLayer.cpp
new file mode 100644
index 0000000000..85979203ac
--- /dev/null
+++ b/tests/benchmark/NEON/FullyConnectedLayer.cpp
@@ -0,0 +1,132 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */ +#include "Globals.h" +#include "NEON/Helper.h" +#include "NEON/NEAccessor.h" +#include "TensorLibrary.h" +#include "benchmark/Datasets.h" +#include "benchmark/Profiler.h" +#include "benchmark/WallClockTimer.h" + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h" +#include "arm_compute/runtime/Tensor.h" +#include "arm_compute/runtime/TensorAllocator.h" + +#include "benchmark/benchmark_api.h" + +using namespace arm_compute; +using namespace arm_compute::test; +using namespace arm_compute::test::benchmark; +using namespace arm_compute::test::neon; + +#include "benchmark/common/FullyConnectedLayer.h" + +namespace +{ +using FullyConnectedLayerAlexNetF32 = FullyConnectedLayer; +using FullyConnectedLayerAlexNetQS8 = FullyConnectedLayer; +using FullyConnectedLayerLeNet5 = FullyConnectedLayer; +using FullyConnectedLayerGoogLeNet = FullyConnectedLayer; +} // namespace + +// F32 +BENCHMARK_DEFINE_F(FullyConnectedLayerAlexNetF32, neon_alexnet) +(::benchmark::State &state) +{ + while(state.KeepRunning()) + { + // Run function + profiler.start(); + fc_layer->run(); + profiler.stop(); + } +} + +BENCHMARK_REGISTER_F(FullyConnectedLayerAlexNetF32, neon_alexnet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(FullyConnectedLayerAlexNetF32, neon_alexnet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(FullyConnectedLayerAlexNetF32, neon_alexnet) +->Threads(1) +->Apply(DataSetArgBatched); + +// QS8 +BENCHMARK_DEFINE_F(FullyConnectedLayerAlexNetQS8, neon_alexnet) +(::benchmark::State &state) +{ + while(state.KeepRunning()) + { + // Run function + profiler.start(); + fc_layer->run(); + profiler.stop(); + } +} + +BENCHMARK_REGISTER_F(FullyConnectedLayerAlexNetQS8, neon_alexnet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(FullyConnectedLayerAlexNetQS8, neon_alexnet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(FullyConnectedLayerAlexNetQS8, neon_alexnet) +->Threads(1) +->Apply(DataSetArgBatched); + +BENCHMARK_DEFINE_F(FullyConnectedLayerLeNet5, neon_lenet5) +(::benchmark::State &state) +{ + while(state.KeepRunning()) + { + // Run function + profiler.start(); + fc_layer->run(); + profiler.stop(); + } +} +BENCHMARK_REGISTER_F(FullyConnectedLayerLeNet5, neon_lenet5) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(FullyConnectedLayerLeNet5, neon_lenet5) +->Threads(1) +->Apply(DataSetArgBatched); + +BENCHMARK_DEFINE_F(FullyConnectedLayerGoogLeNet, neon_googlenet) +(::benchmark::State &state) +{ + while(state.KeepRunning()) + { + // Run function + profiler.start(); + fc_layer->run(); + profiler.stop(); + } +} +BENCHMARK_REGISTER_F(FullyConnectedLayerGoogLeNet, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); diff --git a/tests/benchmark/NEON/GEMM.cpp b/tests/benchmark/NEON/GEMM.cpp new file mode 100644 index 0000000000..9190309f1c --- /dev/null +++ b/tests/benchmark/NEON/GEMM.cpp @@ -0,0 +1,709 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "Globals.h" +#include "NEON/Helper.h" +#include "NEON/NEAccessor.h" +#include "TensorLibrary.h" +#include "benchmark/Datasets.h" +#include "benchmark/Profiler.h" +#include "benchmark/WallClockTimer.h" + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/functions/NEGEMM.h" +#include "arm_compute/runtime/Tensor.h" +#include "arm_compute/runtime/TensorAllocator.h" + +#include "benchmark/benchmark_api.h" + +using namespace arm_compute; +using namespace arm_compute::test; +using namespace arm_compute::test::benchmark; +using namespace arm_compute::test::neon; + +#include "benchmark/NEON/GEMM.h" + +namespace +{ +#ifdef ENABLE_FP16 +using GEMMFP16GoogLeNet1 = GEMM; +using GEMMFP16GoogLeNet2 = GEMM; +#endif /* ENABLE_FP16 */ +using GEMMFP32GoogLeNet1 = GEMM; +using GEMMFP32GoogLeNet2 = GEMM; +using GEMMQS8GoogLeNet1 = GEMM; +using GEMMQS8GoogLeNet2 = GEMM; +} // namespace +#ifdef ENABLE_FP16 +BENCHMARK_DEFINE_F(GEMMFP16GoogLeNet1, neon_googlenet) +(::benchmark::State &state) +{ + while(state.KeepRunning()) + { + // Run function + profiler.start(); + gemm_layer->run(); + profiler.stop(); + } +} + +BENCHMARK_DEFINE_F(GEMMFP16GoogLeNet2, neon_googlenet) +(::benchmark::State &state) +{ + while(state.KeepRunning()) + { + // Run function + profiler.start(); + gemm_layer->run(); + profiler.stop(); + } +} + +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); 
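The profiler.start()/profiler.stop() pairs in these benchmark bodies drive Instrument objects registered in each fixture's SetUp, presumably a WallClockTimer, whose header every benchmark file includes. Below is a minimal instrument written against only the Instrument interface from tests/benchmark/Instrument.h above; it is a sketch, not the library's actual WallClockTimer, and the include path is assumed:

    // Hedged sketch: a wall-clock Instrument built only from the interface in
    // tests/benchmark/Instrument.h; not the library's actual WallClockTimer.
    #include "benchmark/Instrument.h"

    #include <chrono>
    #include <memory>
    #include <string>

    namespace arm_compute
    {
    namespace test
    {
    namespace benchmark
    {
    class WallClockSketch final : public Instrument
    {
    public:
        std::string id() const override
        {
            return "WallClockSketch";
        }

        void start() override
        {
            _start = std::chrono::high_resolution_clock::now();
        }

        void stop() override
        {
            _stop = std::chrono::high_resolution_clock::now();
        }

        std::unique_ptr<IMeasurement> get_measurement() const override
        {
            // Report the elapsed time of the last start()/stop() pair in ms.
            const double delta_ms = std::chrono::duration<double, std::milli>(_stop - _start).count();
            return std::unique_ptr<IMeasurement>(new Measurement<double>(delta_ms));
        }

    private:
        std::chrono::high_resolution_clock::time_point _start{};
        std::chrono::high_resolution_clock::time_point _stop{};
    };
    } // namespace benchmark
    } // namespace test
    } // namespace arm_compute

Because the Profiler only sees the Instrument interface, an instrument like this can be swapped in alongside the PMU counters without touching the benchmark fixtures themselves.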
+BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet) +->Threads(1) 
+->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP16GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +#endif /* ENABLE_FP16 */ + +BENCHMARK_DEFINE_F(GEMMFP32GoogLeNet1, neon_googlenet) +(::benchmark::State &state) +{ + while(state.KeepRunning()) + { + // Run function + profiler.start(); + gemm_layer->run(); + profiler.stop(); + } +} + +BENCHMARK_DEFINE_F(GEMMFP32GoogLeNet2, neon_googlenet) +(::benchmark::State &state) +{ + while(state.KeepRunning()) + { + // Run function + profiler.start(); + gemm_layer->run(); + profiler.stop(); + } +} + +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet) +->Threads(1) 
+->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, 
neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMFP32GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); + +BENCHMARK_DEFINE_F(GEMMQS8GoogLeNet1, neon_googlenet) +(::benchmark::State &state) +{ + while(state.KeepRunning()) + { + // Run function + profiler.start(); + gemm_layer->run(); + profiler.stop(); + } +} + +BENCHMARK_DEFINE_F(GEMMQS8GoogLeNet2, neon_googlenet) +(::benchmark::State &state) +{ + while(state.KeepRunning()) + { + // Run function + profiler.start(); + gemm_layer->run(); + profiler.stop(); + } +} + +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); 
+BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet1, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); +BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); 
+BENCHMARK_REGISTER_F(GEMMQS8GoogLeNet2, neon_googlenet) +->Threads(1) +->Apply(DataSetArg); diff --git a/tests/benchmark/NEON/GEMM.h b/tests/benchmark/NEON/GEMM.h new file mode 100644 index 0000000000..24d196523f --- /dev/null +++ b/tests/benchmark/NEON/GEMM.h @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_TEST_BENCHMARK_NEON_GEMM_H__ +#define __ARM_COMPUTE_TEST_BENCHMARK_NEON_GEMM_H__ + +#include "TensorLibrary.h" +#include "Utils.h" +#include "dataset/GEMMDataset.h" + +#include + +using namespace arm_compute; +using namespace arm_compute::test; +using namespace arm_compute::test::benchmark; + +namespace arm_compute +{ +namespace test +{ +namespace benchmark +{ +// FIXME: Merge with CL/GEMM.h into common/GEMM.h after adding F16 support to NEON GEMM and QS8 support to CL GEMM +template +class GEMM : public ::benchmark::Fixture +{ +public: + void SetUp(::benchmark::State &state) override + { +#ifdef ENABLE_FP16 + ARM_COMPUTE_ERROR_ON_MSG(data_type != DataType::F16 && data_type != DataType::F32 && data_type != DataType::QS8, "Unsupported data type for GEMM operation"); +#else /* ENABLE_FP16 */ + ARM_COMPUTE_ERROR_ON_MSG(data_type != DataType::F32 && data_type != DataType::QS8, "Unsupported data type for GEMM operation"); +#endif /* ENABLE_FP16 */ + + profiler.add(std::make_shared()); + + const GEMMDataObject gemm_obj = *(DataSet().begin() + state.range(0)); + + TensorShape shape_a = gemm_obj.shape_a; + TensorShape shape_b = gemm_obj.shape_b; + TensorShape shape_c = gemm_obj.shape_c; + TensorShape shape_d = gemm_obj.shape_d; + + // Create tensors + a = create_tensor(shape_a, data_type, 1, 4); + b = create_tensor(shape_b, data_type, 1, 4); + c = create_tensor(shape_c, data_type, 1, 4); + d = create_tensor(shape_d, data_type, 1, 4); + + // Create and configure function + gemm_layer = std::unique_ptr(new Function()); + gemm_layer->configure(&a, &b, &c, &d, gemm_obj.alpha, gemm_obj.beta); + + // Allocate tensors + a.allocator()->allocate(); + b.allocator()->allocate(); + c.allocator()->allocate(); + d.allocator()->allocate(); + } + + void TearDown(::benchmark::State &state) override + { + gemm_layer.reset(); + + a.allocator()->free(); + b.allocator()->free(); + c.allocator()->free(); + d.allocator()->free(); + + profiler.submit(state); + } + + std::unique_ptr gemm_layer{ nullptr }; + Profiler profiler{}; + +private: + TensorType a{}; + TensorType 
b{}; + TensorType c{}; + TensorType d{}; +}; +} // namespace benchmark +} // namespace test +} // namespace arm_compute +#endif //__ARM_COMPUTE_TEST_BENCHMARK_NEON_GEMM_H__ diff --git a/tests/benchmark/NEON/NormalizationLayer.cpp b/tests/benchmark/NEON/NormalizationLayer.cpp new file mode 100644 index 0000000000..46dc56b84d --- /dev/null +++ b/tests/benchmark/NEON/NormalizationLayer.cpp @@ -0,0 +1,111 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "Globals.h" +#include "NEON/Helper.h" +#include "NEON/NEAccessor.h" +#include "TensorLibrary.h" +#include "benchmark/Datasets.h" +#include "benchmark/Profiler.h" +#include "benchmark/WallClockTimer.h" + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/functions/NENormalizationLayer.h" +#include "arm_compute/runtime/Tensor.h" +#include "arm_compute/runtime/TensorAllocator.h" + +#include "benchmark/benchmark_api.h" + +using namespace arm_compute; +using namespace arm_compute::test; +using namespace arm_compute::test::benchmark; +using namespace arm_compute::test::neon; + +#include "benchmark/common/NormalizationLayer.h" + +namespace +{ +using NormalizationLayerAlexNetF32 = NormalizationLayer; +using NormalizationLayerAlexNetQS8 = NormalizationLayer; +using NormalizationLayerGoogLeNet = NormalizationLayer; +} // namespace + +// F32 +BENCHMARK_DEFINE_F(NormalizationLayerAlexNetF32, neon_alexnet) +(::benchmark::State &state) +{ + while(state.KeepRunning()) + { + // Run function + profiler.start(); + norm_layer->run(); + profiler.stop(); + } +} + +BENCHMARK_REGISTER_F(NormalizationLayerAlexNetF32, neon_alexnet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(NormalizationLayerAlexNetF32, neon_alexnet) +->Threads(1) +->Apply(DataSetArgBatched); + +// QS8 +BENCHMARK_DEFINE_F(NormalizationLayerAlexNetQS8, neon_alexnet) +(::benchmark::State &state) +{ + while(state.KeepRunning()) + { + // Run function + profiler.start(); + norm_layer->run(); + profiler.stop(); + } +} + +BENCHMARK_REGISTER_F(NormalizationLayerAlexNetQS8, neon_alexnet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(NormalizationLayerAlexNetQS8, neon_alexnet) +->Threads(1) +->Apply(DataSetArgBatched); + +BENCHMARK_DEFINE_F(NormalizationLayerGoogLeNet, neon_googlenet) +(::benchmark::State &state) +{ + while(state.KeepRunning()) + { + // Run function + profiler.start(); + 
norm_layer->run(); + profiler.stop(); + } +} + +BENCHMARK_REGISTER_F(NormalizationLayerGoogLeNet, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(NormalizationLayerGoogLeNet, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); diff --git a/tests/benchmark/NEON/PoolingLayer.cpp b/tests/benchmark/NEON/PoolingLayer.cpp new file mode 100644 index 0000000000..9b071317b4 --- /dev/null +++ b/tests/benchmark/NEON/PoolingLayer.cpp @@ -0,0 +1,162 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "Globals.h" +#include "NEON/Helper.h" +#include "NEON/NEAccessor.h" +#include "TensorLibrary.h" +#include "benchmark/Datasets.h" +#include "benchmark/Profiler.h" +#include "benchmark/WallClockTimer.h" + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/functions/NEPoolingLayer.h" +#include "arm_compute/runtime/Tensor.h" +#include "arm_compute/runtime/TensorAllocator.h" + +#include "benchmark/benchmark_api.h" + +using namespace arm_compute; +using namespace arm_compute::test; +using namespace arm_compute::test::benchmark; +using namespace arm_compute::test::neon; + +#include "benchmark/common/PoolingLayer.h" + +namespace +{ +using PoolingLayerAlexNetF32 = PoolingLayer; +using PoolingLayerAlexNetQS8 = PoolingLayer; +using PoolingLayerLeNet5 = PoolingLayer; +using PoolingLayerGoogLeNet = PoolingLayer; +} // namespace + +// F32 +BENCHMARK_DEFINE_F(PoolingLayerAlexNetF32, neon_alexnet) +(::benchmark::State &state) +{ + while(state.KeepRunning()) + { + // Run function + profiler.start(); + pool_layer.run(); + profiler.stop(); + } +} + +BENCHMARK_REGISTER_F(PoolingLayerAlexNetF32, neon_alexnet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(PoolingLayerAlexNetF32, neon_alexnet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(PoolingLayerAlexNetF32, neon_alexnet) +->Threads(1) +->Apply(DataSetArgBatched); + +// QS8 +BENCHMARK_DEFINE_F(PoolingLayerAlexNetQS8, neon_alexnet) +(::benchmark::State &state) +{ + while(state.KeepRunning()) + { + // Run function + profiler.start(); + pool_layer.run(); + profiler.stop(); + } +} + +BENCHMARK_REGISTER_F(PoolingLayerAlexNetQS8, neon_alexnet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(PoolingLayerAlexNetQS8, neon_alexnet) +->Threads(1) +->Apply(DataSetArgBatched); 
+BENCHMARK_REGISTER_F(PoolingLayerAlexNetQS8, neon_alexnet) +->Threads(1) +->Apply(DataSetArgBatched); + +BENCHMARK_DEFINE_F(PoolingLayerLeNet5, neon_lenet5) +(::benchmark::State &state) +{ + while(state.KeepRunning()) + { + // Run function + profiler.start(); + pool_layer.run(); + profiler.stop(); + } +} + +BENCHMARK_REGISTER_F(PoolingLayerLeNet5, neon_lenet5) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(PoolingLayerLeNet5, neon_lenet5) +->Threads(1) +->Apply(DataSetArgBatched); + +BENCHMARK_DEFINE_F(PoolingLayerGoogLeNet, neon_googlenet) +(::benchmark::State &state) +{ + while(state.KeepRunning()) + { + // Run function + profiler.start(); + pool_layer.run(); + profiler.stop(); + } +} + +// FIXME: Add support for 7x7 pooling layer pool5/7x7_s1 +BENCHMARK_REGISTER_F(PoolingLayerGoogLeNet, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(PoolingLayerGoogLeNet, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(PoolingLayerGoogLeNet, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(PoolingLayerGoogLeNet, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(PoolingLayerGoogLeNet, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(PoolingLayerGoogLeNet, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(PoolingLayerGoogLeNet, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(PoolingLayerGoogLeNet, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(PoolingLayerGoogLeNet, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); +BENCHMARK_REGISTER_F(PoolingLayerGoogLeNet, neon_googlenet) +->Threads(1) +->Apply(DataSetArgBatched); diff --git a/tests/benchmark/PMUCounter.cpp b/tests/benchmark/PMUCounter.cpp new file mode 100644 index 0000000000..e87dae82e6 --- /dev/null +++ b/tests/benchmark/PMUCounter.cpp @@ -0,0 +1,144 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "PMUCounter.h" + +#include "Utils.h" + +#define _GNU_SOURCE 1 +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace arm_compute +{ +namespace test +{ +namespace benchmark +{ +CycleCounter::CycleCounter() +{ + const pid_t pid = getpid(); + + struct perf_event_attr perf_config + { + }; + memset(&perf_config, 0, sizeof(struct perf_event_attr)); + + perf_config.config = PERF_COUNT_HW_CPU_CYCLES; + perf_config.size = sizeof(struct perf_event_attr); + perf_config.type = PERF_TYPE_HARDWARE; + // The inherit bit specifies that this counter should count events of child + // tasks as well as the task specified + perf_config.inherit = 1; + // Enables saving of event counts on context switch for inherited tasks + perf_config.inherit_stat = 1; + + _fd = syscall(__NR_perf_event_open, &perf_config, pid, -1, -1, 0); + + if(_fd < 0) + { + throw std::runtime_error("perf_event_open for cycles failed"); + } +} + +std::string CycleCounter::id() const +{ + return "Cycle Counter"; +} + +void CycleCounter::start() +{ + ioctl(_fd, PERF_EVENT_IOC_RESET, 0); + ioctl(_fd, PERF_EVENT_IOC_ENABLE, 0); +} + +void CycleCounter::stop() +{ + ioctl(_fd, PERF_EVENT_IOC_DISABLE, 0); + read(_fd, &_cycles, sizeof(_cycles)); +} + +std::unique_ptr CycleCounter::get_measurement() const +{ + return ::arm_compute::test::cpp14::make_unique>(_cycles); +} + +InstructionCounter::InstructionCounter() +{ + const pid_t pid = getpid(); + + struct perf_event_attr perf_config + { + }; + memset(&perf_config, 0, sizeof(struct perf_event_attr)); + + perf_config.config = PERF_COUNT_HW_INSTRUCTIONS; + perf_config.size = sizeof(struct perf_event_attr); + perf_config.type = PERF_TYPE_HARDWARE; + // The inherit bit specifies that this counter should count events of child + // tasks as well as the task specified + perf_config.inherit = 1; + // Enables saving of event counts on context switch for inherited tasks + perf_config.inherit_stat = 1; + + _fd = syscall(__NR_perf_event_open, &perf_config, pid, -1, -1, 0); + + if(_fd < 0) + { + throw std::runtime_error("perf_event_open for instructions failed"); + } +} + +std::string InstructionCounter::id() const +{ + return "Instruction Counter"; +} + +void InstructionCounter::start() +{ + ioctl(_fd, PERF_EVENT_IOC_RESET, 0); + ioctl(_fd, PERF_EVENT_IOC_ENABLE, 0); +} + +void InstructionCounter::stop() +{ + ioctl(_fd, PERF_EVENT_IOC_DISABLE, 0); + read(_fd, &_instructions, sizeof(_instructions)); +} + +std::unique_ptr InstructionCounter::get_measurement() const +{ + return std::unique_ptr(new Instrument::Measurement(_instructions)); +} +} // namespace benchmark +} // namespace test +} // namespace arm_compute diff --git a/tests/benchmark/PMUCounter.h b/tests/benchmark/PMUCounter.h new file mode 100644 index 0000000000..de45f319f6 --- /dev/null +++ b/tests/benchmark/PMUCounter.h @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_TEST_BENCHMARK_PMU_COUNTER_H__
+#define __ARM_COMPUTE_TEST_BENCHMARK_PMU_COUNTER_H__
+
+#include "Instrument.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace benchmark
+{
+/** Implementation of an instrument to count CPU cycles. */
+class CycleCounter : public Instrument
+{
+public:
+    /** Initialise the cycle counter. */
+    CycleCounter();
+
+    std::string id() const override;
+    void start() override;
+    void stop() override;
+    std::unique_ptr<Instrument::IMeasurement> get_measurement() const override;
+
+private:
+    long      _fd{ -1 };
+    long long _cycles{ 0 };
+};
+
+/** Implementation of an instrument to count executed CPU instructions. */
+class InstructionCounter : public Instrument
+{
+public:
+    /** Initialise the instruction counter. */
+    InstructionCounter();
+
+    std::string id() const override;
+    void start() override;
+    void stop() override;
+    std::unique_ptr<Instrument::IMeasurement> get_measurement() const override;
+
+private:
+    long      _fd{ -1 };
+    long long _instructions{ 0 };
+};
+} // namespace benchmark
+} // namespace test
+} // namespace arm_compute
+#endif
diff --git a/tests/benchmark/PerformanceProgramOptions.cpp b/tests/benchmark/PerformanceProgramOptions.cpp
new file mode 100644
index 0000000000..b4becc3c69
--- /dev/null
+++ b/tests/benchmark/PerformanceProgramOptions.cpp
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "PerformanceProgramOptions.h"
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Weffc++"
+#pragma GCC diagnostic ignored "-Wnon-virtual-dtor"
+#pragma GCC diagnostic ignored "-Wctor-dtor-privacy"
+#include "boost/program_options.hpp"
+#pragma GCC diagnostic pop
+
+namespace arm_compute
+{
+namespace test
+{
+namespace performance
+{
+PerformanceProgramOptions::PerformanceProgramOptions()
+{
+    boost::program_options::options_description options("Performance options");
+    options.add_options()("runs", boost::program_options::value<unsigned int>()->default_value(1), "Repetitions per test");
+    options.add_options()("threads", boost::program_options::value<unsigned int>()->default_value(1), "Number of parallel CPU threads");
+    add_options(options);
+}
+} // namespace performance
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/benchmark/PerformanceProgramOptions.h b/tests/benchmark/PerformanceProgramOptions.h
new file mode 100644
index 0000000000..671e263bb2
--- /dev/null
+++ b/tests/benchmark/PerformanceProgramOptions.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_TEST_PERFORMANCE_PROGRAM_OPTIONS_H__
+#define __ARM_COMPUTE_TEST_PERFORMANCE_PROGRAM_OPTIONS_H__
+
+#include "ProgramOptions.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace performance
+{
+/** Subclass of @ref ProgramOptions that adds performance specific options. */
+class PerformanceProgramOptions : public ProgramOptions
+{
+public:
+    /** Defines additional options. */
+    PerformanceProgramOptions();
+};
+} // namespace performance
+} // namespace test
+} // namespace arm_compute
+#endif
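The options added above follow the usual boost::program_options flow: declare typed options with defaults, parse the command line, then read the values back. A minimal, self-contained sketch of that flow under the same assumptions; only the option names are taken from the patch, the rest is illustrative.

#include <iostream>

#include "boost/program_options.hpp"

int main(int argc, char **argv)
{
    namespace po = boost::program_options;

    po::options_description options("Performance options");
    options.add_options()
        ("runs", po::value<unsigned int>()->default_value(1), "Repetitions per test")
        ("threads", po::value<unsigned int>()->default_value(1), "Number of parallel CPU threads");

    po::variables_map vm;
    po::store(po::parse_command_line(argc, argv, options), vm);
    po::notify(vm);

    // Typed access; the default_value kicks in when the flag was not passed.
    std::cout << "runs = " << vm["runs"].as<unsigned int>() << "\n";
    std::cout << "threads = " << vm["threads"].as<unsigned int>() << "\n";
    return 0;
}

diff --git a/tests/benchmark/PerformanceUserConfiguration.cpp b/tests/benchmark/PerformanceUserConfiguration.cpp
new file mode 100644
index 0000000000..ca412d660a
--- /dev/null
+++ b/tests/benchmark/PerformanceUserConfiguration.cpp
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2017 ARM Limited.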
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "PerformanceUserConfiguration.h" + +#include "ProgramOptions.h" + +namespace arm_compute +{ +namespace test +{ +namespace performance +{ +PerformanceUserConfiguration::PerformanceUserConfiguration(const ProgramOptions &options) + : UserConfiguration(options) +{ + unsigned int tmp_runs = 0; + if(options.get("runs", tmp_runs)) + { + runs = tmp_runs; + } +} +} // namespace performance +} // namespace test +} // namespace arm_compute diff --git a/tests/benchmark/PerformanceUserConfiguration.h b/tests/benchmark/PerformanceUserConfiguration.h new file mode 100644 index 0000000000..a140d404c8 --- /dev/null +++ b/tests/benchmark/PerformanceUserConfiguration.h @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_TEST_PERFORMANCE_PERFORMANCE_USER_CONFIGURATION_H__ +#define __ARM_COMPUTE_TEST_PERFORMANCE_PERFORMANCE_USER_CONFIGURATION_H__ + +#include "UserConfiguration.h" + +namespace arm_compute +{ +namespace test +{ +class ProgramOptions; + +namespace performance +{ +/** Specialisation of @ref UserConfiguration to provide performance specific + * configuration options. 
+ */
+struct PerformanceUserConfiguration : public UserConfiguration
+{
+    PerformanceUserConfiguration() = default;
+
+    /** Initialise the configuration according to the program options.
+     *
+     * @param[in] options Parsed command line options.
+     */
+    PerformanceUserConfiguration(const ProgramOptions &options);
+
+    Option<unsigned int> runs{};
+};
+} // namespace performance
+
+extern performance::PerformanceUserConfiguration user_config;
+} // namespace test
+} // namespace arm_compute
+#endif
diff --git a/tests/benchmark/Profiler.cpp b/tests/benchmark/Profiler.cpp
new file mode 100644
index 0000000000..f3ce94164f
--- /dev/null
+++ b/tests/benchmark/Profiler.cpp
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "Profiler.h"
+
+#include <algorithm>
+#include <numeric>
+
+namespace arm_compute
+{
+namespace test
+{
+namespace benchmark
+{
+void Profiler::add(const std::shared_ptr<Instrument> &instrument)
+{
+    _instruments.push_back(instrument);
+}
+
+void Profiler::start()
+{
+    for(auto &instrument : _instruments)
+    {
+        instrument->start();
+    }
+}
+
+void Profiler::stop()
+{
+    for(auto &instrument : _instruments)
+    {
+        instrument->stop();
+    }
+
+    for(const auto &instrument : _instruments)
+    {
+        _measurements[instrument->id()].push_back(*instrument->get_measurement());
+    }
+}
+
+void Profiler::submit(::benchmark::State &state)
+{
+    for(auto &instrument : _measurements)
+    {
+        double sum_values = std::accumulate(instrument.second.begin(), instrument.second.end(), 0.);
+        size_t num_values = instrument.second.size();
+
+        if(num_values > 2)
+        {
+            auto minmax_values = std::minmax_element(instrument.second.begin(), instrument.second.end());
+            state.counters[instrument.first + "_min"] = *minmax_values.first;
+            state.counters[instrument.first + "_max"] = *minmax_values.second;
+            sum_values -= *minmax_values.first + *minmax_values.second;
+            num_values -= 2;
+        }
+        state.counters[instrument.first] = sum_values / num_values;
+        instrument.second.clear();
+    }
+}
+
+const Profiler::MeasurementsMap &Profiler::measurements() const
+{
+    return _measurements;
+}
+} // namespace benchmark
+} // namespace test
+} // namespace arm_compute
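Profiler::submit() above reports a trimmed mean: whenever more than two samples were collected, the minimum and maximum become separate _min/_max counters and drop out of the average, which damps outliers in noisy benchmark runs. A standalone sketch of just that aggregation step:

#include <algorithm>
#include <cstdio>
#include <numeric>
#include <vector>

int main()
{
    std::vector<double> samples{ 10.2, 9.8, 10.1, 35.0, 10.0 }; // One outlier
    double sum = std::accumulate(samples.begin(), samples.end(), 0.);
    size_t n   = samples.size();

    if(n > 2)
    {
        const auto minmax = std::minmax_element(samples.begin(), samples.end());
        std::printf("min = %.2f, max = %.2f\n", *minmax.first, *minmax.second);
        sum -= *minmax.first + *minmax.second; // Drop the extremes from the mean
        n -= 2;
    }

    std::printf("trimmed mean = %.2f\n", sum / n); // 10.10 instead of 15.02
    return 0;
}

diff --git a/tests/benchmark/Profiler.h b/tests/benchmark/Profiler.h
new file mode 100644
index 0000000000..03922f4704
--- /dev/null
+++ b/tests/benchmark/Profiler.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2017 ARM Limited.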
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_TEST_BENCHMARK_PROFILER_H__
+#define __ARM_COMPUTE_TEST_BENCHMARK_PROFILER_H__
+
+#include "Instrument.h"
+
+#include "benchmark/benchmark_api.h"
+
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
+
+namespace arm_compute
+{
+namespace test
+{
+namespace benchmark
+{
+class Profiler
+{
+public:
+    /** Mapping from instrument ids to their measurements. */
+    using MeasurementsMap = std::map<std::string, std::vector<double>>;
+
+    /** Add @p instrument to the performance monitor.
+     *
+     * All added instruments will be used when @ref start or @ref stop are
+     * called to make measurements.
+     *
+     * @param[in] instrument Instrument to be used to measure performance.
+     */
+    void add(const std::shared_ptr<Instrument> &instrument);
+
+    /** Start all added instruments to measure performance. */
+    void start();
+
+    /** Stop all added instruments. */
+    void stop();
+
+    /** Commit all measured values to the current active test. */
+    void submit(::benchmark::State &state);
+
+    /** Return measurements for all instruments. */
+    const MeasurementsMap &measurements() const;
+
+private:
+    std::vector<std::shared_ptr<Instrument>> _instruments{};
+    MeasurementsMap _measurements{};
+};
+} // namespace benchmark
+} // namespace test
+} // namespace arm_compute
+#endif
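Profiler consumes instruments such as the WallClockTimer added below, whose measurement idiom boils down to capturing two std::chrono time points around the work and converting the difference to fractional milliseconds. A minimal sketch of that idiom (the sleep is a stand-in for the function under test):

#include <chrono>
#include <cstdio>
#include <thread>

int main()
{
    const auto start = std::chrono::high_resolution_clock::now();

    std::this_thread::sleep_for(std::chrono::milliseconds(25)); // Workload stand-in

    const auto stop = std::chrono::high_resolution_clock::now();

    // duration<float, std::milli> keeps sub-millisecond precision without
    // an explicit duration_cast.
    const std::chrono::duration<float, std::milli> delta = stop - start;
    std::printf("elapsed: %.3f ms\n", delta.count());
    return 0;
}

diff --git a/tests/benchmark/WallClockTimer.cpp b/tests/benchmark/WallClockTimer.cpp
new file mode 100644
index 0000000000..9ab53d0b3c
--- /dev/null
+++ b/tests/benchmark/WallClockTimer.cpp
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE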
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "WallClockTimer.h"
+
+#include "Utils.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace benchmark
+{
+std::string WallClockTimer::id() const
+{
+    return "Wall clock";
+}
+
+void WallClockTimer::start()
+{
+    _start = std::chrono::high_resolution_clock::now();
+}
+
+void WallClockTimer::stop()
+{
+    _stop = std::chrono::high_resolution_clock::now();
+}
+
+std::unique_ptr<Instrument::IMeasurement> WallClockTimer::get_measurement() const
+{
+    const std::chrono::duration<float, std::milli> delta = _stop - _start;
+    return ::arm_compute::test::cpp14::make_unique<Instrument::Measurement<float>>(delta.count());
+}
+} // namespace benchmark
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/benchmark/WallClockTimer.h b/tests/benchmark/WallClockTimer.h
new file mode 100644
index 0000000000..cf6828e88d
--- /dev/null
+++ b/tests/benchmark/WallClockTimer.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_TEST_BENCHMARK_WALL_CLOCK_TIMER_H__
+#define __ARM_COMPUTE_TEST_BENCHMARK_WALL_CLOCK_TIMER_H__
+
+#include "Instrument.h"
+
+#include <chrono>
+
+namespace arm_compute
+{
+namespace test
+{
+namespace benchmark
+{
+/** Implementation of an instrument to measure elapsed wall-clock time in milliseconds. */
+class WallClockTimer : public Instrument
+{
+public:
+    std::string id() const override;
+    void start() override;
+    void stop() override;
+    std::unique_ptr<Instrument::IMeasurement> get_measurement() const override;
+
+private:
+    std::chrono::high_resolution_clock::time_point _start{};
+    std::chrono::high_resolution_clock::time_point _stop{};
+};
+} // namespace benchmark
+} // namespace test
+} // namespace arm_compute
+#endif
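The common/*.h fixtures that follow (ActivationLayer, ConvolutionLayer, FullyConnectedLayer, ...) all specialise the same Google Benchmark pattern: SetUp() builds state once per registered configuration, the KeepRunning() loop brackets only the function under test, and TearDown() releases resources. A hedged, framework-only sketch of that pattern, using the old benchmark_api.h header the patch targets; every name here is illustrative and no arm_compute types are involved.

#include <vector>

#include "benchmark/benchmark_api.h"

class ExampleLayerFixture : public ::benchmark::Fixture
{
public:
    void SetUp(::benchmark::State &state) override
    {
        // state.range(0) selects the configuration, as in the fixtures above.
        _data.assign(static_cast<size_t>(state.range(0)), 1.0f);
    }

    void TearDown(::benchmark::State &) override
    {
        _data.clear();
    }

    std::vector<float> _data{};
};

BENCHMARK_DEFINE_F(ExampleLayerFixture, run)
(::benchmark::State &state)
{
    while(state.KeepRunning())
    {
        // Only this region is timed per iteration.
        volatile float sum = 0.f;
        for(float v : _data)
        {
            sum += v;
        }
    }
}

// One registration per configuration; Threads(1) pins a single worker thread.
BENCHMARK_REGISTER_F(ExampleLayerFixture, run)->Threads(1)->Arg(4096);
BENCHMARK_REGISTER_F(ExampleLayerFixture, run)->Threads(1)->Arg(16384);

int main(int argc, char **argv)
{
    ::benchmark::Initialize(&argc, argv);
    ::benchmark::RunSpecifiedBenchmarks();
    return 0;
}

The layer fixtures additionally route timing through the Profiler added earlier instead of relying on the framework's own timer, so PMU counters and wall-clock measurements share one code path.

diff --git a/tests/benchmark/common/ActivationLayer.h b/tests/benchmark/common/ActivationLayer.h
new file mode 100644
index 0000000000..7edfb6ef3c
--- /dev/null
+++ b/tests/benchmark/common/ActivationLayer.h
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2017 ARM Limited.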
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_TEST_BENCHMARK_ACTIVATION_LAYER_H__ +#define __ARM_COMPUTE_TEST_BENCHMARK_ACTIVATION_LAYER_H__ + +#include "TensorLibrary.h" +#include "Utils.h" +#include "dataset/ActivationLayerDataset.h" + +#include + +using namespace arm_compute; +using namespace arm_compute::test; +using namespace arm_compute::test::benchmark; + +namespace arm_compute +{ +namespace test +{ +namespace benchmark +{ +template +class ActivationLayer : public ::benchmark::Fixture +{ +public: + void SetUp(::benchmark::State &state) override + { + profiler.add(std::make_shared()); + + const ActivationLayerDataObject act_obj = *(DataSet().begin() + state.range(0)); + + // Set batched in source and destination shapes + const unsigned int batches = state.range(1); + const unsigned int fixed_point_position = 4; + TensorShape shape = act_obj.shape; + shape.set(shape.num_dimensions(), batches); + + // Create tensors + src = create_tensor(shape, dt, 1, fixed_point_position); + dst = create_tensor(shape, dt, 1, fixed_point_position); + + // Create and configure function + act_layer.configure(&src, &dst, act_obj.info); + + // Allocate tensors + src.allocator()->allocate(); + dst.allocator()->allocate(); + + // Fill tensors + library->fill_tensor_uniform(Accessor(src), 0); + } + + void TearDown(::benchmark::State &state) override + { + src.allocator()->free(); + dst.allocator()->free(); + + profiler.submit(state); + } + + Function act_layer{}; + Profiler profiler{}; + +private: + TensorType src{}; + TensorType dst{}; +}; +} // namespace benchmark +} // namespace test +} // namespace arm_compute +#endif //__ARM_COMPUTE_TEST_BENCHMARK_ACTIVATION_LAYER_H__ diff --git a/tests/benchmark/common/ConvolutionLayer.h b/tests/benchmark/common/ConvolutionLayer.h new file mode 100644 index 0000000000..594c62c50e --- /dev/null +++ b/tests/benchmark/common/ConvolutionLayer.h @@ -0,0 +1,107 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_TEST_BENCHMARK_CONVOLUTION_LAYER_H__ +#define __ARM_COMPUTE_TEST_BENCHMARK_CONVOLUTION_LAYER_H__ + +#include "TensorLibrary.h" +#include "Utils.h" +#include "dataset/ConvolutionLayerDataset.h" + +#include <memory> + +using namespace arm_compute; +using namespace arm_compute::test; +using namespace arm_compute::test::benchmark; + +namespace arm_compute +{ +namespace test +{ +namespace benchmark +{ +template <typename DataSet, typename TensorType, typename Accessor, typename Function, DataType dt> +class ConvolutionLayer : public ::benchmark::Fixture +{ +public: + void SetUp(::benchmark::State &state) override + { + profiler.add(std::make_shared<WallClockTimer>()); + + const ConvolutionLayerDataObject conv_obj = *(DataSet().begin() + state.range(0)); + + // Set batch dimension in source and destination shapes + const unsigned int batches = state.range(1); + const unsigned int fixed_point_position = 4; + TensorShape src_shape = conv_obj.src_shape; + TensorShape dst_shape = conv_obj.dst_shape; + src_shape.set(3 /* batch */, batches); + dst_shape.set(3 /* batch */, batches); + + // Create tensors + src = create_tensor<TensorType>(src_shape, dt, 1, fixed_point_position); + weights = create_tensor<TensorType>(conv_obj.weights_shape, dt, 1, fixed_point_position); + bias = create_tensor<TensorType>(conv_obj.bias_shape, dt, 1, fixed_point_position); + dst = create_tensor<TensorType>(dst_shape, dt, 1, fixed_point_position); + + // Create and configure function + conv_layer = std::unique_ptr<Function>(new Function()); + conv_layer->configure(&src, &weights, &bias, &dst, conv_obj.info); + + // Allocate tensors + src.allocator()->allocate(); + weights.allocator()->allocate(); + bias.allocator()->allocate(); + dst.allocator()->allocate(); + + // Fill tensors + library->fill_tensor_uniform(Accessor(src), 0); + library->fill_tensor_uniform(Accessor(weights), 1); + library->fill_tensor_uniform(Accessor(bias), 2); + } + + void TearDown(::benchmark::State &state) override + { + conv_layer.reset(); + + src.allocator()->free(); + weights.allocator()->free(); + bias.allocator()->free(); + dst.allocator()->free(); + + profiler.submit(state); + } + + std::unique_ptr<Function> conv_layer{ nullptr }; + Profiler profiler{}; + +private: + TensorType src{}; + TensorType weights{}; + TensorType bias{}; + TensorType dst{}; +}; +} // namespace benchmark +} // namespace test +} // namespace arm_compute +#endif //__ARM_COMPUTE_TEST_BENCHMARK_CONVOLUTION_LAYER_H__ diff --git a/tests/benchmark/common/FullyConnectedLayer.h b/tests/benchmark/common/FullyConnectedLayer.h new file
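Note the two batching conventions in these fixtures: the convolution fixture writes the batch count to dimension 3 explicitly, whereas the activation fixture appends it at shape.num_dimensions(). Both rely on tensors being laid out W, H, C, N; a minimal illustration with an example shape:

    #include "arm_compute/core/TensorShape.h"

    arm_compute::TensorShape src_shape(227U, 227U, 3U); // W x H x C
    src_shape.set(3 /* batch */, 8U);                   // now 227 x 227 x 3 x 8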
mode 100644 index 0000000000..88adf83f2a --- /dev/null +++ b/tests/benchmark/common/FullyConnectedLayer.h @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_TEST_BENCHMARK_FULLYCONNECTED_LAYER_H__ +#define __ARM_COMPUTE_TEST_BENCHMARK_FULLYCONNECTED_LAYER_H__ + +#include "TensorLibrary.h" +#include "Utils.h" +#include "dataset/ConvolutionLayerDataset.h" + +#include +#include + +using namespace arm_compute; +using namespace arm_compute::test; +using namespace arm_compute::test::benchmark; + +namespace arm_compute +{ +namespace test +{ +namespace benchmark +{ +template +class FullyConnectedLayer : public ::benchmark::Fixture +{ +public: + void SetUp(::benchmark::State &state) override + { + profiler.add(std::make_shared()); + + const FullyConnectedLayerDataObject fc_obj = *(DataSet().begin() + state.range(0)); + + // Set batched in source and destination shapes + const unsigned int batches = state.range(1); + const unsigned int fixed_point_position = 4; + TensorShape src_shape = fc_obj.src_shape; + TensorShape dst_shape = fc_obj.dst_shape; + src_shape.set(src_shape.num_dimensions(), batches); + dst_shape.set(dst_shape.num_dimensions(), batches); + + // Create tensors + src = create_tensor(src_shape, dt, 1, fixed_point_position); + weights = create_tensor(fc_obj.weights_shape, dt, 1, fixed_point_position); + bias = create_tensor(fc_obj.bias_shape, dt, 1, fixed_point_position); + dst = create_tensor(dst_shape, dt, 1, fixed_point_position); + + // Create and configure function + fc_layer = std::unique_ptr(new Function()); + fc_layer->configure(&src, &weights, &bias, &dst); + + // Allocate tensors + src.allocator()->allocate(); + weights.allocator()->allocate(); + bias.allocator()->allocate(); + dst.allocator()->allocate(); + + // Fill tensors + library->fill_tensor_uniform(Accessor(src), 0); + library->fill_tensor_uniform(Accessor(weights), 1); + library->fill_tensor_uniform(Accessor(bias), 2); + } + + void TearDown(::benchmark::State &state) override + { + fc_layer.reset(); + + src.allocator()->free(); + weights.allocator()->free(); + bias.allocator()->free(); + dst.allocator()->free(); + + profiler.submit(state); + } + + std::unique_ptr fc_layer{ nullptr }; + Profiler profiler{}; + +private: + TensorType src{}; + TensorType weights{}; + TensorType bias{}; + TensorType dst{}; +}; +} // namespace benchmark +} // namespace test +} // namespace 
arm_compute +#endif //__ARM_COMPUTE_TEST_BENCHMARK_FULLYCONNECTED_LAYER_H__ diff --git a/tests/benchmark/common/NormalizationLayer.h b/tests/benchmark/common/NormalizationLayer.h new file mode 100644 index 0000000000..4593fb7df3 --- /dev/null +++ b/tests/benchmark/common/NormalizationLayer.h @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_TEST_BENCHMARK_NORMALIZATION_LAYER_H__ +#define __ARM_COMPUTE_TEST_BENCHMARK_NORMALIZATION_LAYER_H__ + +#include "TensorLibrary.h" +#include "Utils.h" +#include "dataset/NormalizationLayerDataset.h" + +#include +#include + +using namespace arm_compute; +using namespace arm_compute::test; +using namespace arm_compute::test::benchmark; + +namespace arm_compute +{ +namespace test +{ +namespace benchmark +{ +template +class NormalizationLayer : public ::benchmark::Fixture +{ +public: + void SetUp(::benchmark::State &state) override + { + profiler.add(std::make_shared()); + + const NormalizationLayerDataObject norm_obj = *(DataSet().begin() + state.range(0)); + + // Set batched in source and destination shapes + const unsigned int batches = state.range(1); + const unsigned int fixed_point_position = 4; + TensorShape shape = norm_obj.shape; + shape.set(shape.num_dimensions(), batches); + + // Create tensors + src = create_tensor(shape, dt, 1, fixed_point_position); + dst = create_tensor(shape, dt, 1, fixed_point_position); + + // Create and configure function + norm_layer = std::unique_ptr(new Function()); + norm_layer->configure(&src, &dst, norm_obj.info); + + // Allocate tensors + src.allocator()->allocate(); + dst.allocator()->allocate(); + + // Fill tensors + library->fill_tensor_uniform(Accessor(src), 0); + } + + void TearDown(::benchmark::State &state) override + { + norm_layer.reset(); + + src.allocator()->free(); + dst.allocator()->free(); + + profiler.submit(state); + } + + std::unique_ptr norm_layer{ nullptr }; + Profiler profiler{}; + +private: + TensorType src{}; + TensorType dst{}; +}; +} // namespace benchmark +} // namespace test +} // namespace arm_compute +#endif //__ARM_COMPUTE_TEST_BENCHMARK_NORMALIZATION_LAYER_H__ diff --git a/tests/benchmark/common/PoolingLayer.h b/tests/benchmark/common/PoolingLayer.h new file mode 100644 index 0000000000..5bb332fd6b --- /dev/null +++ b/tests/benchmark/common/PoolingLayer.h @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2017 ARM Limited. 
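All of these fixtures share one lifecycle for the function under test, and the std::unique_ptr variants make the teardown order explicit: the function is destroyed before its tensors are freed. A condensed sketch of the pattern, reusing src, dst and norm_obj.info from the fixture above:

    // 1. configure() while the tensors are only described, not yet allocated
    auto layer = std::unique_ptr<Function>(new Function());
    layer->configure(&src, &dst, norm_obj.info);
    // 2. allocate backing memory
    src.allocator()->allocate();
    dst.allocator()->allocate();
    // 3. run() inside the timed region
    layer->run();
    // 4. release the function first, then free its tensors
    layer.reset();
    src.allocator()->free();
    dst.allocator()->free();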
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_TEST_BENCHMARK_POOLING_LAYER_H__ +#define __ARM_COMPUTE_TEST_BENCHMARK_POOLING_LAYER_H__ + +#include "TensorLibrary.h" +#include "Utils.h" +#include "dataset/PoolingLayerDataset.h" + +#include + +using namespace arm_compute; +using namespace arm_compute::test; +using namespace arm_compute::test::benchmark; + +namespace arm_compute +{ +namespace test +{ +namespace benchmark +{ +template +class PoolingLayer : public ::benchmark::Fixture +{ +public: + void SetUp(::benchmark::State &state) override + { + profiler.add(std::make_shared()); + + const PoolingLayerDataObject pool_obj = *(DataSet().begin() + state.range(0)); + + // Set batched in source and destination shapes + const unsigned int batches = state.range(1); + const unsigned int fixed_point_position = 4; + TensorShape src_shape = pool_obj.src_shape; + TensorShape dst_shape = pool_obj.dst_shape; + src_shape.set(src_shape.num_dimensions(), batches); + dst_shape.set(dst_shape.num_dimensions(), batches); + + // Create tensors + src = create_tensor(src_shape, dt, 1, fixed_point_position); + dst = create_tensor(dst_shape, dt, 1, fixed_point_position); + + // Create and configure function + pool_layer.configure(&src, &dst, pool_obj.info); + + // Allocate tensors + src.allocator()->allocate(); + dst.allocator()->allocate(); + + // Fill tensors + library->fill_tensor_uniform(Accessor(src), 0); + } + + void TearDown(::benchmark::State &state) override + { + // Free allocators + src.allocator()->free(); + dst.allocator()->free(); + + profiler.submit(state); + } + + Function pool_layer{}; + Profiler profiler{}; + +private: + TensorType src{}; + TensorType dst{}; +}; +} // namespace benchmark +} // namespace test +} // namespace arm_compute +#endif //__ARM_COMPUTE_TEST_BENCHMARK_POOLING_LAYER_H__ diff --git a/tests/benchmark/main.cpp b/tests/benchmark/main.cpp new file mode 100644 index 0000000000..acde259d9b --- /dev/null +++ b/tests/benchmark/main.cpp @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "Globals.h" +#include "PMUCounter.h" +#include "PerformanceProgramOptions.h" +#include "PerformanceUserConfiguration.h" +#include "TensorLibrary.h" +#include "Utils.h" +#include "WallClockTimer.h" + +#include "benchmark/benchmark_api.h" + +#ifdef OPENCL +#include "arm_compute/runtime/CL/CLScheduler.h" +#endif +#include "arm_compute/runtime/Scheduler.h" + +#include <iostream> +#include <memory> + +using namespace arm_compute::test; +using namespace arm_compute::test::performance; + +namespace arm_compute +{ +namespace test +{ +PerformanceUserConfiguration user_config; +std::unique_ptr<TensorLibrary> library; +} // namespace test +} // namespace arm_compute + +int main(int argc, char **argv) +{ + PerformanceProgramOptions options; + try + { + options.parse_commandline(argc, argv); + + if(options.wants_help()) + { + std::cout << "Usage: " << argv[0] << " [options] PATH\n"; + std::cout << options.get_help() << "\n"; + } + + user_config = PerformanceUserConfiguration(options); + } + catch(const boost::program_options::required_option &err) + { + std::cerr << "Error: " << err.what() << "\n"; + std::cout << "\nUsage: " << argv[0] << " [options] PATH\n"; + std::cout << options.get_help() << "\n"; + return 1; + } + + ::benchmark::Initialize(&argc, argv); + + if(user_config.seed.is_set()) + { + library = cpp14::make_unique<TensorLibrary>(user_config.path.get(), user_config.seed); + } + else + { + library = cpp14::make_unique<TensorLibrary>(user_config.path.get()); + } + +#ifdef OPENCL + arm_compute::CLScheduler::get().default_init(); +#endif + + std::cout << "Using " << user_config.threads << " CPU " << (user_config.threads == 1 ? "thread" : "threads") << "\n"; + arm_compute::Scheduler::get().set_num_threads(user_config.threads); + + ::benchmark::RunSpecifiedBenchmarks(); +} diff --git a/tests/benchmark/system_tests/CL/AlexNet.cpp b/tests/benchmark/system_tests/CL/AlexNet.cpp new file mode 100644 index 0000000000..fe0b9913de --- /dev/null +++ b/tests/benchmark/system_tests/CL/AlexNet.cpp @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2017 ARM Limited.
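The seed plumbing in main() is what makes runs comparable: with the same seed the library produces identical pseudo-random tensor contents, so timing differences cannot come from the data. A reduced model of the idea, not the actual TensorLibrary implementation:

    #include <cstddef>
    #include <random>
    #include <vector>

    std::vector<float> uniform_fill(std::size_t n, unsigned int seed)
    {
        std::mt19937 gen(seed); // same seed => identical fill data across runs
        std::uniform_real_distribution<float> dist(-1.f, 1.f);
        std::vector<float> data(n);
        for(auto &v : data)
        {
            v = dist(gen);
        }
        return data;
    }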
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "CL/CLAccessor.h" +#include "CL/Helper.h" +#include "Globals.h" +#include "TensorLibrary.h" +#include "benchmark/Datasets.h" +#include "benchmark/Profiler.h" +#include "benchmark/WallClockTimer.h" + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/CLScheduler.h" +#include "arm_compute/runtime/CL/CLSubTensor.h" +#include "arm_compute/runtime/CL/CLTensor.h" +#include "arm_compute/runtime/CL/CLTensorAllocator.h" +#include "arm_compute/runtime/CL/functions/CLActivationLayer.h" +#include "arm_compute/runtime/CL/functions/CLConvolutionLayer.h" +#include "arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h" +#include "arm_compute/runtime/CL/functions/CLNormalizationLayer.h" +#include "arm_compute/runtime/CL/functions/CLPoolingLayer.h" +#include "arm_compute/runtime/CL/functions/CLSoftmaxLayer.h" + +#include "benchmark/benchmark_api.h" + +using namespace arm_compute; +using namespace arm_compute::test; +using namespace arm_compute::test::benchmark; +using namespace arm_compute::test::cl; + +#include "benchmark/system_tests/common/AlexNet.h" + +namespace +{ +using AlexNetSystemTest = AlexNetFixture; +} // namespace + +BENCHMARK_DEFINE_F(AlexNetSystemTest, cl_alexnet) +(::benchmark::State &state) +{ + while(state.KeepRunning()) + { + // Run AlexNet + profiler.start(); + network.run(); + CLScheduler::get().sync(); + profiler.stop(); + } +} + +BENCHMARK_REGISTER_F(AlexNetSystemTest, cl_alexnet) +->Threads(1) +->Iterations(10) +->ArgName("batch_size") +->Arg(1) +->Arg(4) +->Arg(8); \ No newline at end of file diff --git a/tests/benchmark/system_tests/CL/LeNet5.cpp b/tests/benchmark/system_tests/CL/LeNet5.cpp new file mode 100644 index 0000000000..d65a7dde6c --- /dev/null +++ b/tests/benchmark/system_tests/CL/LeNet5.cpp @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2017 ARM Limited. 
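The CLScheduler::get().sync() inside the timed region is essential: OpenCL kernels are enqueued asynchronously, so without draining the queue the timer would only capture enqueue overhead. Annotated, the measured loop body reads:

    profiler.start();
    network.run();             // enqueues the kernels; returns before they finish
    CLScheduler::get().sync(); // blocks until the command queue has drained
    profiler.stop();           // the elapsed time now covers actual execution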
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "CL/CLAccessor.h" +#include "CL/Helper.h" +#include "Globals.h" +#include "TensorLibrary.h" +#include "benchmark/Datasets.h" +#include "benchmark/Profiler.h" +#include "benchmark/WallClockTimer.h" + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/CLScheduler.h" +#include "arm_compute/runtime/CL/CLTensor.h" +#include "arm_compute/runtime/CL/CLTensorAllocator.h" +#include "arm_compute/runtime/CL/functions/CLActivationLayer.h" +#include "arm_compute/runtime/CL/functions/CLConvolutionLayer.h" +#include "arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h" +#include "arm_compute/runtime/CL/functions/CLPoolingLayer.h" +#include "arm_compute/runtime/CL/functions/CLSoftmaxLayer.h" + +#include "benchmark/benchmark_api.h" + +using namespace arm_compute; +using namespace arm_compute::test; +using namespace arm_compute::test::benchmark; +using namespace arm_compute::test::cl; + +#include "benchmark/system_tests/common/LeNet5.h" + +namespace +{ +using LeNet5SystemTest = LeNet5Fixture; +} // namespace + +BENCHMARK_DEFINE_F(LeNet5SystemTest, cl_lenet5) +(::benchmark::State &state) +{ + while(state.KeepRunning()) + { + // Run LeNet5 + profiler.start(); + network.run(); + CLScheduler::get().sync(); + profiler.stop(); + } +} + +BENCHMARK_REGISTER_F(LeNet5SystemTest, cl_lenet5) +->Threads(1) +->Iterations(10) +->ArgName("batch_size") +->Arg(1) +->Arg(16) +->Arg(32); diff --git a/tests/benchmark/system_tests/NEON/AlexNet.cpp b/tests/benchmark/system_tests/NEON/AlexNet.cpp new file mode 100644 index 0000000000..2d222e7309 --- /dev/null +++ b/tests/benchmark/system_tests/NEON/AlexNet.cpp @@ -0,0 +1,120 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
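The registration chain at the end of each of these files maps directly onto the fixture: Iterations(10) pins the iteration count instead of letting Google Benchmark auto-calibrate, and every Arg() value creates a separate benchmark instance whose batch size reaches SetUp() as state.range(0). The same calls as above, annotated:

    BENCHMARK_REGISTER_F(LeNet5SystemTest, cl_lenet5)
    ->Threads(1)     // a single benchmark thread drives the whole network
    ->Iterations(10) // fixed iteration count instead of auto-calibration
    ->ArgName("batch_size")
    ->Arg(16);       // delivered to SetUp() as state.range(0)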
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "Globals.h" +#include "NEON/Helper.h" +#include "NEON/NEAccessor.h" +#include "TensorLibrary.h" +#include "benchmark/Datasets.h" +#include "benchmark/Profiler.h" +#include "benchmark/WallClockTimer.h" + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h" +#include "arm_compute/runtime/NEON/functions/NEConvolutionLayer.h" +#include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h" +#include "arm_compute/runtime/NEON/functions/NENormalizationLayer.h" +#include "arm_compute/runtime/NEON/functions/NEPoolingLayer.h" +#include "arm_compute/runtime/NEON/functions/NESoftmaxLayer.h" +#include "arm_compute/runtime/SubTensor.h" +#include "arm_compute/runtime/Tensor.h" +#include "arm_compute/runtime/TensorAllocator.h" + +#include "benchmark/benchmark_api.h" + +using namespace arm_compute; +using namespace arm_compute::test; +using namespace arm_compute::test::benchmark; +using namespace arm_compute::test::neon; + +#include "benchmark/system_tests/common/AlexNet.h" + +namespace +{ +using AlexNetSystemTestF32 = AlexNetFixture; + +using AlexNetSystemTestQS8 = AlexNetFixture; +} // namespace + +// F32 +BENCHMARK_DEFINE_F(AlexNetSystemTestF32, neon_alexnet) +(::benchmark::State &state) +{ + while(state.KeepRunning()) + { + // Run AlexNet + profiler.start(); + network.run(); + profiler.stop(); + } +} + +BENCHMARK_REGISTER_F(AlexNetSystemTestF32, neon_alexnet) +->Threads(1) +->Iterations(10) +->ArgName("batch_size") +->Arg(1) +->Arg(4) +->Arg(8); + +// QS8 +BENCHMARK_DEFINE_F(AlexNetSystemTestQS8, neon_alexnet) +(::benchmark::State &state) +{ + while(state.KeepRunning()) + { + // Run AlexNet + profiler.start(); + network.run(); + profiler.stop(); + } +} + +BENCHMARK_REGISTER_F(AlexNetSystemTestQS8, neon_alexnet) +->Threads(1) +->Iterations(10) +->ArgName("batch_size") +->Arg(1) +->Arg(4) +->Arg(8); \ No newline at end of file diff --git a/tests/benchmark/system_tests/NEON/LeNet5.cpp b/tests/benchmark/system_tests/NEON/LeNet5.cpp new file mode 100644 index 0000000000..5170f05a70 --- /dev/null +++ b/tests/benchmark/system_tests/NEON/LeNet5.cpp @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
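The QS8 instantiation runs the identical graph in 8-bit fixed point. With fixed-point position p a raw value r represents r * 2^-p; the layer fixtures above use p = 4, giving a step of 0.0625 and a representable range of [-8, 7.9375]. A scalar model of the conversion (the library's kernels implement this in vectorized form):

    #include <cstdint>

    float qs8_to_float(int8_t raw, int fixed_point_position)
    {
        return static_cast<float>(raw) / static_cast<float>(1 << fixed_point_position);
    }
    // qs8_to_float(16, 4) == 1.0f, qs8_to_float(-128, 4) == -8.0f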
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "Globals.h" +#include "NEON/Helper.h" +#include "NEON/NEAccessor.h" +#include "TensorLibrary.h" +#include "benchmark/Datasets.h" +#include "benchmark/Profiler.h" +#include "benchmark/WallClockTimer.h" + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h" +#include "arm_compute/runtime/NEON/functions/NEConvolutionLayer.h" +#include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h" +#include "arm_compute/runtime/NEON/functions/NEPoolingLayer.h" +#include "arm_compute/runtime/NEON/functions/NESoftmaxLayer.h" +#include "arm_compute/runtime/Tensor.h" +#include "arm_compute/runtime/TensorAllocator.h" + +#include "benchmark/benchmark_api.h" + +using namespace arm_compute; +using namespace arm_compute::test; +using namespace arm_compute::test::benchmark; +using namespace arm_compute::test::neon; + +#include "benchmark/system_tests/common/LeNet5.h" + +namespace +{ +using LeNet5SystemTest = LeNet5Fixture; +} // namespace + +BENCHMARK_DEFINE_F(LeNet5SystemTest, neon_lenet5) +(::benchmark::State &state) +{ + while(state.KeepRunning()) + { + // Run LeNet5 + profiler.start(); + network.run(); + profiler.stop(); + } +} + +BENCHMARK_REGISTER_F(LeNet5SystemTest, neon_lenet5) +->Threads(1) +->Iterations(10) +->ArgName("batch_size") +->Arg(1) +->Arg(16) +->Arg(32); diff --git a/tests/benchmark/system_tests/common/AlexNet.h b/tests/benchmark/system_tests/common/AlexNet.h new file mode 100644 index 0000000000..9c93dc7228 --- /dev/null +++ b/tests/benchmark/system_tests/common/AlexNet.h @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_TEST_BENCHMARK_ALEXNET_H__ +#define __ARM_COMPUTE_TEST_BENCHMARK_ALEXNET_H__ + +#include "TensorLibrary.h" +#include "Utils.h" + +#include "benchmark/Profiler.h" +#include "benchmark/WallClockTimer.h" + +#include "model_objects/AlexNet.h" + +using namespace arm_compute; +using namespace arm_compute::test; +using namespace arm_compute::test::benchmark; + +namespace arm_compute +{ +namespace test +{ +namespace benchmark +{ +template +class AlexNetFixture : public ::benchmark::Fixture +{ +public: + void SetUp(::benchmark::State &state) override + { + profiler.add(std::make_shared()); + + const unsigned int batches = static_cast(state.range(0)); + const bool weights_transposed = true; + + network.init_weights(batches, weights_transposed); + network.build(); + network.allocate(); + network.fill_random(); + } + + void TearDown(::benchmark::State &state) override + { + profiler.submit(state); + network.clear(); + } + + Profiler profiler{}; + model_objects::AlexNet + network{}; +}; +} // namespace benchmark +} // namespace test +} // namespace arm_compute +#endif //__ARM_COMPUTE_TEST_BENCHMARK_ALEXNET_H__ diff --git a/tests/benchmark/system_tests/common/LeNet5.h b/tests/benchmark/system_tests/common/LeNet5.h new file mode 100644 index 0000000000..db34f6813a --- /dev/null +++ b/tests/benchmark/system_tests/common/LeNet5.h @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_TEST_BENCHMARK_LENET5_H__ +#define __ARM_COMPUTE_TEST_BENCHMARK_LENET5_H__ + +#include "TensorLibrary.h" +#include "Utils.h" + +#include "benchmark/Profiler.h" +#include "benchmark/WallClockTimer.h" + +#include "model_objects/LeNet5.h" + +using namespace arm_compute; +using namespace arm_compute::test; +using namespace arm_compute::test::benchmark; + +namespace arm_compute +{ +namespace test +{ +namespace benchmark +{ +template +class LeNet5Fixture : public ::benchmark::Fixture +{ +public: + void SetUp(::benchmark::State &state) override + { + profiler.add(std::make_shared()); + + network.build(static_cast(state.range(0))); + network.fill_random(); + } + + void TearDown(::benchmark::State &state) override + { + profiler.submit(state); + network.clear(); + } + + Profiler profiler{}; + model_objects::LeNet5 + network{}; +}; +} // namespace benchmark +} // namespace test +} // namespace arm_compute +#endif //__ARM_COMPUTE_TEST_BENCHMARK_LENET5_H__ diff --git a/tests/boost_wrapper.h b/tests/boost_wrapper.h new file mode 100644 index 0000000000..b584e4cd1f --- /dev/null +++ b/tests/boost_wrapper.h @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Woverloaded-virtual" +#pragma GCC diagnostic ignored "-Weffc++" +#pragma GCC diagnostic ignored "-Wctor-dtor-privacy" +#pragma GCC diagnostic ignored "-Wunused-variable" +#pragma GCC diagnostic ignored "-Wsign-compare" +#include "boost/test/unit_test.hpp" +#include "boost/variant.hpp" +#include "boost/variant/multivisitors.hpp" +#pragma GCC diagnostic pop + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Weffc++" +#include "boost/test/data/test_case.hpp" +#pragma GCC diagnostic pop + +#include "boost/test/data/monomorphic.hpp" diff --git a/tests/dataset/ActivationFunctionDataset.h b/tests/dataset/ActivationFunctionDataset.h new file mode 100644 index 0000000000..11e4baac78 --- /dev/null +++ b/tests/dataset/ActivationFunctionDataset.h @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2017 ARM Limited. 
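These common fixtures are deliberately backend-agnostic: the CL and NEON system tests instantiate them with their own tensor, accessor and function types. The template parameter lists are elided in the text above, so the aliases below are an assumed reconstruction of their shape rather than a copy of the real ones:

    // Hypothetical parameter order; the authoritative list is in the fixture itself.
    using CLLeNet5   = LeNet5Fixture<CLTensor, CLAccessor, CLActivationLayer, CLConvolutionLayer, CLFullyConnectedLayer, CLPoolingLayer, CLSoftmaxLayer>;
    using NEONLeNet5 = LeNet5Fixture<Tensor, NEAccessor, NEActivationLayer, NEConvolutionLayer, NEFullyConnectedLayer, NEPoolingLayer, NESoftmaxLayer>;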
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_TEST_DATASET_ACTIVATION_FUNCTION_DATASET_H__ +#define __ARM_COMPUTE_TEST_DATASET_ACTIVATION_FUNCTION_DATASET_H__ + +#include "arm_compute/core/Types.h" +#include "dataset/GenericDataset.h" + +#ifdef BOOST +#include "boost_wrapper.h" +#endif + +namespace arm_compute +{ +namespace test +{ +/** Data set containing all possible activation functions. + * + * Can be used as input for Boost data test cases to automatically run a test + * case on all activation functions. + */ +class ActivationFunctions final : public GenericDataset<ActivationLayerInfo::ActivationFunction, 9> +{ +public: + ActivationFunctions() + : GenericDataset + { + ActivationLayerInfo::ActivationFunction::ABS, + ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, + ActivationLayerInfo::ActivationFunction::LINEAR, + ActivationLayerInfo::ActivationFunction::LOGISTIC, + ActivationLayerInfo::ActivationFunction::RELU, + ActivationLayerInfo::ActivationFunction::SOFT_RELU, + ActivationLayerInfo::ActivationFunction::SQRT, + ActivationLayerInfo::ActivationFunction::SQUARE, + ActivationLayerInfo::ActivationFunction::TANH + } + { + } + + ~ActivationFunctions() = default; +}; +} // namespace test +} // namespace arm_compute +#endif //__ARM_COMPUTE_TEST_DATASET_ACTIVATION_FUNCTION_DATASET_H__ diff --git a/tests/dataset/ActivationLayerDataset.h b/tests/dataset/ActivationLayerDataset.h new file mode 100644 index 0000000000..9d7cffba14 --- /dev/null +++ b/tests/dataset/ActivationLayerDataset.h @@ -0,0 +1,177 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_TEST_DATASET_ACTIVATION_LAYER_DATASET_H__ +#define __ARM_COMPUTE_TEST_DATASET_ACTIVATION_LAYER_DATASET_H__ + +#include "TypePrinter.h" + +#include "arm_compute/core/TensorShape.h" +#include "arm_compute/core/Types.h" +#include "dataset/GenericDataset.h" + +#include +#include + +#ifdef BOOST +#include "boost_wrapper.h" +#endif + +namespace arm_compute +{ +namespace test +{ +class ActivationLayerDataObject +{ +public: + operator std::string() const + { + std::stringstream ss; + ss << "ActivationLayer"; + ss << "_I" << shape; + ss << "_F_" << info.activation(); + return ss.str(); + } + +public: + TensorShape shape; + ActivationLayerInfo info; +}; + +template +using ActivationLayerDataset = GenericDataset; + +class AlexNetActivationLayerDataset final : public ActivationLayerDataset<5> +{ +public: + AlexNetActivationLayerDataset() + : GenericDataset + { + ActivationLayerDataObject{ TensorShape(55U, 55U, 96U), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) }, + ActivationLayerDataObject{ TensorShape(27U, 27U, 256U), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) }, + ActivationLayerDataObject{ TensorShape(13U, 13U, 384U), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) }, + ActivationLayerDataObject{ TensorShape(13U, 13U, 256U), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) }, + ActivationLayerDataObject{ TensorShape(4096U), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) }, + } + { + } + + ~AlexNetActivationLayerDataset() = default; +}; + +class LeNet5ActivationLayerDataset final : public ActivationLayerDataset<1> +{ +public: + LeNet5ActivationLayerDataset() + : GenericDataset + { + ActivationLayerDataObject{ TensorShape(500U), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) }, + } + { + } + + ~LeNet5ActivationLayerDataset() = default; +}; + +class GoogLeNetActivationLayerDataset final : public ActivationLayerDataset<33> +{ +public: + GoogLeNetActivationLayerDataset() + : GenericDataset + { + // conv1/relu_7x7 + ActivationLayerDataObject{ TensorShape(112U, 112U, 64U), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) }, + // conv2/relu_3x3_reduce + ActivationLayerDataObject{ TensorShape(56U, 56U, 64U), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) }, + // conv2/relu_3x3 + ActivationLayerDataObject{ TensorShape(56U, 56U, 192U), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) }, + // inception_3a/relu_1x1, inception_3b/relu_pool_proj + ActivationLayerDataObject{ TensorShape(28U, 28U, 64U), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) }, + // inception_3a/relu_3x3_reduce, inception_3b/relu_5x5 + ActivationLayerDataObject{ TensorShape(28U, 28U, 96U), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) }, + // inception_3a/relu_3x3, inception_3b/relu_1x1, inception_3b/relu_3x3_reduce + ActivationLayerDataObject{ TensorShape(28U, 28U, 128U), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) }, + // inception_3a/relu_5x5_reduce + ActivationLayerDataObject{ TensorShape(28U, 28U, 16U), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) }, + // inception_3a/relu_5x5, 
inception_3a/relu_pool_proj, inception_3b/relu_5x5_reduce + ActivationLayerDataObject{ TensorShape(28U, 28U, 32U), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) }, + // inception_3b/relu_3x3 + ActivationLayerDataObject{ TensorShape(28U, 28U, 192U), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) }, + // inception_4a/relu_1x1 + ActivationLayerDataObject{ TensorShape(14U, 14U, 192U), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) }, + // inception_4a/relu_3x3_reduce + ActivationLayerDataObject{ TensorShape(14U, 14U, 96U), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) }, + // inception_4a/relu_3x3 + ActivationLayerDataObject{ TensorShape(14U, 14U, 208U), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) }, + // inception_4a/relu_5x5_reduce + ActivationLayerDataObject{ TensorShape(14U, 14U, 16U), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) }, + // inception_4a/relu_5x5 + ActivationLayerDataObject{ TensorShape(14U, 14U, 48U), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) }, + // inception_4a/relu_pool_proj, inception_4b/relu_5x5, inception_4b/relu_pool_proj, inception_4c/relu_5x5, inception_4c/relu_pool_proj, inception_4d/relu_5x5, inception_4d/relu_pool_proj + ActivationLayerDataObject{ TensorShape(14U, 14U, 64U), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) }, + // inception_4b/relu_1x1, inception_4e/relu_3x3_reduce + ActivationLayerDataObject{ TensorShape(14U, 14U, 160U), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) }, + // inception_4b/relu_3x3_reduce, inception_4d/relu_1x1 + ActivationLayerDataObject{ TensorShape(14U, 14U, 112U), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) }, + // inception_4b/relu_3x3 + ActivationLayerDataObject{ TensorShape(14U, 14U, 224U), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) }, + // inception_4b/relu_5x5_reduce, inception_4c/relu_5x5_reduce + ActivationLayerDataObject{ TensorShape(14U, 14U, 24U), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) }, + // inception_4c/relu_1x1, inception_4c/relu_3x3_reduce, inception_4e/relu_5x5, inception_4e/relu_pool_proj + ActivationLayerDataObject{ TensorShape(14U, 14U, 128U), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) }, + // inception_4c/relu_3x3, inception_4e/relu_1x1 + ActivationLayerDataObject{ TensorShape(14U, 14U, 256U), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) }, + // inception_4d/relu_3x3_reduce + ActivationLayerDataObject{ TensorShape(14U, 14U, 144U), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) }, + // inception_4d/relu_3x3 + ActivationLayerDataObject{ TensorShape(14U, 14U, 288U), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) }, + // inception_4d/relu_5x5_reduce, inception_4e/relu_5x5_reduce + ActivationLayerDataObject{ TensorShape(14U, 14U, 32U), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) }, + // inception_4e/relu_3x3 + ActivationLayerDataObject{ TensorShape(14U, 14U, 320U), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) }, + // inception_5a/relu_1x1 + ActivationLayerDataObject{ TensorShape(7U, 7U, 256U), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) }, + // inception_5a/relu_3x3_reduce + ActivationLayerDataObject{ TensorShape(7U, 7U, 160U), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) }, + // 
inception_5a/relu_3x3 + ActivationLayerDataObject{ TensorShape(7U, 7U, 320U), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) }, + // inception_5a/relu_5x5_reduce + ActivationLayerDataObject{ TensorShape(7U, 7U, 32U), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) }, + // inception_5a/relu_5x5, inception_5a/relu_pool_proj, inception_5b/relu_5x5, inception_5b/relu_pool_proj + ActivationLayerDataObject{ TensorShape(7U, 7U, 128U), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) }, + // inception_5b/relu_1x1, inception_5b/relu_3x3 + ActivationLayerDataObject{ TensorShape(7U, 7U, 384U), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) }, + // inception_5b/relu_3x3_reduce + ActivationLayerDataObject{ TensorShape(7U, 7U, 192U), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) }, + // inception_5b/relu_5x5_reduce + ActivationLayerDataObject{ TensorShape(7U, 7U, 48U), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) } + } + { + } + + ~GoogLeNetActivationLayerDataset() = default; +}; + +} // namespace test +} // namespace arm_compute +#endif //__ARM_COMPUTE_TEST_DATASET_ACTIVATION_LAYER_DATASET_H__ diff --git a/tests/dataset/BatchNormalizationLayerDataset.h b/tests/dataset/BatchNormalizationLayerDataset.h new file mode 100644 index 0000000000..4323b8fe93 --- /dev/null +++ b/tests/dataset/BatchNormalizationLayerDataset.h @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_TEST_DATASET_BATCH_NORMALIZATION_LAYER_DATASET_H__ +#define __ARM_COMPUTE_TEST_DATASET_BATCH_NORMALIZATION_LAYER_DATASET_H__ + +#include "TypePrinter.h" + +#include "arm_compute/core/TensorShape.h" +#include "arm_compute/core/Types.h" +#include "dataset/GenericDataset.h" + +#include <sstream> +#include <string> + +#ifdef BOOST +#include "boost_wrapper.h" +#endif + +namespace arm_compute +{ +namespace test +{ +class BatchNormalizationLayerDataObject +{ +public: + operator std::string() const + { + std::stringstream ss; + ss << "BatchNormalizationLayer"; + ss << "_I" << shape0; + ss << "_I" << shape1; + ss << "_I" << epsilon; + return ss.str(); + } + + friend std::ostream &operator<<(std::ostream &s, const BatchNormalizationLayerDataObject &obj) + { + s << static_cast<std::string>(obj); + return s; + } + +public: + TensorShape shape0; + TensorShape shape1; + float epsilon; +}; + +template <unsigned int Size> +using BatchNormalizationLayerDataset = GenericDataset<BatchNormalizationLayerDataObject, Size>; + +class RandomBatchNormalizationLayerDataset final : public BatchNormalizationLayerDataset<3> +{ +public: + RandomBatchNormalizationLayerDataset() + : GenericDataset + { + BatchNormalizationLayerDataObject{ TensorShape(15U, 16U, 2U, 12U), TensorShape(2U), 0.1f }, + BatchNormalizationLayerDataObject{ TensorShape(21U, 11U, 12U, 7U), TensorShape(12U), 0.1f }, + BatchNormalizationLayerDataObject{ TensorShape(7U, 3U, 6U, 11U), TensorShape(6U), 0.1f }, + } + { + } + + ~RandomBatchNormalizationLayerDataset() = default; +}; + +} // namespace test +} // namespace arm_compute +#endif //__ARM_COMPUTE_TEST_DATASET_BATCH_NORMALIZATION_LAYER_DATASET_H__ diff --git a/tests/dataset/BorderModeDataset.h b/tests/dataset/BorderModeDataset.h new file mode 100644 index 0000000000..37c7a5ba10 --- /dev/null +++ b/tests/dataset/BorderModeDataset.h @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_TEST_BORDER_MODE_DATASET_H__ +#define __ARM_COMPUTE_TEST_BORDER_MODE_DATASET_H__ + +#include "arm_compute/core/Types.h" + +#ifdef BOOST +#include "boost_wrapper.h" +#endif + +#include <array> + +namespace arm_compute +{ +namespace test +{ +/** Data set containing all possible border modes. + * + * Can be used as input for Boost data test cases to automatically run a test + * case on all border modes. + */ +class BorderModes +{ +public: + /** Type of the samples in the data set. */ + using sample = BorderMode; + + /** Dimensionality of the data set.
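In each batch normalization entry, shape0 describes the input tensor and shape1 the per-channel parameter vectors (mean, variance, beta and gamma), so shape1 must match the channel dimension of shape0: TensorShape(15U, 16U, 2U, 12U) has 2 channels, hence TensorShape(2U). A quick consistency check makes the relation explicit:

    #include <cassert>

    void check(const arm_compute::test::BatchNormalizationLayerDataObject &obj)
    {
        // dimension 2 of a W x H x C x N input is the channel count
        assert(obj.shape0[2] == obj.shape1[0]);
    }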
*/ + enum + { + arity = 1 + }; + + /** Number of samples in the data set. */ +#ifdef BOOST + boost::unit_test::data::size_t size() const +#else + unsigned int size() const +#endif + { + return _modes.size(); + } + + /** Type of the iterator used to step through all samples in the data set. + * Needs to support operator*() and operator++() which a pointer does. + */ + using iterator = const BorderMode *; + + /** Iterator to the first sample in the data set. */ + iterator begin() const + { + return _modes.data(); + } + +private: + std::array<BorderMode, 3> _modes{ { BorderMode::UNDEFINED, BorderMode::CONSTANT, BorderMode::REPLICATE } }; +}; +} // namespace test +} // namespace arm_compute +#endif diff --git a/tests/dataset/ConvertPolicyDataset.h b/tests/dataset/ConvertPolicyDataset.h new file mode 100644 index 0000000000..697dba615b --- /dev/null +++ b/tests/dataset/ConvertPolicyDataset.h @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_TEST_CONVERT_POLICY_DATASETS_H__ +#define __ARM_COMPUTE_TEST_CONVERT_POLICY_DATASETS_H__ + +#include "arm_compute/core/Types.h" + +#include <array> + +#ifdef BOOST +#include "boost_wrapper.h" +#endif + +namespace arm_compute +{ +namespace test +{ +/** Data set containing all possible convert/overflow policies. + * + * Can be used as input for Boost data test cases to automatically run a test + * case on different convert policies. + */ +class ConvertPolicies +{ +public: + /** Type of the samples in the data set. */ + using sample = ConvertPolicy; + + /** Dimensionality of the data set. */ + enum + { + arity = 1 + }; + + /** Number of samples in the data set. */ +#ifdef BOOST + boost::unit_test::data::size_t size() const +#else + unsigned int size() const +#endif + { + return _policies.size(); + } + + /** Type of the iterator used to step through all samples in the data set. + * Needs to support operator*() and operator++() which a pointer does. + */ + using iterator = const ConvertPolicy *; + + /** Iterator to the first sample in the data set.
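For reference, the two policies differ only in how out-of-range results are handled. A scalar model for an unsigned 8-bit addition (the library applies the same rule element-wise):

    #include <algorithm>
    #include <cstdint>

    uint8_t add_u8(uint8_t a, uint8_t b, bool saturate)
    {
        const int sum = int{ a } + int{ b };
        if(saturate)
        {
            return static_cast<uint8_t>(std::min(sum, 255)); // SATURATE: 200 + 100 -> 255
        }
        return static_cast<uint8_t>(sum & 0xFF); // WRAP: 200 + 100 -> 44
    }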
*/ + iterator begin() const + { + return _policies.data(); + } + +private: + std::array<ConvertPolicy, 2> _policies{ { ConvertPolicy::WRAP, ConvertPolicy::SATURATE } }; +}; +} // namespace test +} // namespace arm_compute +#endif diff --git a/tests/dataset/ConvolutionLayerDataset.h b/tests/dataset/ConvolutionLayerDataset.h new file mode 100644 index 0000000000..e66117e0d8 --- /dev/null +++ b/tests/dataset/ConvolutionLayerDataset.h @@ -0,0 +1,269 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_TEST_DATASET_CONVOLUTION_LAYER_DATASET_H__ +#define __ARM_COMPUTE_TEST_DATASET_CONVOLUTION_LAYER_DATASET_H__ + +#include "TypePrinter.h" + +#include "arm_compute/core/TensorShape.h" +#include "dataset/GenericDataset.h" +#include "dataset/ShapeDatasets.h" + +#include <sstream> +#include <string> + +#ifdef BOOST +#include "boost_wrapper.h" +#endif + +namespace arm_compute +{ +namespace test +{ +/** Convolution Layer data object */ +class ConvolutionLayerDataObject +{ +public: + operator std::string() const + { + std::stringstream ss; + ss << "ConvolutionLayer"; + ss << "_I" << src_shape; + ss << "_K" << weights_shape; + ss << "_PS" << info; + return ss.str(); + } + + friend std::ostream &operator<<(std::ostream &os, const ConvolutionLayerDataObject &obj) + { + os << static_cast<std::string>(obj); + return os; + } + +public: + TensorShape src_shape; + TensorShape weights_shape; + TensorShape bias_shape; + TensorShape dst_shape; + PadStrideInfo info; +}; + +template <unsigned int Size> +using ConvolutionLayerDataset = GenericDataset<ConvolutionLayerDataObject, Size>; + +/** Data set containing small convolution layer shapes */ +class SmallConvolutionLayerDataset final : public ConvolutionLayerDataset<3> +{ +public: + SmallConvolutionLayerDataset() + : GenericDataset + { + ConvolutionLayerDataObject{ TensorShape(23U, 27U, 5U), TensorShape(3U, 3U, 5U, 21U), TensorShape(21U), TensorShape(11U, 25U, 21U), PadStrideInfo(2, 1, 0, 0) }, + ConvolutionLayerDataObject{ TensorShape(33U, 27U, 7U), TensorShape(5U, 5U, 7U, 11U), TensorShape(11U), TensorShape(11U, 12U, 11U), PadStrideInfo(3, 2, 1, 0) }, + ConvolutionLayerDataObject{ TensorShape(17U, 31U, 2U, 7U), TensorShape(5U, 5U, 2U, 5U), TensorShape(5U), TensorShape(15U, 15U, 5U, 7U), PadStrideInfo(1, 2, 1, 1) } + } + { + } + + ~SmallConvolutionLayerDataset() = default; +}; + +/** Data set containing direct convolution tensor shapes.
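The dst shapes in these entries follow the usual convolution output arithmetic with floor rounding, out = (in + 2 * pad - kernel) / stride + 1; for the first small entry the width is (23 + 0 - 3) / 2 + 1 = 11 and the height (27 + 0 - 3) / 1 + 1 = 25, matching TensorShape(11U, 25U, 21U). As a one-line helper:

    unsigned int conv_out_dim(unsigned int in, unsigned int kernel, unsigned int pad, unsigned int stride)
    {
        return (in + 2 * pad - kernel) / stride + 1; // integer division == floor rounding
    }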
*/ +class DirectConvolutionShapes final : public ShapeDataset<3> +{ +public: + DirectConvolutionShapes() + : ShapeDataset(TensorShape(3U, 3U, 3U, 2U, 4U, 5U), + TensorShape(32U, 37U, 3U), + TensorShape(13U, 15U, 8U, 3U)) + { + } +}; + +/** AlexNet's convolution layers tensor shapes. */ +class AlexNetConvolutionLayerDataset final : public ConvolutionLayerDataset<5> +{ +public: + AlexNetConvolutionLayerDataset() + : GenericDataset + { + ConvolutionLayerDataObject{ TensorShape(227U, 227U, 3U), TensorShape(11U, 11U, 3U, 96U), TensorShape(96U), TensorShape(55U, 55U, 96U), PadStrideInfo(4, 4, 0, 0) }, + ConvolutionLayerDataObject{ TensorShape(27U, 27U, 96U), TensorShape(5U, 5U, 96U, 256U), TensorShape(256U), TensorShape(27U, 27U, 256U), PadStrideInfo(1, 1, 2, 2) }, + ConvolutionLayerDataObject{ TensorShape(13U, 13U, 256U), TensorShape(3U, 3U, 256U, 384U), TensorShape(384U), TensorShape(13U, 13U, 384U), PadStrideInfo(1, 1, 1, 1) }, + ConvolutionLayerDataObject{ TensorShape(13U, 13U, 384U), TensorShape(3U, 3U, 384U, 384U), TensorShape(384U), TensorShape(13U, 13U, 384U), PadStrideInfo(1, 1, 1, 1) }, + ConvolutionLayerDataObject{ TensorShape(13U, 13U, 384U), TensorShape(3U, 3U, 384U, 256U), TensorShape(256U), TensorShape(13U, 13U, 256U), PadStrideInfo(1, 1, 1, 1) } + } + { + } + + ~AlexNetConvolutionLayerDataset() = default; +}; + +/** LeNet5's convolution layers tensor shapes. */ +class LeNet5ConvolutionLayerDataset final : public ConvolutionLayerDataset<2> +{ +public: + LeNet5ConvolutionLayerDataset() + : GenericDataset + { + ConvolutionLayerDataObject{ TensorShape(28U, 28U, 1U), TensorShape(5U, 5U, 1U, 20U), TensorShape(20U), TensorShape(24U, 24U, 20U), PadStrideInfo(1, 1, 0, 0) }, + ConvolutionLayerDataObject{ TensorShape(12U, 12U, 20U), TensorShape(5U, 5U, 20U, 50U), TensorShape(50U), TensorShape(8U, 8U, 50U), PadStrideInfo(1, 1, 0, 0) }, + } + { + } + + ~LeNet5ConvolutionLayerDataset() = default; +}; + +/** GoogleLeNet v1 convolution layers tensor shapes (Part 1). + * + * @note Dataset is split into two to avoid a register allocation failure produced by clang in Android debug builds. 
+ */ +class GoogLeNetConvolutionLayerDataset1 final : public ConvolutionLayerDataset<32> +{ +public: + GoogLeNetConvolutionLayerDataset1() + : GenericDataset + { + // conv1/7x7_s2 + ConvolutionLayerDataObject{ TensorShape(224U, 224U, 3U), TensorShape(7U, 7U, 3U, 64U), TensorShape(64U), TensorShape(112U, 112U, 64U), PadStrideInfo(2, 2, 3, 3) }, + // conv2/3x3_reduce + ConvolutionLayerDataObject{ TensorShape(56U, 56U, 64U), TensorShape(1U, 1U, 64U, 64U), TensorShape(64U), TensorShape(56U, 56U, 64U), PadStrideInfo(1, 1, 0, 0) }, + // conv2/3x3 + ConvolutionLayerDataObject{ TensorShape(56U, 56U, 64U), TensorShape(3U, 3U, 64U, 192U), TensorShape(192U), TensorShape(56U, 56U, 192U), PadStrideInfo(1, 1, 1, 1) }, + // inception_3a/1x1 + ConvolutionLayerDataObject{ TensorShape(28U, 28U, 192U), TensorShape(1U, 1U, 192U, 64U), TensorShape(64U), TensorShape(28U, 28U, 64U), PadStrideInfo(1, 1, 0, 0) }, + // inception_3a/3x3_reduce + ConvolutionLayerDataObject{ TensorShape(28U, 28U, 192U), TensorShape(1U, 1U, 192U, 96U), TensorShape(96U), TensorShape(28U, 28U, 96U), PadStrideInfo(1, 1, 0, 0) }, + // inception_3a/3x3 + ConvolutionLayerDataObject{ TensorShape(28U, 28U, 96U), TensorShape(3U, 3U, 96U, 128U), TensorShape(128U), TensorShape(28U, 28U, 128U), PadStrideInfo(1, 1, 1, 1) }, + // inception_3a/5x5_reduce + ConvolutionLayerDataObject{ TensorShape(28U, 28U, 192U), TensorShape(1U, 1U, 192U, 16U), TensorShape(16U), TensorShape(28U, 28U, 16U), PadStrideInfo(1, 1, 0, 0) }, + // inception_3a/5x5 + ConvolutionLayerDataObject{ TensorShape(28U, 28U, 16U), TensorShape(5U, 5U, 16U, 32U), TensorShape(32U), TensorShape(28U, 28U, 32U), PadStrideInfo(1, 1, 2, 2) }, + // inception_3a/pool_proj + ConvolutionLayerDataObject{ TensorShape(28U, 28U, 192U), TensorShape(1U, 1U, 192U, 32U), TensorShape(32U), TensorShape(28U, 28U, 32U), PadStrideInfo(1, 1, 0, 0) }, + // inception_3b/1x1, inception_3b/3x3_reduce + ConvolutionLayerDataObject{ TensorShape(28U, 28U, 256U), TensorShape(1U, 1U, 256U, 128U), TensorShape(128U), TensorShape(28U, 28U, 128U), PadStrideInfo(1, 1, 0, 0) }, + // inception_3b/3x3 + ConvolutionLayerDataObject{ TensorShape(28U, 28U, 128U), TensorShape(3U, 3U, 128U, 192U), TensorShape(192U), TensorShape(28U, 28U, 192U), PadStrideInfo(1, 1, 1, 1) }, + // inception_3b/5x5_reduce + ConvolutionLayerDataObject{ TensorShape(28U, 28U, 256U), TensorShape(1U, 1U, 256U, 32U), TensorShape(32U), TensorShape(28U, 28U, 32U), PadStrideInfo(1, 1, 0, 0) }, + // inception_3b/5x5 + ConvolutionLayerDataObject{ TensorShape(28U, 28U, 32U), TensorShape(5U, 5U, 32U, 96U), TensorShape(96U), TensorShape(28U, 28U, 96U), PadStrideInfo(1, 1, 2, 2) }, + // inception_3b/pool_proj + ConvolutionLayerDataObject{ TensorShape(28U, 28U, 256U), TensorShape(1U, 1U, 256U, 64U), TensorShape(64U), TensorShape(28U, 28U, 64U), PadStrideInfo(1, 1, 0, 0) }, + // inception_4a/1x1 + ConvolutionLayerDataObject{ TensorShape(14U, 14U, 480U), TensorShape(1U, 1U, 480U, 192U), TensorShape(192U), TensorShape(14U, 14U, 192U), PadStrideInfo(1, 1, 0, 0) }, + // inception_4a/3x3_reduce + ConvolutionLayerDataObject{ TensorShape(14U, 14U, 480U), TensorShape(1U, 1U, 480U, 96U), TensorShape(96U), TensorShape(14U, 14U, 96U), PadStrideInfo(1, 1, 0, 0) }, + // inception_4a/3x3 + ConvolutionLayerDataObject{ TensorShape(14U, 14U, 96U), TensorShape(3U, 3U, 96U, 208U), TensorShape(208U), TensorShape(14U, 14U, 208U), PadStrideInfo(1, 1, 1, 1) }, + // inception_4a/5x5_reduce + ConvolutionLayerDataObject{ TensorShape(14U, 14U, 480U), TensorShape(1U, 1U, 480U, 16U), 
TensorShape(16U), TensorShape(14U, 14U, 16U), PadStrideInfo(1, 1, 0, 0) }, + // inception_4a/5x5 + ConvolutionLayerDataObject{ TensorShape(14U, 14U, 16U), TensorShape(5U, 5U, 16U, 48U), TensorShape(48U), TensorShape(14U, 14U, 48U), PadStrideInfo(1, 1, 2, 2) }, + // inception_4a/pool_proj + ConvolutionLayerDataObject{ TensorShape(14U, 14U, 480U), TensorShape(1U, 1U, 480U, 64U), TensorShape(64U), TensorShape(14U, 14U, 64U), PadStrideInfo(1, 1, 0, 0) }, + // inception_4b/1x1 + ConvolutionLayerDataObject{ TensorShape(14U, 14U, 512U), TensorShape(1U, 1U, 512U, 160U), TensorShape(160U), TensorShape(14U, 14U, 160U), PadStrideInfo(1, 1, 0, 0) }, + // inception_4b/3x3_reduce, inception_4d/1x1 + ConvolutionLayerDataObject{ TensorShape(14U, 14U, 512U), TensorShape(1U, 1U, 512U, 112U), TensorShape(112U), TensorShape(14U, 14U, 112U), PadStrideInfo(1, 1, 0, 0) }, + // inception_4b/3x3 + ConvolutionLayerDataObject{ TensorShape(14U, 14U, 112U), TensorShape(3U, 3U, 112U, 224U), TensorShape(224U), TensorShape(14U, 14U, 224U), PadStrideInfo(1, 1, 1, 1) }, + // inception_4b/5x5_reduce, inception_4c/5x5_reduce + ConvolutionLayerDataObject{ TensorShape(14U, 14U, 512U), TensorShape(1U, 1U, 512U, 24U), TensorShape(24U), TensorShape(14U, 14U, 24U), PadStrideInfo(1, 1, 0, 0) }, + // inception_4b/5x5, inception_4c/5x5 + ConvolutionLayerDataObject{ TensorShape(14U, 14U, 24U), TensorShape(5U, 5U, 24U, 64U), TensorShape(64U), TensorShape(14U, 14U, 64U), PadStrideInfo(1, 1, 2, 2) }, + // inception_4b/pool_proj, inception_4c/pool_proj, inception_4d/pool_proj + ConvolutionLayerDataObject{ TensorShape(14U, 14U, 512U), TensorShape(1U, 1U, 512U, 64U), TensorShape(64U), TensorShape(14U, 14U, 64U), PadStrideInfo(1, 1, 0, 0) }, + // inception_4c/1x1, inception_4c/3x3_reduce + ConvolutionLayerDataObject{ TensorShape(14U, 14U, 512U), TensorShape(1U, 1U, 512U, 128U), TensorShape(128U), TensorShape(14U, 14U, 128U), PadStrideInfo(1, 1, 0, 0) }, + // inception_4c/3x3 + ConvolutionLayerDataObject{ TensorShape(14U, 14U, 128U), TensorShape(3U, 3U, 128U, 256U), TensorShape(256U), TensorShape(14U, 14U, 256U), PadStrideInfo(1, 1, 1, 1) }, + // inception_4d/3x3_reduce + ConvolutionLayerDataObject{ TensorShape(14U, 14U, 512U), TensorShape(1U, 1U, 512U, 144U), TensorShape(144U), TensorShape(14U, 14U, 144U), PadStrideInfo(1, 1, 0, 0) }, + // inception_4d/3x3 + ConvolutionLayerDataObject{ TensorShape(14U, 14U, 144U), TensorShape(3U, 3U, 144U, 288U), TensorShape(288U), TensorShape(14U, 14U, 288U), PadStrideInfo(1, 1, 1, 1) }, + // inception_4d/5x5_reduce + ConvolutionLayerDataObject{ TensorShape(14U, 14U, 512U), TensorShape(1U, 1U, 512U, 32U), TensorShape(32U), TensorShape(14U, 14U, 32U), PadStrideInfo(1, 1, 0, 0) }, + // inception_4d/5x5 + ConvolutionLayerDataObject{ TensorShape(14U, 14U, 32U), TensorShape(5U, 5U, 32U, 64U), TensorShape(64U), TensorShape(14U, 14U, 64U), PadStrideInfo(1, 1, 2, 2) }, + } + { + } + + ~GoogLeNetConvolutionLayerDataset1() = default; +}; + +/** GoogleLeNet v1 convolution layers tensor shapes (Part 2). 
*/ +class GoogLeNetConvolutionLayerDataset2 final : public ConvolutionLayerDataset<17> +{ +public: + GoogLeNetConvolutionLayerDataset2() + : GenericDataset + { + // inception_4e/1x1 + ConvolutionLayerDataObject{ TensorShape(14U, 14U, 528U), TensorShape(1U, 1U, 528U, 256U), TensorShape(256U), TensorShape(14U, 14U, 256U), PadStrideInfo(1, 1, 0, 0) }, + // inception_4e/3x3_reduce + ConvolutionLayerDataObject{ TensorShape(14U, 14U, 528U), TensorShape(1U, 1U, 528U, 160U), TensorShape(160U), TensorShape(14U, 14U, 160U), PadStrideInfo(1, 1, 0, 0) }, + // inception_4e/3x3 + ConvolutionLayerDataObject{ TensorShape(14U, 14U, 160U), TensorShape(3U, 3U, 160U, 320U), TensorShape(320U), TensorShape(14U, 14U, 320U), PadStrideInfo(1, 1, 1, 1) }, + // inception_4e/5x5_reduce + ConvolutionLayerDataObject{ TensorShape(14U, 14U, 528U), TensorShape(1U, 1U, 528U, 32U), TensorShape(32U), TensorShape(14U, 14U, 32U), PadStrideInfo(1, 1, 0, 0) }, + // inception_4e/5x5 + ConvolutionLayerDataObject{ TensorShape(14U, 14U, 32U), TensorShape(5U, 5U, 32U, 128U), TensorShape(128U), TensorShape(14U, 14U, 128U), PadStrideInfo(1, 1, 2, 2) }, + // inception_4e/pool_proj + ConvolutionLayerDataObject{ TensorShape(14U, 14U, 528U), TensorShape(1U, 1U, 528U, 128U), TensorShape(128U), TensorShape(14U, 14U, 128U), PadStrideInfo(1, 1, 0, 0) }, + // inception_5a/1x1 + ConvolutionLayerDataObject{ TensorShape(7U, 7U, 832U), TensorShape(1U, 1U, 832U, 256U), TensorShape(256U), TensorShape(7U, 7U, 256U), PadStrideInfo(1, 1, 0, 0) }, + // inception_5a/3x3_reduce + ConvolutionLayerDataObject{ TensorShape(7U, 7U, 832U), TensorShape(1U, 1U, 832U, 160U), TensorShape(160U), TensorShape(7U, 7U, 160U), PadStrideInfo(1, 1, 0, 0) }, + // inception_5a/3x3 + ConvolutionLayerDataObject{ TensorShape(7U, 7U, 160U), TensorShape(3U, 3U, 160U, 320U), TensorShape(320U), TensorShape(7U, 7U, 320U), PadStrideInfo(1, 1, 1, 1) }, + // inception_5a/5x5_reduce + ConvolutionLayerDataObject{ TensorShape(7U, 7U, 832U), TensorShape(1U, 1U, 832U, 32U), TensorShape(32U), TensorShape(7U, 7U, 32U), PadStrideInfo(1, 1, 0, 0) }, + // inception_5a/5x5 + ConvolutionLayerDataObject{ TensorShape(7U, 7U, 32U), TensorShape(5U, 5U, 32U, 128U), TensorShape(128U), TensorShape(7U, 7U, 128U), PadStrideInfo(1, 1, 2, 2) }, + // inception_5a/pool_proj, inception_5b/pool_proj + ConvolutionLayerDataObject{ TensorShape(7U, 7U, 832U), TensorShape(1U, 1U, 832U, 128U), TensorShape(128U), TensorShape(7U, 7U, 128U), PadStrideInfo(1, 1, 0, 0) }, + // inception_5b/1x1 + ConvolutionLayerDataObject{ TensorShape(7U, 7U, 832U), TensorShape(1U, 1U, 832U, 384U), TensorShape(384U), TensorShape(7U, 7U, 384U), PadStrideInfo(1, 1, 0, 0) }, + // inception_5b/3x3_reduce + ConvolutionLayerDataObject{ TensorShape(7U, 7U, 832U), TensorShape(1U, 1U, 832U, 192U), TensorShape(192U), TensorShape(7U, 7U, 192U), PadStrideInfo(1, 1, 0, 0) }, + // inception_5b/3x3 + ConvolutionLayerDataObject{ TensorShape(7U, 7U, 192U), TensorShape(3U, 3U, 192U, 384U), TensorShape(384U), TensorShape(7U, 7U, 384U), PadStrideInfo(1, 1, 1, 1) }, + // inception_5b/5x5_reduce + ConvolutionLayerDataObject{ TensorShape(7U, 7U, 832U), TensorShape(1U, 1U, 832U, 48U), TensorShape(48U), TensorShape(7U, 7U, 48U), PadStrideInfo(1, 1, 0, 0) }, + // inception_5b/5x5 + ConvolutionLayerDataObject{ TensorShape(7U, 7U, 48U), TensorShape(5U, 5U, 48U, 128U), TensorShape(128U), TensorShape(7U, 7U, 128U), PadStrideInfo(1, 1, 2, 2) } + } + { + } + + ~GoogLeNetConvolutionLayerDataset2() = default; +}; +} // namespace test +} // namespace arm_compute +#endif 
//__ARM_COMPUTE_TEST_DATASET_CONVOLUTION_LAYER_DATASET_H__ diff --git a/tests/dataset/DataTypeDatasets.h b/tests/dataset/DataTypeDatasets.h new file mode 100644 index 0000000000..8c63857477 --- /dev/null +++ b/tests/dataset/DataTypeDatasets.h @@ -0,0 +1,193 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_TEST_DATA_TYPE_DATASET_H__ +#define __ARM_COMPUTE_TEST_DATA_TYPE_DATASET_H__ + +#include "arm_compute/core/Types.h" + +#ifdef BOOST +#include "boost_wrapper.h" +#endif + +namespace arm_compute +{ +namespace test +{ +/** Abstract data set containing data types. + * + * Can be used as input for Boost data test cases to automatically run a test + * case on different data types. + */ +template <unsigned int Size> +class DataTypes +{ +public: + /** Type of the samples in the data set. */ + using sample = DataType; + + /** Dimensionality of the data set. */ + enum + { + arity = 1 + }; + + /** Number of samples in the data set. */ +#ifdef BOOST + boost::unit_test::data::size_t size() const +#else + unsigned int size() const +#endif + { + return _types.size(); + } + + /** Type of the iterator used to step through all samples in the data set. + * Needs to support operator*() and operator++() which a pointer does. + */ + using iterator = const DataType *; + + /** Iterator to the first sample in the data set. */ + iterator begin() const + { + return _types.data(); + } + +protected: + /** Protected constructor to make the class abstract. */ + template <typename... Ts> + DataTypes(Ts &&... types) + : _types{ { types... } } + { + } + + /** Protected destructor to prevent deletion of derived classes through a + * pointer to the base class. + */ + ~DataTypes() = default; + +private: + std::array<DataType, Size> _types; +}; + +/** Data set containing all data types. */ +class AllDataTypes final : public DataTypes<14> +{ +public: + AllDataTypes() + : DataTypes{ DataType::U8, DataType::S8, DataType::U16, DataType::S16, + DataType::U32, DataType::S32, DataType::U64, DataType::S64, + DataType::F16, DataType::F32, DataType::F64, DataType::SIZET, + DataType::QS8, DataType::QS16 } + { + } + + ~AllDataTypes() = default; +}; + +/** Data set containing all unsigned data types. */ +class UnsignedDataTypes final : public DataTypes<4> +{ +public: + UnsignedDataTypes() + : DataTypes{ DataType::U8, DataType::U16, DataType::U32, DataType::U64 } + { + } + + ~UnsignedDataTypes() = default; +}; + +/** Data set containing all signed data types.
*/ +class SignedDataTypes final : public DataTypes<4> +{ +public: + SignedDataTypes() + : DataTypes{ DataType::S8, DataType::S16, DataType::S32, DataType::S64 } + { + } + + ~SignedDataTypes() = default; +}; + +/** Data set containing all floating point data types. */ +class FloatDataTypes final : public DataTypes<3> +{ +public: + FloatDataTypes() + : DataTypes{ DataType::F16, DataType::F32, DataType::F64 } + { + } + + ~FloatDataTypes() = default; +}; + +/** Data set containing all fixed point data types. */ +class FixedPointDataTypes final : public DataTypes<2> +{ +public: + FixedPointDataTypes() + : DataTypes{ DataType::QS8, DataType::QS16 } + { + } + + ~FixedPointDataTypes() = default; +}; + +/** Supported CNN float types. */ +class CNNFloatDataTypes final : public DataTypes<1> +{ +public: + CNNFloatDataTypes() + : DataTypes{ DataType::F32 } + { + } + + ~CNNFloatDataTypes() = default; +}; + +/** Supported CNN fixed point types. */ +class CNNFixedPointDataTypes final : public DataTypes<1> +{ +public: + CNNFixedPointDataTypes() + : DataTypes{ DataType::QS8 } + { + } + + ~CNNFixedPointDataTypes() = default; +}; + +/** Supported CNN types. */ +class CNNDataTypes final : public DataTypes<2> +{ +public: + CNNDataTypes() + : DataTypes{ DataType::F32, DataType::QS8 } + { + } + + ~CNNDataTypes() = default; +}; +} // namespace test +} // namespace arm_compute +#endif diff --git a/tests/dataset/FullyConnectedLayerDataset.h b/tests/dataset/FullyConnectedLayerDataset.h new file mode 100644 index 0000000000..53b7d022d7 --- /dev/null +++ b/tests/dataset/FullyConnectedLayerDataset.h @@ -0,0 +1,155 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_TEST_DATASET_FULLY_CONNECTED_LAYER_DATASET_H__ +#define __ARM_COMPUTE_TEST_DATASET_FULLY_CONNECTED_LAYER_DATASET_H__ + +#include "TypePrinter.h" + +#include "arm_compute/core/TensorShape.h" +#include "dataset/GenericDataset.h" + +#include <sstream> +#include <type_traits> + +#ifdef BOOST +#include "boost_wrapper.h" +#endif + +namespace arm_compute +{ +namespace test +{ +class FullyConnectedLayerDataObject +{ +public: + operator std::string() const + { + std::stringstream ss; + ss << "FullyConnectedLayer"; + ss << "_I" << src_shape; + ss << "_K" << weights_shape; + return ss.str(); + } + + friend std::ostream &operator<<(std::ostream &os, const FullyConnectedLayerDataObject &obj) + { + os << static_cast<std::string>(obj); + return os; + } + +public: + TensorShape src_shape; + TensorShape weights_shape; + TensorShape bias_shape; + TensorShape dst_shape; + bool transpose_weights; + bool are_weights_reshaped; +}; + +template <unsigned int Size> +using FullyConnectedLayerDataset = GenericDataset<FullyConnectedLayerDataObject, Size>; + +class SmallFullyConnectedLayerDataset final : public FullyConnectedLayerDataset<5> +{ +public: + SmallFullyConnectedLayerDataset() + : GenericDataset + { + FullyConnectedLayerDataObject{ TensorShape(9U, 5U, 7U), TensorShape(315U, 271U), TensorShape(271U), TensorShape(271U), true, false }, + FullyConnectedLayerDataObject{ TensorShape(9U, 5U, 7U, 3U), TensorShape(315U, 271U), TensorShape(271U), TensorShape(271U, 3U), true, false }, + FullyConnectedLayerDataObject{ TensorShape(201U), TensorShape(201U, 529U), TensorShape(529U), TensorShape(529U), true, false }, + FullyConnectedLayerDataObject{ TensorShape(9U, 5U, 7U), TensorShape(315U, 271U), TensorShape(271U), TensorShape(271U), true, true }, + FullyConnectedLayerDataObject{ TensorShape(201U), TensorShape(201U, 529U), TensorShape(529U), TensorShape(529U), true, true }, + } + { + } + + ~SmallFullyConnectedLayerDataset() = default; +}; + +class LargeFullyConnectedLayerDataset final : public FullyConnectedLayerDataset<5> +{ +public: + LargeFullyConnectedLayerDataset() + : GenericDataset + { + FullyConnectedLayerDataObject{ TensorShape(9U, 5U, 257U), TensorShape(11565U, 2123U), TensorShape(2123U), TensorShape(2123U), true, false }, + FullyConnectedLayerDataObject{ TensorShape(9U, 5U, 257U, 2U), TensorShape(11565U, 2123U), TensorShape(2123U), TensorShape(2123U, 2U), true, false }, + FullyConnectedLayerDataObject{ TensorShape(3127U), TensorShape(3127U, 989U), TensorShape(989U), TensorShape(989U), true, false }, + FullyConnectedLayerDataObject{ TensorShape(9U, 5U, 257U), TensorShape(11565U, 2123U), TensorShape(2123U), TensorShape(2123U), true, true }, + FullyConnectedLayerDataObject{ TensorShape(3127U), TensorShape(3127U, 989U), TensorShape(989U), TensorShape(989U), true, true }, + } + { + } + + ~LargeFullyConnectedLayerDataset() = default; +}; + +class AlexNetFullyConnectedLayerDataset final : public FullyConnectedLayerDataset<3> +{ +public: + AlexNetFullyConnectedLayerDataset() + : GenericDataset + { + FullyConnectedLayerDataObject{ TensorShape(6U, 6U, 256U), TensorShape(9216U, 4096U), TensorShape(4096U), TensorShape(4096U), true }, + FullyConnectedLayerDataObject{ TensorShape(4096U), TensorShape(4096U, 4096U), TensorShape(4096U), TensorShape(4096U), true }, + FullyConnectedLayerDataObject{ TensorShape(4096U), TensorShape(4096U, 1000U), TensorShape(1000U), TensorShape(1000U), true }, + } + { + } + + ~AlexNetFullyConnectedLayerDataset() = default; +}; + +class LeNet5FullyConnectedLayerDataset final : public FullyConnectedLayerDataset<2> +{ +public: + LeNet5FullyConnectedLayerDataset() +
: GenericDataset + { + FullyConnectedLayerDataObject{ TensorShape(4U, 4U, 50U), TensorShape(800U, 500U), TensorShape(500U), TensorShape(500U) }, + FullyConnectedLayerDataObject{ TensorShape(500U), TensorShape(500U, 10U), TensorShape(10U), TensorShape(10U) }, + } + { + } + + ~LeNet5FullyConnectedLayerDataset() = default; +}; + +class GoogLeNetFullyConnectedLayerDataset final : public FullyConnectedLayerDataset<1> +{ +public: + GoogLeNetFullyConnectedLayerDataset() + : GenericDataset + { + FullyConnectedLayerDataObject{ TensorShape(1024U), TensorShape(1024U, 1000U), TensorShape(1000U), TensorShape(1000U), true }, + } + { + } + + ~GoogLeNetFullyConnectedLayerDataset() = default; +}; +} // namespace test +} // namespace arm_compute +#endif //__ARM_COMPUTE_TEST_DATASET_FULLY_CONNECTED_LAYER_DATASET_H__ diff --git a/tests/dataset/GEMMDataset.h b/tests/dataset/GEMMDataset.h new file mode 100644 index 0000000000..f45bc3e838 --- /dev/null +++ b/tests/dataset/GEMMDataset.h @@ -0,0 +1,204 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_TEST_DATASET_GEMM_DATASET_H__ +#define __ARM_COMPUTE_TEST_DATASET_GEMM_DATASET_H__ + +#include "TypePrinter.h" + +#include "arm_compute/core/TensorShape.h" +#include "arm_compute/core/Types.h" +#include "dataset/GenericDataset.h" + +#include <ostream> +#include <sstream> + +#include <string> +#include <type_traits> + +#ifdef BOOST +#include "boost_wrapper.h" +#endif + +namespace arm_compute +{ +namespace test +{ +class GEMMDataObject +{ +public: + //Data object used for matrix multiply + //D = alpha * A * B + beta * C; + TensorShape shape_a; + TensorShape shape_b; + TensorShape shape_c; + TensorShape shape_d; + float alpha; + float beta; + + operator std::string() const + { + std::stringstream ss; + ss << "GEMM"; + ss << "_A" << shape_a; + ss << "_B" << shape_b; + ss << "_C" << shape_c; + ss << "_D" << shape_d; + ss << "_alpha" << alpha; + ss << "_beta" << beta; + return ss.str(); + } + + friend std::ostream &operator<<(std::ostream &os, const GEMMDataObject &obj) + { + os << static_cast<std::string>(obj); + return os; + } +}; + +class SmallGEMMDataset : public GenericDataset<GEMMDataObject, 4> +{ +public: + SmallGEMMDataset() + : GenericDataset + { + GEMMDataObject{ TensorShape(21u, 13u), TensorShape(33u, 21u), TensorShape(33u, 13u), TensorShape(33u, 13u), 1.0f, 0.0f }, + GEMMDataObject{ TensorShape(31u, 1u), TensorShape(23u, 31u), TensorShape(23u, 1u), TensorShape(23u, 1u), 1.0f, 0.0f }, + GEMMDataObject{ TensorShape(38u, 12u), TensorShape(21u, 38u), TensorShape(21u, 12u), TensorShape(21u, 12u), 0.2f, 1.2f }, + GEMMDataObject{ TensorShape(32u, 1u), TensorShape(17u, 32u), TensorShape(17u, 1u), TensorShape(17u, 1u), 0.4f, 0.7f }, + } + { + } + + ~SmallGEMMDataset() = default; +}; + +class LargeGEMMDataset : public GenericDataset<GEMMDataObject, 4> +{ +public: + LargeGEMMDataset() + : GenericDataset + { + GEMMDataObject{ TensorShape(923u, 429u), TensorShape(871u, 923u), TensorShape(871u, 429u), TensorShape(871u, 429u), 1.0f, 0.0f }, + GEMMDataObject{ TensorShape(1021u, 1u), TensorShape(783u, 1021u), TensorShape(783u, 1u), TensorShape(783u, 1u), 1.0f, 0.0f }, + GEMMDataObject{ TensorShape(681u, 1023u), TensorShape(213u, 681u), TensorShape(213u, 1023u), TensorShape(213u, 1023u), 0.2f, 1.2f }, + GEMMDataObject{ TensorShape(941u, 1u), TensorShape(623u, 941u), TensorShape(623u, 1u), TensorShape(623u, 1u), 0.4f, 0.7f }, + } + { + } + + ~LargeGEMMDataset() = default; +}; + +class GoogLeNetGEMMDataset1 : public GenericDataset<GEMMDataObject, 32> +{ +public: + GoogLeNetGEMMDataset1() + : GenericDataset + { + GEMMDataObject{ TensorShape(147U, 12544U), TensorShape(64U, 147U), TensorShape(64U, 12544U), TensorShape(64U, 12544U), 1.0f, 0.0f }, + GEMMDataObject{ TensorShape(64U, 3136U), TensorShape(64U, 64U), TensorShape(64U, 3136U), TensorShape(64U, 3136U), 1.0f, 0.0f }, + GEMMDataObject{ TensorShape(576U, 3136U), TensorShape(192U, 576U), TensorShape(192U, 3136U), TensorShape(192U, 3136U), 1.0f, 0.0f }, + GEMMDataObject{ TensorShape(192U, 784U), TensorShape(64U, 192U), TensorShape(64U, 784U), TensorShape(64U, 784U), 1.0f, 0.0f }, + GEMMDataObject{ TensorShape(192U, 784U), TensorShape(96U, 192U), TensorShape(96U, 784U), TensorShape(96U, 784U), 1.0f, 0.0f }, + GEMMDataObject{ TensorShape(864U, 784U), TensorShape(128U, 864U), TensorShape(128U, 784U), TensorShape(128U, 784U), 1.0f, 0.0f }, + GEMMDataObject{ TensorShape(192U, 784U), TensorShape(16U, 192U), TensorShape(16U, 784U), TensorShape(16U, 784U), 1.0f, 0.0f }, + GEMMDataObject{ TensorShape(400U, 784U), TensorShape(32U, 400U), TensorShape(32U, 784U), TensorShape(32U, 784U), 1.0f, 0.0f }, + GEMMDataObject{ TensorShape(192U, 784U),
TensorShape(32U, 192U), TensorShape(32U, 784U), TensorShape(32U, 784U), 1.0f, 0.0f }, + GEMMDataObject{ TensorShape(256U, 784U), TensorShape(128U, 256U), TensorShape(128U, 784U), TensorShape(128U, 784U), 1.0f, 0.0f }, + GEMMDataObject{ TensorShape(256U, 784U), TensorShape(128U, 256U), TensorShape(128U, 784U), TensorShape(128U, 784U), 1.0f, 0.0f }, + GEMMDataObject{ TensorShape(1152U, 784U), TensorShape(192U, 1152U), TensorShape(192U, 784U), TensorShape(192U, 784U), 1.0f, 0.0f }, + GEMMDataObject{ TensorShape(256U, 784U), TensorShape(32U, 256U), TensorShape(32U, 784U), TensorShape(32U, 784U), 1.0f, 0.0f }, + GEMMDataObject{ TensorShape(800U, 784U), TensorShape(96U, 800U), TensorShape(96U, 784U), TensorShape(96U, 784U), 1.0f, 0.0f }, + GEMMDataObject{ TensorShape(256U, 784U), TensorShape(64U, 256U), TensorShape(64U, 784U), TensorShape(64U, 784U), 1.0f, 0.0f }, + GEMMDataObject{ TensorShape(480U, 196U), TensorShape(192U, 480U), TensorShape(192U, 196U), TensorShape(192U, 196U), 1.0f, 0.0f }, + GEMMDataObject{ TensorShape(480U, 196U), TensorShape(96U, 480U), TensorShape(96U, 196U), TensorShape(96U, 196U), 1.0f, 0.0f }, + GEMMDataObject{ TensorShape(864U, 196U), TensorShape(204U, 864U), TensorShape(204U, 196U), TensorShape(204U, 196U), 1.0f, 0.0f }, + GEMMDataObject{ TensorShape(480U, 196U), TensorShape(16U, 480U), TensorShape(16U, 196U), TensorShape(16U, 196U), 1.0f, 0.0f }, + GEMMDataObject{ TensorShape(400U, 196U), TensorShape(48U, 400U), TensorShape(48U, 196U), TensorShape(48U, 196U), 1.0f, 0.0f }, + GEMMDataObject{ TensorShape(480U, 196U), TensorShape(64U, 480U), TensorShape(64U, 196U), TensorShape(64U, 196U), 1.0f, 0.0f }, + GEMMDataObject{ TensorShape(508U, 196U), TensorShape(160U, 508U), TensorShape(160U, 196U), TensorShape(160U, 196U), 1.0f, 0.0f }, + GEMMDataObject{ TensorShape(508U, 196U), TensorShape(112U, 508U), TensorShape(112U, 196U), TensorShape(112U, 196U), 1.0f, 0.0f }, + GEMMDataObject{ TensorShape(1008U, 196U), TensorShape(224U, 1008U), TensorShape(224U, 196U), TensorShape(224U, 196U), 1.0f, 0.0f }, + GEMMDataObject{ TensorShape(508U, 196U), TensorShape(24U, 508U), TensorShape(24U, 196U), TensorShape(24U, 196U), 1.0f, 0.0f }, + GEMMDataObject{ TensorShape(600U, 196U), TensorShape(64U, 600U), TensorShape(64U, 196U), TensorShape(64U, 196U), 1.0f, 0.0f }, + GEMMDataObject{ TensorShape(508U, 196U), TensorShape(64U, 508U), TensorShape(64U, 196U), TensorShape(64U, 196U), 1.0f, 0.0f }, + GEMMDataObject{ TensorShape(512U, 196U), TensorShape(128U, 512U), TensorShape(128U, 196U), TensorShape(128U, 196U), 1.0f, 0.0f }, + GEMMDataObject{ TensorShape(512U, 196U), TensorShape(128U, 512U), TensorShape(128U, 196U), TensorShape(128U, 196U), 1.0f, 0.0f }, + GEMMDataObject{ TensorShape(1152U, 196U), TensorShape(256U, 1152U), TensorShape(256U, 196U), TensorShape(256U, 196U), 1.0f, 0.0f }, + GEMMDataObject{ TensorShape(512U, 196U), TensorShape(24U, 512U), TensorShape(24U, 196U), TensorShape(24U, 196U), 1.0f, 0.0f }, + GEMMDataObject{ TensorShape(600U, 196U), TensorShape(64U, 600U), TensorShape(64U, 196U), TensorShape(64U, 196U), 1.0f, 0.0f } + } + { + } + + ~GoogLeNetGEMMDataset1() = default; +}; + +class GoogLeNetGEMMDataset2 : public GenericDataset<GEMMDataObject, 32> +{ +public: + GoogLeNetGEMMDataset2() + : GenericDataset + { + GEMMDataObject{ TensorShape(512U, 196U), TensorShape(64U, 512U), TensorShape(64U, 196U), TensorShape(64U, 196U), 1.0f, 0.0f }, + GEMMDataObject{ TensorShape(512U, 196U), TensorShape(112U, 512U), TensorShape(112U, 196U), TensorShape(112U, 196U), 1.0f, 0.0f }, + GEMMDataObject{
TensorShape(512U, 196U), TensorShape(144U, 512U), TensorShape(144U, 196U), TensorShape(144U, 196U), 1.0f, 0.0f }, + GEMMDataObject{ TensorShape(1296U, 196U), TensorShape(288U, 1296U), TensorShape(288U, 196U), TensorShape(288U, 196U), 1.0f, 0.0f }, + GEMMDataObject{ TensorShape(512U, 196U), TensorShape(32U, 512U), TensorShape(32U, 196U), TensorShape(32U, 196U), 1.0f, 0.0f }, + GEMMDataObject{ TensorShape(800U, 196U), TensorShape(64U, 800U), TensorShape(64U, 196U), TensorShape(64U, 196U), 1.0f, 0.0f }, + GEMMDataObject{ TensorShape(512U, 196U), TensorShape(64U, 512U), TensorShape(64U, 196U), TensorShape(64U, 196U), 1.0f, 0.0f }, + GEMMDataObject{ TensorShape(528U, 196U), TensorShape(256U, 528U), TensorShape(256U, 196U), TensorShape(256U, 196U), 1.0f, 0.0f }, + GEMMDataObject{ TensorShape(528U, 196U), TensorShape(160U, 528U), TensorShape(160U, 196U), TensorShape(160U, 196U), 1.0f, 0.0f }, + GEMMDataObject{ TensorShape(1440U, 196U), TensorShape(320U, 1440U), TensorShape(320U, 196U), TensorShape(320U, 196U), 1.0f, 0.0f }, + GEMMDataObject{ TensorShape(528U, 196U), TensorShape(32U, 528U), TensorShape(32U, 196U), TensorShape(32U, 196U), 1.0f, 0.0f }, + GEMMDataObject{ TensorShape(800U, 196U), TensorShape(128U, 800U), TensorShape(128U, 196U), TensorShape(128U, 196U), 1.0f, 0.0f }, + GEMMDataObject{ TensorShape(528U, 196U), TensorShape(128U, 528U), TensorShape(128U, 196U), TensorShape(128U, 196U), 1.0f, 0.0f }, + GEMMDataObject{ TensorShape(832U, 49U), TensorShape(256U, 832U), TensorShape(256U, 49U), TensorShape(256U, 49U), 1.0f, 0.0f }, + GEMMDataObject{ TensorShape(832U, 49U), TensorShape(160U, 832U), TensorShape(160U, 49U), TensorShape(160U, 49U), 1.0f, 0.0f }, + GEMMDataObject{ TensorShape(1440U, 49U), TensorShape(320U, 1440U), TensorShape(320U, 49U), TensorShape(320U, 49U), 1.0f, 0.0f }, + GEMMDataObject{ TensorShape(832U, 49U), TensorShape(48U, 832U), TensorShape(48U, 49U), TensorShape(48U, 49U), 1.0f, 0.0f }, + GEMMDataObject{ TensorShape(1200U, 49U), TensorShape(128U, 1200U), TensorShape(128U, 49U), TensorShape(128U, 49U), 1.0f, 0.0f }, + GEMMDataObject{ TensorShape(832U, 49U), TensorShape(128U, 832U), TensorShape(128U, 49U), TensorShape(128U, 49U), 1.0f, 0.0f }, + GEMMDataObject{ TensorShape(832U, 49U), TensorShape(384U, 832U), TensorShape(384U, 49U), TensorShape(384U, 49U), 1.0f, 0.0f }, + GEMMDataObject{ TensorShape(832U, 49U), TensorShape(192U, 832U), TensorShape(192U, 49U), TensorShape(192U, 49U), 1.0f, 0.0f }, + GEMMDataObject{ TensorShape(1728U, 49U), TensorShape(384U, 1728U), TensorShape(384U, 49U), TensorShape(384U, 49U), 1.0f, 0.0f }, + GEMMDataObject{ TensorShape(832U, 49U), TensorShape(48U, 832U), TensorShape(48U, 49U), TensorShape(48U, 49U), 1.0f, 0.0f }, + GEMMDataObject{ TensorShape(1200U, 49U), TensorShape(128U, 1200U), TensorShape(128U, 49U), TensorShape(128U, 49U), 1.0f, 0.0f }, + GEMMDataObject{ TensorShape(832U, 49U), TensorShape(128U, 832U), TensorShape(128U, 49U), TensorShape(128U, 49U), 1.0f, 0.0f }, + GEMMDataObject{ TensorShape(508U, 16U), TensorShape(128U, 508U), TensorShape(128U, 16U), TensorShape(128U, 16U), 1.0f, 0.0f }, + GEMMDataObject{ TensorShape(2048U, 1U), TensorShape(1024U, 2048U), TensorShape(1024U, 1U), TensorShape(1024U, 1U), 1.0f, 0.0f }, + GEMMDataObject{ TensorShape(1024U, 1U), TensorShape(1008U, 1024U), TensorShape(1008U, 1U), TensorShape(1008U, 1U), 1.0f, 0.0f }, + GEMMDataObject{ TensorShape(528U, 16U), TensorShape(128U, 528U), TensorShape(128U, 16U), TensorShape(128U, 16U), 1.0f, 0.0f }, + GEMMDataObject{ TensorShape(2048U, 1U), 
TensorShape(1024U, 2048U), TensorShape(1024U, 1U), TensorShape(1024U, 1U), 1.0f, 0.0f }, + GEMMDataObject{ TensorShape(1024U, 1U), TensorShape(1008U, 1024U), TensorShape(1008U, 1U), TensorShape(1008U, 1U), 1.0f, 0.0f }, + GEMMDataObject{ TensorShape(1024U, 1U), TensorShape(1008U, 1024U), TensorShape(1008U, 1U), TensorShape(1008U, 1U), 1.0f, 0.0f } + } + { + } + + ~GoogLeNetGEMMDataset2() = default; +}; +} // namespace test +} // namespace arm_compute +#endif //__ARM_COMPUTE_TEST_DATASET_GEMM_DATASET_H__
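A note for orientation (illustrative, not part of the patch): every GEMM entry above encodes D = alpha * A * B + beta * C, and TensorShape lists width (columns) before height (rows), so shape_a is (K, M), shape_b is (N, K), and shape_c/shape_d are (N, M); the first small entry, for example, has M = 13, K = 21, N = 33. A minimal row-major sketch of those semantics, with invented names:

    #include <cstddef>

    // Hedged reference of one GEMM sample's semantics, not the library's kernel.
    void reference_gemm(const float *a, const float *b, const float *c, float *d,
                        std::size_t m, std::size_t n, std::size_t k, float alpha, float beta)
    {
        for(std::size_t row = 0; row < m; ++row)
        {
            for(std::size_t col = 0; col < n; ++col)
            {
                float acc = 0.f;
                for(std::size_t i = 0; i < k; ++i)
                {
                    acc += a[row * k + i] * b[i * n + col]; // A is M x K, B is K x N
                }
                d[row * n + col] = alpha * acc + beta * c[row * n + col];
            }
        }
    }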
diff --git a/tests/dataset/GenericDataset.h b/tests/dataset/GenericDataset.h new file mode 100644 index 0000000000..4ca489bd82 --- /dev/null +++ b/tests/dataset/GenericDataset.h @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_TEST_DATASET_GENERIC_DATASET_H__ +#define __ARM_COMPUTE_TEST_DATASET_GENERIC_DATASET_H__ + +#include "arm_compute/core/TensorShape.h" +#include "arm_compute/core/Types.h" + +#include <array> + +#ifdef BOOST +#include "boost_wrapper.h" +#endif + +namespace arm_compute +{ +namespace test +{ +/** Abstract data set containing multiple objects T. + * + * Can be used as input for Boost data test cases to automatically run a test + * case on different configurations. + */ +template <class T, unsigned int Size> +class GenericDataset +{ +public: + /** Type of the samples in the data set. */ + using sample = T; + + /** Dimensionality of the data set. */ + enum + { + arity = 1 + }; + + /** Number of samples in the data set. */ +#ifdef BOOST + boost::unit_test::data::size_t size() const +#else + unsigned int size() const +#endif + { + return _data.size(); + } + + /** Type of the iterator used to step through all samples in the data set. + * Needs to support operator*() and operator++() which a pointer does. + */ + using iterator = const T *; + + /** Iterator to the first sample in the data set. */ + iterator begin() const + { + return _data.data(); + } + +protected: + /** Protected constructor to make the class abstract. */ + template <typename... Ts> + GenericDataset(Ts... objs) + : _data{ { objs... } } + { + } + + /** Protected destructor to prevent deletion of derived class through a + * pointer to the base class. + */ + ~GenericDataset() = default; + +private: + std::array<T, Size> _data; +}; +} // namespace test +} // namespace arm_compute +#endif //__ARM_COMPUTE_TEST_DATASET_GENERIC_DATASET_H__
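All of the concrete datasets in this patch follow the same derivation pattern from GenericDataset. A hedged sketch, with the class name and samples invented for illustration (note that GenericDataset can be named in the initializer list because the base class's injected-class-name is inherited):

    // Sketch only: a two-sample dataset built on GenericDataset.
    // The derived constructor forwards its samples to the protected base constructor.
    class ExampleShapePairDataset final : public GenericDataset<TensorShape, 2>
    {
    public:
        ExampleShapePairDataset()
            : GenericDataset{ TensorShape(5U, 5U), TensorShape(27U, 13U, 2U) }
        {
        }

        ~ExampleShapePairDataset() = default;
    };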
diff --git a/tests/dataset/ImageDatasets.h b/tests/dataset/ImageDatasets.h new file mode 100644 index 0000000000..555227e83b --- /dev/null +++ b/tests/dataset/ImageDatasets.h @@ -0,0 +1,120 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_TEST_IMAGE_DATASETS_H__ +#define __ARM_COMPUTE_TEST_IMAGE_DATASETS_H__ + +#include <array> +#include <string> + +#ifdef BOOST +#include "boost_wrapper.h" +#endif + +namespace arm_compute +{ +namespace test +{ +/** Abstract data set containing image names. + * + * Can be used as input for Boost data test cases to automatically run a test + * case on different images. + */ +template <unsigned int Size> +class ImageDataset +{ +public: + /** Type of the samples in the data set. */ + using sample = const std::string; + + /** Dimensionality of the data set. */ + enum + { + arity = 1 + }; + + /** Number of samples in the data set. */ +#ifdef BOOST + boost::unit_test::data::size_t size() const +#else + unsigned int size() const +#endif + { + return _images.size(); + } + + /** Type of the iterator used to step through all samples in the data set. + * Needs to support operator*() and operator++() which a pointer does. + */ + using iterator = const std::string *; + + /** Iterator to the first sample in the data set. */ + iterator begin() const + { + return _images.data(); + } + +protected: + /** Protected constructor to make the class abstract. */ + template <typename... Ts> + ImageDataset(Ts... images) + : _images{ { images... } } + { + } + + /** Protected destructor to prevent deletion of derived class through a + * pointer to the base class. + */ + ~ImageDataset() = default; + +private: + std::array<std::string, Size> _images; +}; + +/** Data set containing names of small images. */ +class SmallImages final : public ImageDataset<2> +{ +public: + SmallImages() + : ImageDataset("128x128.ppm", "640x480.ppm") + { + } +}; + +/** Data set containing names of large images. + */ +class LargeImages final : public ImageDataset<3> +{ +public: + LargeImages() +#ifdef INTERNAL_ONLY + : ImageDataset("1280x720.ppm", "1920x1080.ppm", "4160x3120.ppm") + // The 4k image is too large to distribute +#else + : ImageDataset("1280x720.ppm", "1920x1080.ppm") +#endif /* INTERNAL_ONLY */ + { + } +}; +} // namespace test +} // namespace arm_compute +#endif diff --git a/tests/dataset/InterpolationPolicyDataset.h b/tests/dataset/InterpolationPolicyDataset.h new file mode 100644 index 0000000000..e6062eae53 --- /dev/null +++ b/tests/dataset/InterpolationPolicyDataset.h @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_TEST_INTERPOLATION_POLICY_DATASET_H__ +#define __ARM_COMPUTE_TEST_INTERPOLATION_POLICY_DATASET_H__ + +#include "arm_compute/core/Types.h" + +#ifdef BOOST +#include "boost_wrapper.h" +#endif + +namespace arm_compute +{ +namespace test +{ +/** Data set containing all possible interpolation policies. + * + * Can be used as input for Boost data test cases to automatically run a test + * case on all interpolation policies. + */ +class InterpolationPolicies +{ +public: + /** Type of the samples in the data set. */ + using sample = InterpolationPolicy; + + /** Dimensionality of the data set. */ + enum + { + arity = 1 + }; + + /** Number of samples in the data set. */ +#ifdef BOOST + boost::unit_test::data::size_t size() const +#else + unsigned int size() const +#endif + { + return _policies.size(); + } + + /** Type of the iterator used to step through all samples in the data set. + * Needs to support operator*() and operator++() which a pointer does. + */ + using iterator = const InterpolationPolicy *; + + /** Iterator to the first sample in the data set. */ + iterator begin() const + { + return _policies.data(); + } + +private: + std::array<InterpolationPolicy, 3> _policies{ { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR, InterpolationPolicy::AREA } }; +}; +} // namespace test +} // namespace arm_compute +#endif
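For reference (a sketch, not a quote of this patch's code): custom dataset types such as the ones above are registered with Boost.Test elsewhere in this patch, in tests/validation/Datasets.h, so that BOOST_DATA_TEST_CASE accepts them directly and they compose with other datasets. The registration is assumed to look roughly like this:

    // Assumed shape of the registration done in tests/validation/Datasets.h:
    namespace boost
    {
    namespace unit_test
    {
    namespace data
    {
    namespace monomorphic
    {
    // Mark the type as a dataset so the data test case macros accept it as-is.
    template <>
    struct is_dataset<arm_compute::test::InterpolationPolicies> : mpl::true_
    {
    };
    } // namespace monomorphic
    } // namespace data
    } // namespace unit_test
    } // namespace boost

    // Registered datasets compose; the grid product below would yield one test
    // instance per (policy, border_mode) combination:
    // BOOST_DATA_TEST_CASE(Example, InterpolationPolicies() * BorderModes(), policy, border_mode)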
diff --git a/tests/dataset/NormalizationLayerDataset.h b/tests/dataset/NormalizationLayerDataset.h new file mode 100644 index 0000000000..7234f41551 --- /dev/null +++ b/tests/dataset/NormalizationLayerDataset.h @@ -0,0 +1,99 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_TEST_DATASET_NORMALIZATION_LAYER_DATASET_H__ +#define __ARM_COMPUTE_TEST_DATASET_NORMALIZATION_LAYER_DATASET_H__ + +#include "TypePrinter.h" + +#include "arm_compute/core/TensorShape.h" +#include "arm_compute/core/Types.h" +#include "dataset/GenericDataset.h" + +#include <sstream> +#include <type_traits> + +#ifdef BOOST +#include "boost_wrapper.h" +#endif + +namespace arm_compute +{ +namespace test +{ +class NormalizationLayerDataObject +{ +public: + operator std::string() const + { + std::stringstream ss; + ss << "NormalizationLayer"; + ss << "_I" << shape; + ss << "_F_" << info.type(); + ss << "_S_" << info.norm_size(); + return ss.str(); + } + +public: + TensorShape shape; + NormalizationLayerInfo info; +}; + +template <unsigned int Size> +using NormalizationLayerDataset = GenericDataset<NormalizationLayerDataObject, Size>; + +class GoogLeNetNormalizationLayerDataset final : public NormalizationLayerDataset<2> +{ +public: + GoogLeNetNormalizationLayerDataset() + : GenericDataset + { + // conv2/norm2 + NormalizationLayerDataObject{ TensorShape(56U, 56U, 192U), NormalizationLayerInfo(NormType::CROSS_MAP, 5, 0.0001f, 0.75f) }, + // pool1/norm1 + NormalizationLayerDataObject{ TensorShape(56U, 56U, 64U), NormalizationLayerInfo(NormType::CROSS_MAP, 5, 0.0001f, 0.75f) } + } + { + } + + ~GoogLeNetNormalizationLayerDataset() = default; +}; + +class AlexNetNormalizationLayerDataset final : public NormalizationLayerDataset<2> +{ +public: + AlexNetNormalizationLayerDataset() + : GenericDataset + { + NormalizationLayerDataObject{ TensorShape(55U, 55U, 96U), NormalizationLayerInfo(NormType::CROSS_MAP, 5, 0.0001f, 0.75f) }, + NormalizationLayerDataObject{ TensorShape(27U, 27U, 256U), NormalizationLayerInfo(NormType::CROSS_MAP, 5, 0.0001f, 0.75f) }, + } + { + } + + ~AlexNetNormalizationLayerDataset() = default; +}; + +} // namespace test +} // namespace arm_compute +#endif //__ARM_COMPUTE_TEST_DATASET_NORMALIZATION_LAYER_DATASET_H__
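For orientation (a sketch of intent, not a statement of the kernel's exact formula): the CROSS_MAP entries above mirror the local response normalization layers of AlexNet and GoogLeNet. Assuming NormalizationLayerInfo(type, norm_size, alpha, beta) carries the usual LRN parameters, with kappa defaulting to 1 and the alpha/n scaling used by Caffe-style implementations, the computed function across n = norm_size = 5 neighbouring channels is:

    b^{i}_{x,y} = a^{i}_{x,y} / \left( \kappa + \frac{\alpha}{n} \sum_{j = \max(0,\, i - n/2)}^{\min(N-1,\, i + n/2)} \left( a^{j}_{x,y} \right)^{2} \right)^{\beta}

with alpha = 0.0001 and beta = 0.75 in every entry above.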
diff --git a/tests/dataset/NormalizationTypeDataset.h b/tests/dataset/NormalizationTypeDataset.h new file mode 100644 index 0000000000..bb1975c6c9 --- /dev/null +++ b/tests/dataset/NormalizationTypeDataset.h @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_TEST_NORMALIZATION_TYPE_DATASET_H__ +#define __ARM_COMPUTE_TEST_NORMALIZATION_TYPE_DATASET_H__ + +#include "arm_compute/core/Types.h" + +#ifdef BOOST +#include "boost_wrapper.h" +#endif + +namespace arm_compute +{ +namespace test +{ +/** Data set containing all possible normalization types. + * + * Can be used as input for Boost data test cases to automatically run a test + * case on all normalization types. + */ +class NormalizationTypes +{ +public: + /** Type of the samples in the data set. */ + using sample = NormType; + + /** Dimensionality of the data set. */ + enum + { + arity = 1 + }; + + /** Number of samples in the data set. */ +#ifdef BOOST + boost::unit_test::data::size_t size() const +#else + unsigned int size() const +#endif + { + return _types.size(); + } + + /** Type of the iterator used to step through all samples in the data set. + * Needs to support operator*() and operator++() which a pointer does. + */ + using iterator = const NormType *; + + /** Iterator to the first sample in the data set. */ + iterator begin() const + { + return _types.data(); + } + +private: + std::array<NormType, 3> _types{ { NormType::IN_MAP_1D, NormType::IN_MAP_2D, NormType::CROSS_MAP } }; +}; +} // namespace test +} // namespace arm_compute +#endif
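The pooling data sets in the next file pair each source shape with the destination shape implied by the pool size, stride and padding. As a sketch of the conventional relation assumed here (DimensionRoundingType selecting floor or ceil; names invented for illustration):

    #include <cmath>

    // e.g. pool1/3x3_s2:  ceil((112 + 0 - 3) / 2) + 1 = 56   (CEIL rounding)
    //      AlexNet pool: floor((55 + 0 - 3) / 2) + 1 = 27    (default FLOOR)
    unsigned int pooled_dim(unsigned int in, unsigned int pool_size, unsigned int pad, unsigned int stride, bool use_ceil)
    {
        const float out = static_cast<float>(in + 2 * pad - pool_size) / static_cast<float>(stride);
        return static_cast<unsigned int>(use_ceil ? std::ceil(out) : std::floor(out)) + 1U;
    }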
diff --git a/tests/dataset/PoolingLayerDataset.h b/tests/dataset/PoolingLayerDataset.h new file mode 100644 index 0000000000..0a50a763ce --- /dev/null +++ b/tests/dataset/PoolingLayerDataset.h @@ -0,0 +1,159 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_TEST_DATASET_POOLING_LAYER_DATASET_H__ +#define __ARM_COMPUTE_TEST_DATASET_POOLING_LAYER_DATASET_H__ + +#include "TypePrinter.h" + +#include "arm_compute/core/TensorShape.h" +#include "arm_compute/core/Types.h" +#include "dataset/GenericDataset.h" + +#include <sstream> + +#ifdef BOOST +#include "boost_wrapper.h" +#endif + +namespace arm_compute +{ +namespace test +{ +class PoolingLayerDataObject +{ +public: + operator std::string() const + { + std::stringstream ss; + ss << "PoolingLayer"; + ss << "_I" << src_shape; + ss << "_S_" << info.pool_size(); + ss << "_F_" << info.pool_type(); + ss << "_PS" << info.pad_stride_info(); + return ss.str(); + } + + friend std::ostream &operator<<(std::ostream &s, const PoolingLayerDataObject &obj) + { + s << static_cast<std::string>(obj); + return s; + } + +public: + TensorShape src_shape; + TensorShape dst_shape; + PoolingLayerInfo info; +}; + +template <unsigned int Size> +using PoolingLayerDataset = GenericDataset<PoolingLayerDataObject, Size>; + +class AlexNetPoolingLayerDataset final : public PoolingLayerDataset<3> +{ +public: + AlexNetPoolingLayerDataset() + : GenericDataset + { + PoolingLayerDataObject{ TensorShape(55U, 55U, 96U), TensorShape(27U, 27U, 96U), PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0)) }, + PoolingLayerDataObject{ TensorShape(27U, 27U, 256U), TensorShape(13U, 13U, 256U), PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0)) }, + PoolingLayerDataObject{ TensorShape(13U, 13U, 256U), TensorShape(6U, 6U, 256U), PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0)) }, + } + { + } + + ~AlexNetPoolingLayerDataset() = default; +}; + +class LeNet5PoolingLayerDataset final : public PoolingLayerDataset<2> +{ +public: + LeNet5PoolingLayerDataset() + : GenericDataset + { + PoolingLayerDataObject{ TensorShape(24U, 24U, 20U), TensorShape(12U, 12U, 20U), PoolingLayerInfo(PoolingType::MAX, 2, PadStrideInfo(2, 2, 0, 0)) }, + PoolingLayerDataObject{ TensorShape(8U, 8U, 50U), TensorShape(4U, 4U, 50U), PoolingLayerInfo(PoolingType::MAX, 2, PadStrideInfo(2, 2, 0, 0)) }, + } + { + } + + ~LeNet5PoolingLayerDataset() = default; +}; + +class GoogLeNetPoolingLayerDataset final : public PoolingLayerDataset<10> +{ +public: + GoogLeNetPoolingLayerDataset() + : GenericDataset + { + // FIXME: Add support for 7x7 pooling layer pool5/7x7_s1 + // pool1/3x3_s2 + PoolingLayerDataObject{ TensorShape(112U, 112U, 64U), TensorShape(56U, 56U, 64U), PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL)) }, + // pool2/3x3_s2 + PoolingLayerDataObject{ TensorShape(56U, 56U, 192U), TensorShape(28U, 28U, 192U), PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL)) }, + // inception_3a/pool + PoolingLayerDataObject{ TensorShape(28U, 28U, 192U), TensorShape(28U, 28U, 192U), PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL)) }, + // inception_3b/pool + PoolingLayerDataObject{ TensorShape(28U, 28U, 256U), TensorShape(28U, 28U, 256U), PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL)) }, + // pool3/3x3_s2 + PoolingLayerDataObject{ TensorShape(28U, 28U, 480U), TensorShape(14U, 14U, 480U), PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0,
DimensionRoundingType::CEIL)) }, + // inception_4a/pool + PoolingLayerDataObject{ TensorShape(14U, 14U, 480U), TensorShape(14U, 14U, 480U), PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL)) }, + // inception_4b/pool, inception_4c/pool, inception_4d/pool + PoolingLayerDataObject{ TensorShape(14U, 14U, 512U), TensorShape(14U, 14U, 512U), PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL)) }, + // inception_4e/pool + PoolingLayerDataObject{ TensorShape(14U, 14U, 528U), TensorShape(14U, 14U, 528U), PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL)) }, + // pool4/3x3_s2 + PoolingLayerDataObject{ TensorShape(14U, 14U, 832U), TensorShape(7U, 7U, 832U), PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL)) }, + // inception_5a/pool, inception_5b/pool + PoolingLayerDataObject{ TensorShape(7U, 7U, 832U), TensorShape(7U, 7U, 832U), PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL)) }, + } + { + } + + ~GoogLeNetPoolingLayerDataset() = default; +}; + +class RandomPoolingLayerDataset final : public PoolingLayerDataset<8> +{ +public: + RandomPoolingLayerDataset() + : GenericDataset + { + PoolingLayerDataObject{ TensorShape(27U, 27U, 16U), TensorShape(13U, 13U, 16U), PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0)) }, + PoolingLayerDataObject{ TensorShape(13U, 13U, 32U), TensorShape(6U, 6U, 32U), PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0)) }, + PoolingLayerDataObject{ TensorShape(24U, 24U, 10U), TensorShape(12U, 12U, 10U), PoolingLayerInfo(PoolingType::MAX, 2, PadStrideInfo(2, 2, 0, 0)) }, + PoolingLayerDataObject{ TensorShape(8U, 8U, 30U), TensorShape(4U, 4U, 30U), PoolingLayerInfo(PoolingType::MAX, 2, PadStrideInfo(2, 2, 0, 0)) }, + PoolingLayerDataObject{ TensorShape(27U, 27U, 16U), TensorShape(13U, 13U, 16U), PoolingLayerInfo(PoolingType::AVG, 3, PadStrideInfo(2, 2, 0, 0)) }, + PoolingLayerDataObject{ TensorShape(13U, 13U, 32U), TensorShape(6U, 6U, 32U), PoolingLayerInfo(PoolingType::AVG, 3, PadStrideInfo(2, 2, 0, 0)) }, + PoolingLayerDataObject{ TensorShape(24U, 24U, 10U), TensorShape(12U, 12U, 10U), PoolingLayerInfo(PoolingType::AVG, 2, PadStrideInfo(2, 2, 0, 0)) }, + PoolingLayerDataObject{ TensorShape(8U, 8U, 30U), TensorShape(4U, 4U, 30U), PoolingLayerInfo(PoolingType::AVG, 2, PadStrideInfo(2, 2, 0, 0)) }, + } + { + } + + ~RandomPoolingLayerDataset() = default; +}; +} // namespace test +} // namespace arm_compute +#endif //__ARM_COMPUTE_TEST_DATASET_POOLING_LAYER_DATASET_H__ diff --git a/tests/dataset/RoundingPolicyDataset.h b/tests/dataset/RoundingPolicyDataset.h new file mode 100644 index 0000000000..c70872020b --- /dev/null +++ b/tests/dataset/RoundingPolicyDataset.h @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_TEST_ROUNDING_POLICY_DATASETS_H__
+#define __ARM_COMPUTE_TEST_ROUNDING_POLICY_DATASETS_H__
+
+#include "arm_compute/core/Types.h"
+
+#include <array>
+
+#ifdef BOOST
+#include "boost_wrapper.h"
+#endif
+
+namespace arm_compute
+{
+namespace test
+{
+/** Data set containing all possible rounding policies.
+ *
+ * Can be used as input for Boost data test cases to automatically run a test
+ * case on different rounding policies.
+ */
+class RoundingPolicies
+{
+public:
+    /** Type of the samples in the data set. */
+    using sample = RoundingPolicy;
+
+    /** Dimensionality of the data set. */
+    enum
+    {
+        arity = 1
+    };
+
+    /** Number of samples in the data set. */
+#ifdef BOOST
+    boost::unit_test::data::size_t size() const
+#else
+    unsigned int size() const
+#endif
+    {
+        return _policies.size();
+    }
+
+    /** Type of the iterator used to step through all samples in the data set.
+     * Needs to support operator*() and operator++() which a pointer does.
+     */
+    using iterator = const RoundingPolicy *;
+
+    /** Iterator to the first sample in the data set. */
+    iterator begin() const
+    {
+        return _policies.data();
+    }
+
+private:
+    std::array<RoundingPolicy, 3> _policies{ { RoundingPolicy::TO_ZERO, RoundingPolicy::TO_NEAREST_UP, RoundingPolicy::TO_NEAREST_EVEN } };
+};
+} // namespace test
+} // namespace arm_compute
+#endif
diff --git a/tests/dataset/ShapeDatasets.h b/tests/dataset/ShapeDatasets.h
new file mode 100644
index 0000000000..73bdb8ea0e
--- /dev/null
+++ b/tests/dataset/ShapeDatasets.h
@@ -0,0 +1,130 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_TEST_SHAPE_DATASETS_H__
+#define __ARM_COMPUTE_TEST_SHAPE_DATASETS_H__
+
+#include "arm_compute/core/TensorShape.h"
+
+#include <array>
+
+#ifdef BOOST
+#include "boost_wrapper.h"
+#endif
+
+namespace arm_compute
+{
+namespace test
+{
+/** Abstract data set containing tensor shapes.
+ *
+ * Can be used as input for Boost data test cases to automatically run a test
+ * case on different tensor shapes.
+ */
+template <unsigned int Size>
+class ShapeDataset
+{
+public:
+    /** Type of the samples in the data set. */
+    using sample = TensorShape;
+
+    /** Dimensionality of the data set. */
+    enum
+    {
+        arity = 1
+    };
+
+    /** Number of samples in the data set. */
+#ifdef BOOST
+    boost::unit_test::data::size_t size() const
+#else
+    unsigned int size() const
+#endif
+    {
+        return _shapes.size();
+    }
+
+    /** Type of the iterator used to step through all samples in the data set.
+     * Needs to support operator*() and operator++() which a pointer does.
+     */
+    using iterator = const TensorShape *;
+
+    /** Iterator to the first sample in the data set. */
+    iterator begin() const
+    {
+        return _shapes.data();
+    }
+
+protected:
+    /** Protected constructor to make the class abstract. */
+    template <typename... Ts>
+    ShapeDataset(Ts... shapes)
+        : _shapes{ { shapes... } }
+    {
+    }
+
+    /** Protected destructor to prevent deletion of derived class through a
+     * pointer to the base class.
+     */
+    ~ShapeDataset() = default;
+
+private:
+    std::array<TensorShape, Size> _shapes;
+};
+
+/** Data set containing one 1D tensor shape. */
+class Small1DShape final : public ShapeDataset<1>
+{
+public:
+    Small1DShape()
+        : ShapeDataset(TensorShape(128U))
+    {
+    }
+};
+
+/** Data set containing small tensor shapes. */
+class SmallShapes final : public ShapeDataset<3>
+{
+public:
+    SmallShapes()
+        : ShapeDataset(TensorShape(5U, 5U),
+                       TensorShape(27U, 13U, 2U),
+                       TensorShape(128U, 64U, 1U, 3U))
+    {
+    }
+};
+
+/** Data set containing large tensor shapes. */
+class LargeShapes final : public ShapeDataset<3>
+{
+public:
+    LargeShapes()
+        : ShapeDataset(TensorShape(1920U, 1080U),
+                       TensorShape(1245U, 652U, 1U, 3U),
+                       TensorShape(4160U, 3120U))
+    {
+    }
+};
+} // namespace test
+} // namespace arm_compute
+#endif
diff --git a/tests/dataset/ThresholdDataset.h b/tests/dataset/ThresholdDataset.h
new file mode 100644
index 0000000000..956cf3d54d
--- /dev/null
+++ b/tests/dataset/ThresholdDataset.h
@@ -0,0 +1,95 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
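The size/arity/iterator members above are exactly what Boost.Test's dataset API consumes; a minimal sketch of a data-driven case over the shape sets (DemoShapes is a hypothetical test name, while the dataset concatenation with operator+ is used by the real suites later in this patch):

    BOOST_DATA_TEST_CASE(DemoShapes, SmallShapes() + LargeShapes(), shape)
    {
        // The case body runs once per TensorShape in the concatenated data set
        BOOST_TEST(shape.total_size() > 0);
    }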
+ */
+#ifndef __ARM_COMPUTE_TEST_DATASET_THRESHOLD_DATASET_H__
+#define __ARM_COMPUTE_TEST_DATASET_THRESHOLD_DATASET_H__
+
+#include "TypePrinter.h"
+
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+#include "dataset/GenericDataset.h"
+
+#include <cstdint>
+#include <sstream>
+
+#include <ostream>
+#include <string>
+
+#ifdef BOOST
+#include "boost_wrapper.h"
+#endif
+
+namespace arm_compute
+{
+namespace test
+{
+class ThresholdDataObject
+{
+public:
+    uint8_t       threshold;
+    uint8_t       false_value;
+    uint8_t       true_value;
+    ThresholdType type;
+    uint8_t       upper;
+
+    operator std::string() const
+    {
+        std::stringstream ss;
+        ss << "Threshold";
+        ss << "_threshold_value" << threshold;
+        ss << "_false_value" << false_value;
+        ss << "_true_value" << true_value;
+        ss << "_type";
+        ss << ((type == ThresholdType::BINARY) ? "binary" : "range");
+        ss << "_upper" << upper;
+        return ss.str();
+    }
+
+    friend std::ostream &operator<<(std::ostream &os, const ThresholdDataObject &obj)
+    {
+        os << static_cast<std::string>(obj);
+        return os;
+    }
+};
+
+class ThresholdDataset : public GenericDataset<ThresholdDataObject, 4>
+{
+public:
+    ThresholdDataset()
+        : GenericDataset
+    {
+        ThresholdDataObject{ 10U, 25U, 3U, ThresholdType::BINARY, 0U },
+        ThresholdDataObject{ 20U, 1U, 0U, ThresholdType::BINARY, 0U },
+        ThresholdDataObject{ 30U, 1U, 0U, ThresholdType::RANGE, 100U },
+        ThresholdDataObject{ 100U, 1U, 0U, ThresholdType::RANGE, 200U },
+    }
+    {
+    }
+
+    ~ThresholdDataset() = default;
+};
+
+} // namespace test
+} // namespace arm_compute
+#endif //__ARM_COMPUTE_TEST_DATASET_THRESHOLD_DATASET_H__
diff --git a/tests/model_objects/AlexNet.h b/tests/model_objects/AlexNet.h
new file mode 100644
index 0000000000..d49ef0645a
--- /dev/null
+++ b/tests/model_objects/AlexNet.h
@@ -0,0 +1,582 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
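For reference, the semantics the two ThresholdType modes above encode, sketched as a scalar helper (threshold_op is illustrative and follows the OpenVX-style definition these tests exercise; it is not part of the patch):

    #include <cstdint>

    uint8_t threshold_op(uint8_t src, const arm_compute::test::ThresholdDataObject &t)
    {
        if(t.type == arm_compute::ThresholdType::BINARY)
        {
            // BINARY: a single threshold decides the output value
            return src > t.threshold ? t.true_value : t.false_value;
        }
        // RANGE: values inside [threshold, upper] map to true_value, outside to false_value
        return (src > t.upper || src < t.threshold) ? t.false_value : t.true_value;
    }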
+ */
+#ifndef __ARM_COMPUTE_TEST_MODEL_OBJECTS_ALEXNET_H__
+#define __ARM_COMPUTE_TEST_MODEL_OBJECTS_ALEXNET_H__
+
+#include "TensorLibrary.h"
+#include "Utils.h"
+
+#include <memory>
+
+using namespace arm_compute;
+using namespace arm_compute::test;
+
+namespace arm_compute
+{
+namespace test
+{
+namespace model_objects
+{
+/** AlexNet model object */
+template <typename ITensorType,
+          typename TensorType,
+          typename SubTensorType,
+          typename Accessor,
+          typename ActivationLayerFunction,
+          typename ConvolutionLayerFunction,
+          typename FullyConnectedLayerFunction,
+          typename NormalizationLayerFunction,
+          typename PoolingLayerFunction,
+          typename SoftmaxLayerFunction,
+          DataType dt                   = DataType::F32,
+          int      fixed_point_position = 4>
+class AlexNet
+{
+public:
+    AlexNet()
+        : _batches(1), _reshaped_weights(false)
+    {
+    }
+
+    void init_weights(unsigned int batches, bool reshaped_weights = false)
+    {
+        _batches          = batches;
+        _reshaped_weights = reshaped_weights;
+
+        // Initialize weights and biases
+        if(!_reshaped_weights)
+        {
+            for(auto &wi : w)
+            {
+                wi = std::unique_ptr<TensorType>(new TensorType());
+            }
+            for(auto &bi : b)
+            {
+                bi = std::unique_ptr<TensorType>(new TensorType());
+            }
+            w[0]->allocator()->init(TensorInfo(TensorShape(11U, 11U, 3U, 96U), 1, dt, fixed_point_position));
+            b[0]->allocator()->init(TensorInfo(TensorShape(96U), 1, dt, fixed_point_position));
+            w[1]->allocator()->init(TensorInfo(TensorShape(5U, 5U, 48U, 256U), 1, dt, fixed_point_position));
+            b[1]->allocator()->init(TensorInfo(TensorShape(256U), 1, dt, fixed_point_position));
+            w[2]->allocator()->init(TensorInfo(TensorShape(3U, 3U, 256U, 384U), 1, dt, fixed_point_position));
+            b[2]->allocator()->init(TensorInfo(TensorShape(384U), 1, dt, fixed_point_position));
+            w[3]->allocator()->init(TensorInfo(TensorShape(3U, 3U, 192U, 384U), 1, dt, fixed_point_position));
+            b[3]->allocator()->init(TensorInfo(TensorShape(384U), 1, dt, fixed_point_position));
+            w[4]->allocator()->init(TensorInfo(TensorShape(3U, 3U, 192U, 256U), 1, dt, fixed_point_position));
+            b[4]->allocator()->init(TensorInfo(TensorShape(256U), 1, dt, fixed_point_position));
+            w[5]->allocator()->init(TensorInfo(TensorShape(9216U, 4096U), 1, dt, fixed_point_position));
+            b[5]->allocator()->init(TensorInfo(TensorShape(4096U), 1, dt, fixed_point_position));
+            w[6]->allocator()->init(TensorInfo(TensorShape(4096U, 4096U), 1, dt, fixed_point_position));
+            b[6]->allocator()->init(TensorInfo(TensorShape(4096U), 1, dt, fixed_point_position));
+            w[7]->allocator()->init(TensorInfo(TensorShape(4096U, 1000U), 1, dt, fixed_point_position));
+            b[7]->allocator()->init(TensorInfo(TensorShape(1000U), 1, dt, fixed_point_position));
+
+            w21 = std::unique_ptr<SubTensorType>(new SubTensorType(w[1].get(), TensorShape(5U, 5U, 48U, 128U), Coordinates()));
+            w22 = std::unique_ptr<SubTensorType>(new SubTensorType(w[1].get(), TensorShape(5U, 5U, 48U, 128U), Coordinates(0, 0, 0, 128)));
+            b21 = std::unique_ptr<SubTensorType>(new SubTensorType(b[1].get(), TensorShape(128U), Coordinates()));
+            b22 = std::unique_ptr<SubTensorType>(new SubTensorType(b[1].get(), TensorShape(128U), Coordinates(128)));
+
+            w41 = std::unique_ptr<SubTensorType>(new SubTensorType(w[3].get(), TensorShape(3U, 3U, 192U, 192U), Coordinates()));
+            w42 = std::unique_ptr<SubTensorType>(new SubTensorType(w[3].get(), TensorShape(3U, 3U, 192U, 192U), Coordinates(0, 0, 0, 192)));
+            b41 = std::unique_ptr<SubTensorType>(new SubTensorType(b[3].get(), TensorShape(192U), Coordinates()));
+            b42 = std::unique_ptr<SubTensorType>(new SubTensorType(b[3].get(), TensorShape(192U), Coordinates(192)));
+
+            w51 = std::unique_ptr<SubTensorType>(new SubTensorType(w[4].get(), TensorShape(3U, 3U, 192U, 128U), Coordinates()));
+            w52 = std::unique_ptr<SubTensorType>(new SubTensorType(w[4].get(), TensorShape(3U, 3U, 192U, 128U), Coordinates(0, 0, 0, 128)));
+            b51 = std::unique_ptr<SubTensorType>(new SubTensorType(b[4].get(), TensorShape(128U), Coordinates()));
+            b52 = std::unique_ptr<SubTensorType>(new SubTensorType(b[4].get(), TensorShape(128U), Coordinates(128)));
+        }
+        else
+        {
+            const unsigned int dt_size = 16 /
arm_compute::data_size_from_type(dt); + + // Create tensor for the reshaped weights + w[0] = std::unique_ptr(new TensorType()); + auto w21_tensor = std::unique_ptr(new TensorType()); + auto w22_tensor = std::unique_ptr(new TensorType()); + w[2] = std::unique_ptr(new TensorType()); + auto w41_tensor = std::unique_ptr(new TensorType()); + auto w42_tensor = std::unique_ptr(new TensorType()); + auto w51_tensor = std::unique_ptr(new TensorType()); + auto w52_tensor = std::unique_ptr(new TensorType()); + + w[0]->allocator()->init(TensorInfo(TensorShape(366U * dt_size, 96U / dt_size), 1, dt, fixed_point_position)); + w21_tensor->allocator()->init(TensorInfo(TensorShape(1248U * dt_size, 128U / dt_size), 1, dt, fixed_point_position)); + w22_tensor->allocator()->init(TensorInfo(TensorShape(1248U * dt_size, 128U / dt_size), 1, dt, fixed_point_position)); + w[2]->allocator()->init(TensorInfo(TensorShape(2560U * dt_size, 384U / dt_size), 1, dt, fixed_point_position)); + w41_tensor->allocator()->init(TensorInfo(TensorShape(1920U * dt_size, 192U / dt_size), 1, dt, fixed_point_position)); + w42_tensor->allocator()->init(TensorInfo(TensorShape(1920U * dt_size, 192U / dt_size), 1, dt, fixed_point_position)); + w51_tensor->allocator()->init(TensorInfo(TensorShape(1920U * dt_size, 128U / dt_size), 1, dt, fixed_point_position)); + w52_tensor->allocator()->init(TensorInfo(TensorShape(1920U * dt_size, 128U / dt_size), 1, dt, fixed_point_position)); + + w21 = std::move(w21_tensor); + w22 = std::move(w22_tensor); + w41 = std::move(w41_tensor); + w42 = std::move(w42_tensor); + w51 = std::move(w51_tensor); + w52 = std::move(w52_tensor); + + w[5] = std::unique_ptr(new TensorType()); + w[6] = std::unique_ptr(new TensorType()); + w[7] = std::unique_ptr(new TensorType()); + b[5] = std::unique_ptr(new TensorType()); + b[6] = std::unique_ptr(new TensorType()); + b[7] = std::unique_ptr(new TensorType()); + + b[5]->allocator()->init(TensorInfo(TensorShape(4096U), 1, dt, fixed_point_position)); + b[6]->allocator()->init(TensorInfo(TensorShape(4096U), 1, dt, fixed_point_position)); + b[7]->allocator()->init(TensorInfo(TensorShape(1000U), 1, dt, fixed_point_position)); + + if(_batches > 1) + { + w[5]->allocator()->init(TensorInfo(TensorShape(9216U * dt_size, 4096U / dt_size), 1, dt, fixed_point_position)); + w[6]->allocator()->init(TensorInfo(TensorShape(4096U * dt_size, 4096U / dt_size), 1, dt, fixed_point_position)); + w[7]->allocator()->init(TensorInfo(TensorShape(4096U * dt_size, 1000U / dt_size), 1, dt, fixed_point_position)); + } + else + { + w[5]->allocator()->init(TensorInfo(TensorShape(4096U, 9216U), 1, dt, fixed_point_position)); + w[6]->allocator()->init(TensorInfo(TensorShape(4096U, 4096U), 1, dt, fixed_point_position)); + w[7]->allocator()->init(TensorInfo(TensorShape(1000U, 4096U), 1, dt, fixed_point_position)); + } + } + } + + void build() + { + input.allocator()->init(TensorInfo(TensorShape(227U, 227U, 3U, _batches), 1, dt, fixed_point_position)); + output.allocator()->init(TensorInfo(TensorShape(1000U, _batches), 1, dt, fixed_point_position)); + + // Initialize intermediate tensors + // Layer 1 + conv1_out.allocator()->init(TensorInfo(TensorShape(55U, 55U, 96U, _batches), 1, dt, fixed_point_position)); + act1_out.allocator()->init(TensorInfo(TensorShape(55U, 55U, 96U, _batches), 1, dt, fixed_point_position)); + norm1_out.allocator()->init(TensorInfo(TensorShape(55U, 55U, 96U, _batches), 1, dt, fixed_point_position)); + pool1_out.allocator()->init(TensorInfo(TensorShape(27U, 27U, 96U, _batches), 1, dt, 
fixed_point_position)); + pool11_out = std::unique_ptr(new SubTensorType(&pool1_out, TensorShape(27U, 27U, 48U, _batches), Coordinates())); + pool12_out = std::unique_ptr(new SubTensorType(&pool1_out, TensorShape(27U, 27U, 48U, _batches), Coordinates(0, 0, 48))); + // Layer 2 + conv2_out.allocator()->init(TensorInfo(TensorShape(27U, 27U, 256U, _batches), 1, dt, fixed_point_position)); + conv21_out = std::unique_ptr(new SubTensorType(&conv2_out, TensorShape(27U, 27U, 128U, _batches), Coordinates())); + conv22_out = std::unique_ptr(new SubTensorType(&conv2_out, TensorShape(27U, 27U, 128U, _batches), Coordinates(0, 0, 128))); + act2_out.allocator()->init(TensorInfo(TensorShape(27U, 27U, 256U, _batches), 1, dt, fixed_point_position)); + norm2_out.allocator()->init(TensorInfo(TensorShape(27U, 27U, 256U, _batches), 1, dt, fixed_point_position)); + pool2_out.allocator()->init(TensorInfo(TensorShape(13U, 13U, 256U, _batches), 1, dt, fixed_point_position)); + // Layer 3 + conv3_out.allocator()->init(TensorInfo(TensorShape(13U, 13U, 384U, _batches), 1, dt, fixed_point_position)); + act3_out.allocator()->init(TensorInfo(TensorShape(13U, 13U, 384U, _batches), 1, dt, fixed_point_position)); + act31_out = std::unique_ptr(new SubTensorType(&act3_out, TensorShape(13U, 13U, 192U, _batches), Coordinates())); + act32_out = std::unique_ptr(new SubTensorType(&act3_out, TensorShape(13U, 13U, 192U, _batches), Coordinates(0, 0, 192))); + // Layer 4 + conv4_out.allocator()->init(TensorInfo(TensorShape(13U, 13U, 384U, _batches), 1, dt, fixed_point_position)); + conv41_out = std::unique_ptr(new SubTensorType(&conv4_out, TensorShape(13U, 13U, 192U, _batches), Coordinates())); + conv42_out = std::unique_ptr(new SubTensorType(&conv4_out, TensorShape(13U, 13U, 192U, _batches), Coordinates(0, 0, 192))); + act4_out.allocator()->init(TensorInfo(TensorShape(13U, 13U, 384U, _batches), 1, dt, fixed_point_position)); + act41_out = std::unique_ptr(new SubTensorType(&act4_out, TensorShape(13U, 13U, 192U, _batches), Coordinates())); + act42_out = std::unique_ptr(new SubTensorType(&act4_out, TensorShape(13U, 13U, 192U, _batches), Coordinates(0, 0, 192))); + // Layer 5 + conv5_out.allocator()->init(TensorInfo(TensorShape(13U, 13U, 256U, _batches), 1, dt, fixed_point_position)); + conv51_out = std::unique_ptr(new SubTensorType(&conv5_out, TensorShape(13U, 13U, 128U, _batches), Coordinates())); + conv52_out = std::unique_ptr(new SubTensorType(&conv5_out, TensorShape(13U, 13U, 128U, _batches), Coordinates(0, 0, 128))); + act5_out.allocator()->init(TensorInfo(TensorShape(13U, 13U, 256U, _batches), 1, dt, fixed_point_position)); + pool5_out.allocator()->init(TensorInfo(TensorShape(6U, 6U, 256U, _batches), 1, dt, fixed_point_position)); + // Layer 6 + fc6_out.allocator()->init(TensorInfo(TensorShape(4096U, _batches), 1, dt, fixed_point_position)); + act6_out.allocator()->init(TensorInfo(TensorShape(4096U, _batches), 1, dt, fixed_point_position)); + // Layer 7 + fc7_out.allocator()->init(TensorInfo(TensorShape(4096U, _batches), 1, dt, fixed_point_position)); + act7_out.allocator()->init(TensorInfo(TensorShape(4096U, _batches), 1, dt, fixed_point_position)); + // Layer 8 + fc8_out.allocator()->init(TensorInfo(TensorShape(1000U, _batches), 1, dt, fixed_point_position)); + + // Allocate layers + { + // Layer 1 + conv1 = std::unique_ptr(new ConvolutionLayerFunction()); + act1 = std::unique_ptr(new ActivationLayerFunction()); + norm1 = std::unique_ptr(new NormalizationLayerFunction()); + pool1 = std::unique_ptr(new PoolingLayerFunction()); + 
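+            // Note: conv2, conv4 and conv5 each exist as two function instances
+            // (conv21/conv22, conv41/conv42, conv51/conv52) because AlexNet splits
+            // those layers into two groups; the sub-tensor views created above feed
+            // one half of the feature maps to each instance.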
// Layer 2 + conv21 = std::unique_ptr(new ConvolutionLayerFunction()); + conv22 = std::unique_ptr(new ConvolutionLayerFunction()); + act2 = std::unique_ptr(new ActivationLayerFunction()); + norm2 = std::unique_ptr(new NormalizationLayerFunction()); + pool2 = std::unique_ptr(new PoolingLayerFunction()); + // Layer 3 + conv3 = std::unique_ptr(new ConvolutionLayerFunction()); + act3 = std::unique_ptr(new ActivationLayerFunction()); + // Layer 4 + conv41 = std::unique_ptr(new ConvolutionLayerFunction()); + conv42 = std::unique_ptr(new ConvolutionLayerFunction()); + act4 = std::unique_ptr(new ActivationLayerFunction()); + // Layer 5 + conv51 = std::unique_ptr(new ConvolutionLayerFunction()); + conv52 = std::unique_ptr(new ConvolutionLayerFunction()); + act5 = std::unique_ptr(new ActivationLayerFunction()); + pool5 = std::unique_ptr(new PoolingLayerFunction()); + // Layer 6 + fc6 = std::unique_ptr(new FullyConnectedLayerFunction()); + act6 = std::unique_ptr(new ActivationLayerFunction()); + // Layer 7 + fc7 = std::unique_ptr(new FullyConnectedLayerFunction()); + act7 = std::unique_ptr(new ActivationLayerFunction()); + // Layer 8 + fc8 = std::unique_ptr(new FullyConnectedLayerFunction()); + // Softmax + smx = std::unique_ptr(new SoftmaxLayerFunction()); + } + + // Configure Layers + { + // Layer 1 + conv1->configure(&input, w[0].get(), b[0].get(), &conv1_out, PadStrideInfo(4, 4, 0, 0), WeightsInfo(_reshaped_weights, 11U)); + act1->configure(&conv1_out, &act1_out, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); + norm1->configure(&act1_out, &norm1_out, NormalizationLayerInfo(NormType::CROSS_MAP, 5, 0.0001f, 0.75f)); + pool1->configure(&norm1_out, &pool1_out, PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0))); + // Layer 2 + conv21->configure(pool11_out.get(), w21.get(), b21.get(), conv21_out.get(), PadStrideInfo(1, 1, 2, 2), WeightsInfo(_reshaped_weights, 5U)); + conv22->configure(pool12_out.get(), w22.get(), b22.get(), conv22_out.get(), PadStrideInfo(1, 1, 2, 2), WeightsInfo(_reshaped_weights, 5U)); + act2->configure(&conv2_out, &act2_out, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); + norm2->configure(&act2_out, &norm2_out, NormalizationLayerInfo(NormType::CROSS_MAP, 5, 0.0001f, 0.75f)); + pool2->configure(&norm2_out, &pool2_out, PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 0, 0))); + // Layer 3 + conv3->configure(&pool2_out, w[2].get(), b[2].get(), &conv3_out, PadStrideInfo(1, 1, 1, 1), WeightsInfo(_reshaped_weights, 3U)); + act3->configure(&conv3_out, &act3_out, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); + // Layer 4 + conv41->configure(act31_out.get(), w41.get(), b41.get(), conv41_out.get(), PadStrideInfo(1, 1, 1, 1), WeightsInfo(_reshaped_weights, 3U)); + conv42->configure(act32_out.get(), w42.get(), b42.get(), conv42_out.get(), PadStrideInfo(1, 1, 1, 1), WeightsInfo(_reshaped_weights, 3U)); + act4->configure(&conv4_out, &act4_out, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); + // Layer 5 + conv51->configure(act41_out.get(), w51.get(), b51.get(), conv51_out.get(), PadStrideInfo(1, 1, 1, 1), WeightsInfo(_reshaped_weights, 3U)); + conv52->configure(act42_out.get(), w52.get(), b52.get(), conv52_out.get(), PadStrideInfo(1, 1, 1, 1), WeightsInfo(_reshaped_weights, 3U)); + act5->configure(&conv5_out, &act5_out, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); + pool5->configure(&act5_out, &pool5_out, PoolingLayerInfo(PoolingType::MAX, 3, PadStrideInfo(2, 2, 
0, 0))); + // Layer 6 + fc6->configure(&pool5_out, w[5].get(), b[5].get(), &fc6_out, true, _reshaped_weights); + act6->configure(&fc6_out, &act6_out, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); + // Layer 7 + fc7->configure(&act6_out, w[6].get(), b[6].get(), &fc7_out, true, _reshaped_weights); + act7->configure(&fc7_out, &act7_out, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); + // Layer 8 + fc8->configure(&act7_out, w[7].get(), b[7].get(), &fc8_out, true, _reshaped_weights); + // Softmax + smx->configure(&fc8_out, &output); + } + } + + void allocate() + { + input.allocator()->allocate(); + output.allocator()->allocate(); + for(auto &wi : w) + { + if(wi.get()) + { + wi->allocator()->allocate(); + } + } + for(auto &bi : b) + { + if(bi.get()) + { + bi->allocator()->allocate(); + } + } + if(_reshaped_weights) + { + dynamic_cast(w21.get())->allocator()->allocate(); + dynamic_cast(w22.get())->allocator()->allocate(); + dynamic_cast(w41.get())->allocator()->allocate(); + dynamic_cast(w42.get())->allocator()->allocate(); + dynamic_cast(w51.get())->allocator()->allocate(); + dynamic_cast(w52.get())->allocator()->allocate(); + } + conv1_out.allocator()->allocate(); + act1_out.allocator()->allocate(); + norm1_out.allocator()->allocate(); + pool1_out.allocator()->allocate(); + conv2_out.allocator()->allocate(); + act2_out.allocator()->allocate(); + norm2_out.allocator()->allocate(); + pool2_out.allocator()->allocate(); + conv3_out.allocator()->allocate(); + act3_out.allocator()->allocate(); + conv4_out.allocator()->allocate(); + act4_out.allocator()->allocate(); + conv5_out.allocator()->allocate(); + act5_out.allocator()->allocate(); + pool5_out.allocator()->allocate(); + fc6_out.allocator()->allocate(); + act6_out.allocator()->allocate(); + fc7_out.allocator()->allocate(); + act7_out.allocator()->allocate(); + fc8_out.allocator()->allocate(); + } + + /** Fills the trainable parameters and input with random data. 
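+     *
+     * Each tensor gets its own seed (input 0, weights i + 1, biases i + 10), so
+     * fills are reproducible across runs yet decorrelated between tensors.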
*/
+    void fill_random()
+    {
+        library->fill_tensor_uniform(Accessor(input), 0);
+        if(!_reshaped_weights)
+        {
+            for(unsigned int i = 0; i < w.size(); ++i)
+            {
+                library->fill_tensor_uniform(Accessor(*w[i]), i + 1);
+                library->fill_tensor_uniform(Accessor(*b[i]), i + 10);
+            }
+        }
+        else
+        {
+            library->fill_tensor_uniform(Accessor(*w[0]), 1);
+            library->fill_tensor_uniform(Accessor(*w[2]), 2);
+
+            library->fill_tensor_uniform(Accessor(*w[5]), 3);
+            library->fill_tensor_uniform(Accessor(*b[5]), 4);
+            library->fill_tensor_uniform(Accessor(*w[6]), 5);
+            library->fill_tensor_uniform(Accessor(*b[6]), 6);
+            library->fill_tensor_uniform(Accessor(*w[7]), 7);
+            library->fill_tensor_uniform(Accessor(*b[7]), 8);
+
+            library->fill_tensor_uniform(Accessor(*dynamic_cast<TensorType *>(w21.get())), 9);
+            library->fill_tensor_uniform(Accessor(*dynamic_cast<TensorType *>(w22.get())), 10);
+            library->fill_tensor_uniform(Accessor(*dynamic_cast<TensorType *>(w41.get())), 11);
+            library->fill_tensor_uniform(Accessor(*dynamic_cast<TensorType *>(w42.get())), 12);
+            library->fill_tensor_uniform(Accessor(*dynamic_cast<TensorType *>(w51.get())), 13);
+            library->fill_tensor_uniform(Accessor(*dynamic_cast<TensorType *>(w52.get())), 14);
+        }
+    }
+
+#ifdef INTERNAL_ONLY
+    /** Fills the trainable parameters from binary files
+     *
+     * @param weights File names containing the weights data
+     * @param biases  File names containing the bias data
+     */
+    void fill(std::vector<std::string> weights, std::vector<std::string> biases)
+    {
+        ARM_COMPUTE_ERROR_ON(weights.size() != w.size());
+        ARM_COMPUTE_ERROR_ON(biases.size() != b.size());
+        ARM_COMPUTE_ERROR_ON(_reshaped_weights);
+
+        for(unsigned int i = 0; i < weights.size(); ++i)
+        {
+            library->fill_layer_data(Accessor(*w[i]), weights[i]);
+            library->fill_layer_data(Accessor(*b[i]), biases[i]);
+        }
+    }
+
+    /** Feed input to network from file.
+     *
+     * @param name File name containing the input data.
+     */
+    void feed(std::string name)
+    {
+        library->fill_layer_data(Accessor(input), name);
+    }
+#endif /* INTERNAL_ONLY */
+
+    /** Get the classification results.
+     *
+     * @return Vector containing the classified labels
+     */
+    std::vector<int> get_classifications()
+    {
+        std::vector<int> classified_labels;
+        Accessor         output_accessor(output);
+
+        Window window;
+        window.set(Window::DimX, Window::Dimension(0, 1, 1));
+        for(unsigned int d = 1; d < output_accessor.shape().num_dimensions(); ++d)
+        {
+            window.set(d, Window::Dimension(0, output_accessor.shape()[d], 1));
+        }
+
+        execute_window_loop(window, [&](const Coordinates & id)
+        {
+            int               max_idx = 0;
+            float             val     = 0;
+            const void *const out_ptr = output_accessor(id);
+            for(unsigned int l = 0; l < output_accessor.shape().x(); ++l)
+            {
+                float curr_val = reinterpret_cast<const float *>(out_ptr)[l];
+                if(curr_val > val)
+                {
+                    max_idx = l;
+                    val     = curr_val;
+                }
+            }
+            classified_labels.push_back(max_idx);
+        });
+        return classified_labels;
+    }
+
+    /** Clear all allocated memory from the tensor objects */
+    void clear()
+    {
+        conv1.reset();
+        act1.reset();
+        norm1.reset();
+        pool1.reset();
+        conv21.reset();
+        conv22.reset();
+        act2.reset();
+        norm2.reset();
+        pool2.reset();
+        conv3.reset();
+        act3.reset();
+        conv41.reset();
+        conv42.reset();
+        act4.reset();
+        conv51.reset();
+        conv52.reset();
+        act5.reset();
+        pool5.reset();
+        fc6.reset();
+        act6.reset();
+        fc7.reset();
+        act7.reset();
+        fc8.reset();
+        smx.reset();
+
+        // Free allocations
+        input.allocator()->free();
+        output.allocator()->free();
+        for(auto &wi : w)
+        {
+            wi.reset();
+        }
+        for(auto &bi : b)
+        {
+            bi.reset();
+        }
+
+        w21.reset();
+        w22.reset();
+        b21.reset();
+        b22.reset();
+        w41.reset();
+        w42.reset();
+        b41.reset();
+        b42.reset();
+        w51.reset();
+        w52.reset();
+        b51.reset();
+        b52.reset();
+
+        conv1_out.allocator()->free();
+        act1_out.allocator()->free();
+        norm1_out.allocator()->free();
+        pool1_out.allocator()->free();
+        conv2_out.allocator()->free();
+        act2_out.allocator()->free();
+        norm2_out.allocator()->free();
+        pool2_out.allocator()->free();
+        conv3_out.allocator()->free();
+        act3_out.allocator()->free();
+        conv4_out.allocator()->free();
+        act4_out.allocator()->free();
+        conv5_out.allocator()->free();
+        act5_out.allocator()->free();
+        pool5_out.allocator()->free();
+        fc6_out.allocator()->free();
+        act6_out.allocator()->free();
+        fc7_out.allocator()->free();
+        act7_out.allocator()->free();
+        fc8_out.allocator()->free();
+    }
+
+    /** Runs the model */
+    void run()
+    {
+        // Layer 1
+        conv1->run();
+        act1->run();
+        norm1->run();
+        pool1->run();
+        // Layer 2
+        conv21->run();
+        conv22->run();
+        act2->run();
+        norm2->run();
+        pool2->run();
+        // Layer 3
+        conv3->run();
+        act3->run();
+        // Layer 4
+        conv41->run();
+        conv42->run();
+        act4->run();
+        // Layer 5
+        conv51->run();
+        conv52->run();
+        act5->run();
+        pool5->run();
+        // Layer 6
+        fc6->run();
+        act6->run();
+        // Layer 7
+        fc7->run();
+        act7->run();
+        // Layer 8
+        fc8->run();
+        // Softmax
+        smx->run();
+    }
+
+private:
+    unsigned int _batches;
+    bool         _reshaped_weights;
+
+    std::unique_ptr<ActivationLayerFunction>     act1{ nullptr }, act2{ nullptr }, act3{ nullptr }, act4{ nullptr }, act5{ nullptr }, act6{ nullptr }, act7{ nullptr };
+    std::unique_ptr<ConvolutionLayerFunction>    conv1{ nullptr }, conv21{ nullptr }, conv22{ nullptr }, conv3{ nullptr }, conv41{ nullptr }, conv42{ nullptr }, conv51{ nullptr }, conv52{ nullptr };
+    std::unique_ptr<FullyConnectedLayerFunction> fc6{ nullptr }, fc7{ nullptr }, fc8{ nullptr };
+    std::unique_ptr<NormalizationLayerFunction>  norm1{ nullptr }, norm2{ nullptr };
+    std::unique_ptr<PoolingLayerFunction>        pool1{ nullptr }, pool2{ nullptr }, pool5{ nullptr };
+    std::unique_ptr<SoftmaxLayerFunction>        smx{ nullptr };
+
+    TensorType input{}, output{};
+    std::array<std::unique_ptr<TensorType>, 8> w{}, b{};
+    std::unique_ptr<ITensorType> w21{ nullptr }, w22{ nullptr }, b21{ nullptr }, b22{ nullptr };
+    std::unique_ptr<ITensorType> w41{ nullptr }, w42{ nullptr }, b41{ nullptr }, b42{ nullptr };
+    std::unique_ptr<ITensorType> w51{ nullptr }, w52{ nullptr }, b51{ nullptr }, b52{ nullptr };
+
+    TensorType conv1_out{}, act1_out{}, norm1_out{}, pool1_out{};
+    TensorType conv2_out{}, act2_out{}, pool2_out{}, norm2_out{};
+    TensorType conv3_out{}, act3_out{};
+    TensorType conv4_out{}, act4_out{};
+    TensorType conv5_out{}, act5_out{}, pool5_out{};
+    TensorType fc6_out{}, act6_out{};
+    TensorType fc7_out{}, act7_out{};
+    TensorType fc8_out{};
+
+    std::unique_ptr<SubTensorType> pool11_out{ nullptr }, pool12_out{ nullptr };
+    std::unique_ptr<SubTensorType> conv21_out{ nullptr }, conv22_out{ nullptr };
+    std::unique_ptr<SubTensorType> act31_out{ nullptr }, act32_out{ nullptr };
+    std::unique_ptr<SubTensorType> conv41_out{ nullptr }, conv42_out{ nullptr }, act41_out{ nullptr }, act42_out{ nullptr };
+    std::unique_ptr<SubTensorType> conv51_out{ nullptr }, conv52_out{ nullptr };
+};
+} // namespace model_objects
+} // namespace test
+} // namespace arm_compute
+#endif //__ARM_COMPUTE_TEST_MODEL_OBJECTS_ALEXNET_H__
diff --git a/tests/model_objects/LeNet5.h b/tests/model_objects/LeNet5.h
new file mode 100644
index 0000000000..7d5090f5d0
--- /dev/null
+++ b/tests/model_objects/LeNet5.h
@@ -0,0 +1,277 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_TEST_MODEL_OBJECTS_LENET5_H__
+#define __ARM_COMPUTE_TEST_MODEL_OBJECTS_LENET5_H__
+
+#include "TensorLibrary.h"
+#include "Utils.h"
+
+#include <memory>
+
+using namespace arm_compute;
+using namespace arm_compute::test;
+
+namespace arm_compute
+{
+namespace test
+{
+namespace model_objects
+{
+/** LeNet5 model object */
+template <typename TensorType,
+          typename Accessor,
+          typename ActivationLayerFunction,
+          typename ConvolutionLayerFunction,
+          typename FullyConnectedLayerFunction,
+          typename PoolingLayerFunction,
+          typename SoftmaxLayerFunction>
+class LeNet5
+{
+public:
+    /** Initialize and build the model.
+ * + * @param batches Number of batches should handle + */ + void build(unsigned int batches) + { + // Initialize input, output, weights and biases + input.allocator()->init(TensorInfo(TensorShape(28U, 28U, 1U, batches), 1, DataType::F32)); + output.allocator()->init(TensorInfo(TensorShape(10U, batches), 1, DataType::F32)); + w[0].allocator()->init(TensorInfo(TensorShape(5U, 5U, 1U, 20U), 1, DataType::F32)); + b[0].allocator()->init(TensorInfo(TensorShape(20U), 1, DataType::F32)); + w[1].allocator()->init(TensorInfo(TensorShape(5U, 5U, 20U, 50U), 1, DataType::F32)); + b[1].allocator()->init(TensorInfo(TensorShape(50U), 1, DataType::F32)); + w[2].allocator()->init(TensorInfo(TensorShape(800U, 500U), 1, DataType::F32)); + b[2].allocator()->init(TensorInfo(TensorShape(500U), 1, DataType::F32)); + w[3].allocator()->init(TensorInfo(TensorShape(500U, 10U), 1, DataType::F32)); + b[3].allocator()->init(TensorInfo(TensorShape(10U), 1, DataType::F32)); + + // Initialize intermediate tensors + // Layer 1 + conv1_out.allocator()->init(TensorInfo(TensorShape(24U, 24U, 20U, batches), 1, DataType::F32)); + pool1_out.allocator()->init(TensorInfo(TensorShape(12U, 12U, 20U, batches), 1, DataType::F32)); + // Layer 2 + conv2_out.allocator()->init(TensorInfo(TensorShape(8U, 8U, 50U, batches), 1, DataType::F32)); + pool2_out.allocator()->init(TensorInfo(TensorShape(4U, 4U, 50U, batches), 1, DataType::F32)); + // Layer 3 + fc1_out.allocator()->init(TensorInfo(TensorShape(500U, batches), 1, DataType::F32)); + act1_out.allocator()->init(TensorInfo(TensorShape(500U, batches), 1, DataType::F32)); + // Layer 6 + fc2_out.allocator()->init(TensorInfo(TensorShape(10U, batches), 1, DataType::F32)); + + // Allocate layers + { + // Layer 1 + conv1 = std::unique_ptr(new ConvolutionLayerFunction()); + pool1 = std::unique_ptr(new PoolingLayerFunction()); + // Layer 2 + conv2 = std::unique_ptr(new ConvolutionLayerFunction()); + pool2 = std::unique_ptr(new PoolingLayerFunction()); + // Layer 3 + fc1 = std::unique_ptr(new FullyConnectedLayerFunction()); + act1 = std::unique_ptr(new ActivationLayerFunction()); + // Layer 4 + fc2 = std::unique_ptr(new FullyConnectedLayerFunction()); + // Softmax + smx = std::unique_ptr(new SoftmaxLayerFunction()); + } + + // Configure Layers + { + conv1->configure(&input, &w[0], &b[0], &conv1_out, PadStrideInfo(1, 1, 0, 0)); + pool1->configure(&conv1_out, &pool1_out, PoolingLayerInfo(PoolingType::MAX, 2, PadStrideInfo(2, 2, 0, 0))); + conv2->configure(&pool1_out, &w[1], &b[1], &conv2_out, PadStrideInfo(1, 1, 0, 0)); + pool2->configure(&conv2_out, &pool2_out, PoolingLayerInfo(PoolingType::MAX, 2, PadStrideInfo(2, 2, 0, 0))); + fc1->configure(&pool2_out, &w[2], &b[2], &fc1_out); + act1->configure(&fc1_out, &act1_out, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); + fc2->configure(&act1_out, &w[3], &b[3], &fc2_out); + smx->configure(&fc2_out, &output); + } + + // Allocate tensors + { + input.allocator()->allocate(); + output.allocator()->allocate(); + for(auto &wi : w) + { + wi.allocator()->allocate(); + } + for(auto &bi : b) + { + bi.allocator()->allocate(); + } + conv1_out.allocator()->allocate(); + pool1_out.allocator()->allocate(); + conv2_out.allocator()->allocate(); + pool2_out.allocator()->allocate(); + fc1_out.allocator()->allocate(); + act1_out.allocator()->allocate(); + fc2_out.allocator()->allocate(); + } + } + + /** Fills the trainable parameters and input with random data. 
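+     *
+     * Values are drawn uniformly from [-1, 1] with the same per-tensor seeding
+     * scheme as the AlexNet model object above.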
*/ + void fill_random() + { + std::uniform_real_distribution<> distribution(-1, 1); + library->fill(Accessor(input), distribution, 0); + for(unsigned int i = 0; i < w.size(); ++i) + { + library->fill(Accessor(w[i]), distribution, i + 1); + library->fill(Accessor(b[i]), distribution, i + 10); + } + } + +#ifdef INTERNAL_ONLY + /** Fills the trainable parameters from binary files + * + * @param weights Files names containing the weights data + * @param biases Files names containing the bias data + */ + void fill(std::vector weights, std::vector biases) + { + ARM_COMPUTE_ERROR_ON(weights.size() != w.size()); + ARM_COMPUTE_ERROR_ON(biases.size() != b.size()); + + for(unsigned int i = 0; i < weights.size(); ++i) + { + library->fill_layer_data(Accessor(w[i]), weights[i]); + library->fill_layer_data(Accessor(b[i]), biases[i]); + } + } + + /** Feed input to network from file. + * + * @param name File name of containing the input data. + */ + void feed(std::string name) + { + library->fill_layer_data(Accessor(input), name); + } +#endif /* INTERNAL_ONLY */ + + /** Get the classification results. + * + * @return Vector containing the classified labels + */ + std::vector get_classifications() + { + std::vector classified_labels; + Accessor output_accessor(output); + + Window window; + window.set(Window::DimX, Window::Dimension(0, 1, 1)); + for(unsigned int d = 1; d < output_accessor.shape().num_dimensions(); ++d) + { + window.set(d, Window::Dimension(0, output_accessor.shape()[d], 1)); + } + + execute_window_loop(window, [&](const Coordinates & id) + { + int max_idx = 0; + float val = 0; + const void *const out_ptr = output_accessor(id); + for(unsigned int l = 0; l < output_accessor.shape().x(); ++l) + { + float curr_val = reinterpret_cast(out_ptr)[l]; + if(curr_val > val) + { + max_idx = l; + val = curr_val; + } + } + classified_labels.push_back(max_idx); + }); + return classified_labels; + } + + /** Clear all allocated memory from the tensor objects */ + void clear() + { + conv1.reset(); + pool1.reset(); + conv2.reset(); + pool2.reset(); + fc1.reset(); + act1.reset(); + fc2.reset(); + smx.reset(); + + input.allocator()->free(); + output.allocator()->free(); + for(auto &wi : w) + { + wi.allocator()->free(); + } + for(auto &bi : b) + { + bi.allocator()->free(); + } + + conv1_out.allocator()->free(); + pool1_out.allocator()->free(); + conv2_out.allocator()->free(); + pool2_out.allocator()->free(); + fc1_out.allocator()->free(); + act1_out.allocator()->free(); + fc2_out.allocator()->free(); + } + + /** Runs the model */ + void run() + { + // Layer 1 + conv1->run(); + pool1->run(); + // Layer 2 + conv2->run(); + pool2->run(); + // Layer 3 + fc1->run(); + act1->run(); + // Layer 4 + fc2->run(); + // Softmax + smx->run(); + } + +private: + std::unique_ptr act1{ nullptr }; + std::unique_ptr conv1{ nullptr }, conv2{ nullptr }; + std::unique_ptr fc1{ nullptr }, fc2{ nullptr }; + std::unique_ptr pool1{ nullptr }, pool2{ nullptr }; + std::unique_ptr smx{ nullptr }; + + TensorType input{}, output{}; + std::array w{}, b{}; + + TensorType conv1_out{}, pool1_out{}; + TensorType conv2_out{}, pool2_out{}; + TensorType fc1_out{}, act1_out{}; + TensorType fc2_out{}; +}; +} // namespace model_objects +} // namespace test +} // namespace arm_compute +#endif //__ARM_COMPUTE_TEST_MODEL_OBJECTS_LENET5_H__ diff --git a/tests/validation/CL/BitwiseAnd.cpp b/tests/validation/CL/BitwiseAnd.cpp new file mode 100644 index 0000000000..4cd64a2a99 --- /dev/null +++ b/tests/validation/CL/BitwiseAnd.cpp @@ -0,0 +1,218 @@ +/* + * 
Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "CL/CLAccessor.h"
+#include "CL/Helper.h"
+#include "Globals.h"
+#include "TensorLibrary.h"
+#include "TypePrinter.h"
+#include "Utils.h"
+#include "validation/Datasets.h"
+#include "validation/Reference.h"
+#include "validation/Validation.h"
+
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLSubTensor.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/CL/CLTensorAllocator.h"
+#include "arm_compute/runtime/CL/functions/CLBitwiseAnd.h"
+
+#include "boost_wrapper.h"
+
+#include <random>
+#include <string>
+
+using namespace arm_compute;
+using namespace arm_compute::test;
+using namespace arm_compute::test::cl;
+using namespace arm_compute::test::validation;
+
+namespace
+{
+/** Compute OpenCL bitwise and function.
+ *
+ * @param[in] shape Shape of the input and output tensors.
+ *
+ * @return Computed output tensor.
+ */
+CLTensor compute_bitwise_and(const TensorShape &shape)
+{
+    // Create tensors
+    CLTensor src1 = create_tensor(shape, DataType::U8);
+    CLTensor src2 = create_tensor(shape, DataType::U8);
+    CLTensor dst  = create_tensor(shape, DataType::U8);
+
+    // Create and configure function
+    CLBitwiseAnd band;
+    band.configure(&src1, &src2, &dst);
+
+    // Allocate tensors
+    src1.allocator()->allocate();
+    src2.allocator()->allocate();
+    dst.allocator()->allocate();
+
+    BOOST_TEST(!src1.info()->is_resizable());
+    BOOST_TEST(!src2.info()->is_resizable());
+    BOOST_TEST(!dst.info()->is_resizable());
+
+    // Fill tensors
+    library->fill_tensor_uniform(CLAccessor(src1), 0);
+    library->fill_tensor_uniform(CLAccessor(src2), 1);
+
+    // Compute function
+    band.run();
+
+    return dst;
+}
+
+/** Compute OpenCL bitwise and function that splits the input and output into two sub-tensors.
+ *
+ * @param[in] shape Shape of the input and output tensors.
+ *
+ * @return Computed output tensor.
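+ *
+ * Splitting along the Z dimension into two halves and configuring a separate
+ * function instance on each half exercises the sub-tensor offset handling of
+ * the OpenCL kernels.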
+ */ +CLTensor compute_bitwise_and_subtensor(const TensorShape &shape) +{ + // Create tensors + CLTensor src1 = create_tensor(shape, DataType::U8); + CLTensor src2 = create_tensor(shape, DataType::U8); + CLTensor dst = create_tensor(shape, DataType::U8); + + // Create SubTensors + int coord_z = shape.z() / 2; + TensorShape sub_shape = shape; + sub_shape.set(2, coord_z); + + CLSubTensor src1_sub1(&src1, sub_shape, Coordinates()); + CLSubTensor src1_sub2(&src1, sub_shape, Coordinates(0, 0, coord_z)); + CLSubTensor src2_sub1(&src2, sub_shape, Coordinates()); + CLSubTensor src2_sub2(&src2, sub_shape, Coordinates(0, 0, coord_z)); + CLSubTensor dst_sub1(&dst, sub_shape, Coordinates()); + CLSubTensor dst_sub2(&dst, sub_shape, Coordinates(0, 0, coord_z)); + + // Create and configure function + CLBitwiseAnd band1, band2; + band1.configure(&src1_sub1, &src2_sub1, &dst_sub1); + band2.configure(&src1_sub2, &src2_sub2, &dst_sub2); + + // Allocate tensors + src1.allocator()->allocate(); + src2.allocator()->allocate(); + dst.allocator()->allocate(); + + BOOST_TEST(!src1.info()->is_resizable()); + BOOST_TEST(!src2.info()->is_resizable()); + BOOST_TEST(!dst.info()->is_resizable()); + + // Fill tensors + std::uniform_int_distribution<> distribution(0, 255); + library->fill(CLAccessor(src1), distribution, 0); + library->fill(CLAccessor(src2), distribution, 1); + + // Compute function + band1.run(); + band2.run(); + + return dst; +} +} // namespace + +#ifndef DOXYGEN_SKIP_THIS +BOOST_AUTO_TEST_SUITE(CL) +BOOST_AUTO_TEST_SUITE(BitwiseAnd) + +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(Configuration, SmallShapes() + LargeShapes(), shape) +{ + // Create tensors + CLTensor src1 = create_tensor(shape, DataType::U8); + CLTensor src2 = create_tensor(shape, DataType::U8); + CLTensor dst = create_tensor(shape, DataType::U8); + + BOOST_TEST(src1.info()->is_resizable()); + BOOST_TEST(src2.info()->is_resizable()); + BOOST_TEST(dst.info()->is_resizable()); + + // Create and configure function + CLBitwiseAnd band; + band.configure(&src1, &src2, &dst); + + // Validate valid region + const ValidRegion valid_region = shape_to_valid_region(shape); + validate(src1.info()->valid_region(), valid_region); + validate(src2.info()->valid_region(), valid_region); + validate(dst.info()->valid_region(), valid_region); + + // Validate padding + const PaddingSize padding(0, required_padding(shape.x(), 16), 0, 0); + validate(src1.info()->padding(), padding); + validate(src2.info()->padding(), padding); + validate(dst.info()->padding(), padding); +} + +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit")) +BOOST_DATA_TEST_CASE(RunSmall, SmallShapes(), shape) +{ + // Compute function + CLTensor dst = compute_bitwise_and(shape); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_bitwise_and(shape); + + // Validate output + validate(CLAccessor(dst), ref_dst); +} + +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit")) +BOOST_AUTO_TEST_CASE(RunSubTensor) +{ + // Create shape + TensorShape shape(27U, 35U, 8U, 2U); + + // Compute function + CLTensor dst = compute_bitwise_and_subtensor(shape); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_bitwise_and(shape); + + // Validate output + validate(CLAccessor(dst), ref_dst); +} + +BOOST_TEST_DECORATOR(*boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(RunLarge, LargeShapes(), shape) +{ + // Compute function + CLTensor dst = compute_bitwise_and(shape); + + 
// Compute reference + RawTensor ref_dst = Reference::compute_reference_bitwise_and(shape); + + // Validate output + validate(CLAccessor(dst), ref_dst); +} + +BOOST_AUTO_TEST_SUITE_END() +BOOST_AUTO_TEST_SUITE_END() +#endif diff --git a/tests/validation/CL/CLFixture.cpp b/tests/validation/CL/CLFixture.cpp new file mode 100644 index 0000000000..845e16629d --- /dev/null +++ b/tests/validation/CL/CLFixture.cpp @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "validation/CL/CLFixture.h" + +#include "boost_wrapper.h" + +using namespace arm_compute; +using namespace arm_compute::test; +using namespace arm_compute::test::validation; +using namespace arm_compute::test::validation::cl; + +BOOST_GLOBAL_FIXTURE(CLFixture); diff --git a/tests/validation/CL/CLFixture.h b/tests/validation/CL/CLFixture.h new file mode 100644 index 0000000000..138e0566eb --- /dev/null +++ b/tests/validation/CL/CLFixture.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */
+#ifndef __ARM_COMPUTE_TEST_VALIDATION_CL_CLFIXTURE_H__
+#define __ARM_COMPUTE_TEST_VALIDATION_CL_CLFIXTURE_H__
+
+#include "arm_compute/runtime/CL/CLScheduler.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace cl
+{
+struct CLFixture
+{
+    CLFixture()
+    {
+        CLScheduler::get().default_init();
+    }
+};
+} // namespace cl
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif
diff --git a/tests/validation/CL/CMakeLists.txt b/tests/validation/CL/CMakeLists.txt
new file mode 100644
index 0000000000..209b662033
--- /dev/null
+++ b/tests/validation/CL/CMakeLists.txt
@@ -0,0 +1,48 @@
+# Copyright (c) 2017 ARM Limited.
+#
+# SPDX-License-Identifier: MIT
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to
+# deal in the Software without restriction, including without limitation the
+# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+# sell copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+cmake_minimum_required (VERSION 3.1)
+
+include_directories(${CMAKE_SOURCE_DIR}/../include)
+
+set(arm_compute_test_validation_OPENCL_SOURCE_FILES
+    ${CMAKE_SOURCE_DIR}/CL/CLAccessor.h
+    ${CMAKE_SOURCE_DIR}/CL/Helper.h
+    ${CMAKE_CURRENT_SOURCE_DIR}/CLFixture.h
+    ${CMAKE_CURRENT_SOURCE_DIR}/CLFixture.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/BitwiseAnd.cpp
+)
+
+add_library(arm_compute_test_validation_OPENCL OBJECT
+    ${arm_compute_test_validation_OPENCL_SOURCE_FILES}
+)
+
+set(arm_compute_test_validation_TARGET_OBJECTS
+    ${arm_compute_test_validation_TARGET_OBJECTS}
+    $<TARGET_OBJECTS:arm_compute_test_validation_OPENCL>
+    PARENT_SCOPE
+)
+
+set(arm_compute_test_validation_TARGET_LIBRARIES
+    ${arm_compute_test_validation_TARGET_LIBRARIES}
+    OpenCL
+    PARENT_SCOPE
+)
diff --git a/tests/validation/CL/DepthConvert.cpp b/tests/validation/CL/DepthConvert.cpp
new file mode 100644
index 0000000000..7a421ecf5a
--- /dev/null
+++ b/tests/validation/CL/DepthConvert.cpp
@@ -0,0 +1,413 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
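For reference, the global fixture above is what guarantees an OpenCL context and queue exist before any data test case runs; a minimal standalone equivalent, using only the Boost.Test macro and CLScheduler call that appear in this patch (CLContextFixture is a hypothetical name, the patch's own type is CLFixture):

    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "boost_wrapper.h"

    struct CLContextFixture
    {
        CLContextFixture()
        {
            // Create the default CL context/queue once per test binary
            arm_compute::CLScheduler::get().default_init();
        }
    };
    BOOST_GLOBAL_FIXTURE(CLContextFixture);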
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "CL/CLAccessor.h" +#include "CL/Helper.h" +#include "Globals.h" +#include "TensorLibrary.h" +#include "TypePrinter.h" +#include "Utils.h" +#include "validation/Datasets.h" +#include "validation/Reference.h" +#include "validation/Validation.h" + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/functions/CLDepthConvert.h" +#include "arm_compute/runtime/Tensor.h" +#include "arm_compute/runtime/TensorAllocator.h" + +#include "boost_wrapper.h" + +#include +#include + +using namespace arm_compute; +using namespace arm_compute::test; +using namespace arm_compute::test::cl; +using namespace arm_compute::test::validation; + +namespace +{ +/** Compute CL depth convert function. + * + * @param[in] shape Shape of the input and output tensors. + * @param[in] dt_in Data type of input tensor. + * @param[in] dt_out Data type of the output tensor. + * @param[in] policy Conversion policy. + * @param[in] shift Value for down/up conversions. Must be 0 <= shift < 8. + * + * @return Computed output CLtensor. + */ +CLTensor compute_depth_convert(const TensorShape &shape, DataType dt_in, DataType dt_out, ConvertPolicy policy, uint32_t shift) +{ + // Create tensors + CLTensor src = create_tensor(shape, dt_in); + CLTensor dst = create_tensor(shape, dt_out); + + // Create and configure function + CLDepthConvert depth_convert; + depth_convert.configure(&src, &dst, policy, shift); + + // Allocate tensors + src.allocator()->allocate(); + dst.allocator()->allocate(); + + BOOST_TEST(!src.info()->is_resizable()); + BOOST_TEST(!dst.info()->is_resizable()); + + // Fill tensors + library->fill_tensor_uniform(CLAccessor(src), 0); + + // Compute function + depth_convert.run(); + + return dst; +} +/** Configure and validate region/padding function. + * + * @param[in] shape Shape of the input and output tensors. + * @param[in] dt_in Data type of input tensor. + * @param[in] dt_out Data type of the output tensor. + * @param[in] policy Conversion policy. + * @param[in] shift Value for down/up conversions. Must be 0 <= shift < 8. 
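+ * Both tensors keep the same shape; only the element data type differs, so the
+ * expected valid region and padding below are identical for input and output.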
+ * + */ +void compute_configure_validate(const TensorShape &shape, DataType dt_in, DataType dt_out, ConvertPolicy policy, uint32_t shift) +{ + // Create tensors + CLTensor src = create_tensor(shape, dt_in); + CLTensor dst = create_tensor(shape, dt_out); + + BOOST_TEST(src.info()->is_resizable()); + BOOST_TEST(dst.info()->is_resizable()); + + // Create and configure function + CLDepthConvert depth_convert; + depth_convert.configure(&src, &dst, policy, shift); + + // Validate valid region + const ValidRegion valid_region = shape_to_valid_region(shape); + validate(src.info()->valid_region(), valid_region); + validate(dst.info()->valid_region(), valid_region); + + // Validate padding + const PaddingSize padding(0, required_padding(shape.x(), 16), 0, 0); + validate(src.info()->padding(), padding); + validate(dst.info()->padding(), padding); +} +} // namespace + +#ifndef DOXYGEN_SKIP_THIS +BOOST_AUTO_TEST_SUITE(CL) +BOOST_AUTO_TEST_SUITE(DepthConvert) + +BOOST_AUTO_TEST_SUITE(U8_to_U16) +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(Configuration, (SmallShapes() + LargeShapes()) * boost::unit_test::data::make({ ConvertPolicy::SATURATE, ConvertPolicy::WRAP }) + * boost::unit_test::data::xrange(0, 7, 1), + shape, policy, shift) +{ + // Compute configure and validate region/padding + compute_configure_validate(shape, DataType::U8, DataType::U16, policy, shift); +} + +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit")) +BOOST_DATA_TEST_CASE(RunSmall, SmallShapes() * boost::unit_test::data::make({ ConvertPolicy::SATURATE, ConvertPolicy::WRAP }) + * boost::unit_test::data::xrange(0, 7, 1), + shape, policy, shift) +{ + // Compute function + CLTensor dst = compute_depth_convert(shape, DataType::U8, DataType::U16, policy, shift); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_depth_convert(shape, DataType::U8, DataType::U16, policy, shift, 0); + + // Validate output + validate(CLAccessor(dst), ref_dst); +} +BOOST_TEST_DECORATOR(*boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(RunLarge, LargeShapes() * boost::unit_test::data::make({ ConvertPolicy::SATURATE, ConvertPolicy::WRAP }) + * boost::unit_test::data::xrange(0, 7, 1), + shape, policy, shift) +{ + // Compute function + CLTensor dst = compute_depth_convert(shape, DataType::U8, DataType::U16, policy, shift); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_depth_convert(shape, DataType::U8, DataType::U16, policy, shift, 0); + + // Validate output + validate(CLAccessor(dst), ref_dst); +} +BOOST_AUTO_TEST_SUITE_END() + +BOOST_AUTO_TEST_SUITE(U8_to_S16) +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(Configuration, (SmallShapes() + LargeShapes()) * boost::unit_test::data::make({ ConvertPolicy::SATURATE, ConvertPolicy::WRAP }) + * boost::unit_test::data::xrange(0, 7, 1), + shape, policy, shift) +{ + // Compute configure and validate region/padding + compute_configure_validate(shape, DataType::U8, DataType::S16, policy, shift); +} + +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit")) +BOOST_DATA_TEST_CASE(RunSmall, SmallShapes() * boost::unit_test::data::make({ ConvertPolicy::SATURATE, ConvertPolicy::WRAP }) + * boost::unit_test::data::xrange(0, 7, 1), + shape, policy, shift) +{ + // Compute function + CLTensor dst = compute_depth_convert(shape, DataType::U8, DataType::S16, policy, shift); + + // Compute reference + RawTensor 
ref_dst = Reference::compute_reference_depth_convert(shape, DataType::U8, DataType::S16, policy, shift, 0); + + // Validate output + validate(CLAccessor(dst), ref_dst); +} + +BOOST_TEST_DECORATOR(*boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(RunLarge, LargeShapes() * boost::unit_test::data::make({ ConvertPolicy::SATURATE, ConvertPolicy::WRAP }) + * boost::unit_test::data::xrange(0, 7, 1), + shape, policy, shift) +{ + // Compute function + CLTensor dst = compute_depth_convert(shape, DataType::U8, DataType::S16, policy, shift); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_depth_convert(shape, DataType::U8, DataType::S16, policy, shift, 0); + + // Validate output + validate(CLAccessor(dst), ref_dst); +} +BOOST_AUTO_TEST_SUITE_END() + +BOOST_AUTO_TEST_SUITE(U8_to_S32) +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(Configuration, (SmallShapes() + LargeShapes()) * boost::unit_test::data::make({ ConvertPolicy::SATURATE, ConvertPolicy::WRAP }) + * boost::unit_test::data::xrange(0, 7, 1), + shape, policy, shift) +{ + // Compute configure and validate region/padding + compute_configure_validate(shape, DataType::U8, DataType::S32, policy, shift); +} + +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit")) +BOOST_DATA_TEST_CASE(RunSmall, SmallShapes() * boost::unit_test::data::make({ ConvertPolicy::SATURATE, ConvertPolicy::WRAP }) + * boost::unit_test::data::xrange(0, 7, 1), + shape, policy, shift) +{ + // Compute function + CLTensor dst = compute_depth_convert(shape, DataType::U8, DataType::S32, policy, shift); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_depth_convert(shape, DataType::U8, DataType::S32, policy, shift, 0); + + // Validate output + validate(CLAccessor(dst), ref_dst); +} + +BOOST_TEST_DECORATOR(*boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(RunLarge, LargeShapes() * boost::unit_test::data::make({ ConvertPolicy::SATURATE, ConvertPolicy::WRAP }) + * boost::unit_test::data::xrange(0, 7, 1), + shape, policy, shift) +{ + // Compute function + CLTensor dst = compute_depth_convert(shape, DataType::U8, DataType::S32, policy, shift); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_depth_convert(shape, DataType::U8, DataType::S32, policy, shift, 0); + + // Validate output + validate(CLAccessor(dst), ref_dst); +} +BOOST_AUTO_TEST_SUITE_END() + +BOOST_AUTO_TEST_SUITE(U16_to_U8) +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(Configuration, (SmallShapes() + LargeShapes()) * boost::unit_test::data::make({ ConvertPolicy::SATURATE, ConvertPolicy::WRAP }) + * boost::unit_test::data::xrange(0, 7, 1), + shape, policy, shift) +{ + // Compute configure and validate region/padding + compute_configure_validate(shape, DataType::U16, DataType::U8, policy, shift); +} + +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit")) +BOOST_DATA_TEST_CASE(RunSmall, SmallShapes() * boost::unit_test::data::make({ ConvertPolicy::SATURATE, ConvertPolicy::WRAP }) + * boost::unit_test::data::xrange(0, 7, 1), + shape, policy, shift) +{ + // Compute function + CLTensor dst = compute_depth_convert(shape, DataType::U16, DataType::U8, policy, shift); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_depth_convert(shape, DataType::U16, DataType::U8, policy, shift, 0); + + // Validate output + validate(CLAccessor(dst), ref_dst); +} + 
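+// The dataset expression handed to BOOST_DATA_TEST_CASE above is a Cartesian product:
+// operator+ concatenates datasets (e.g. SmallShapes() + LargeShapes()) while operator*
+// combines them, so a single test case expands to one run per (shape, policy, shift)
+// tuple, with shift taking the values 0..6 from xrange(0, 7, 1). A minimal sketch of
+// the same pattern, using a hypothetical MyShapes dataset:
+//
+//   BOOST_DATA_TEST_CASE(Example, MyShapes() * boost::unit_test::data::xrange(0, 7, 1),
+//                        shape, shift)
+//   {
+//       // Runs once for every (shape, shift) combination.
+//   }
+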
+BOOST_TEST_DECORATOR(*boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(RunLarge, LargeShapes() * boost::unit_test::data::make({ ConvertPolicy::SATURATE, ConvertPolicy::WRAP }) + * boost::unit_test::data::xrange(0, 7, 1), + shape, policy, shift) +{ + // Compute function + CLTensor dst = compute_depth_convert(shape, DataType::U16, DataType::U8, policy, shift); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_depth_convert(shape, DataType::U16, DataType::U8, policy, shift, 0); + + // Validate output + validate(CLAccessor(dst), ref_dst); +} +BOOST_AUTO_TEST_SUITE_END() + +BOOST_AUTO_TEST_SUITE(U16_to_U32) +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(Configuration, (SmallShapes() + LargeShapes()) * boost::unit_test::data::make({ ConvertPolicy::SATURATE, ConvertPolicy::WRAP }) + * boost::unit_test::data::xrange(0, 7, 1), + shape, policy, shift) +{ + // Compute configure and validate region/padding + compute_configure_validate(shape, DataType::U16, DataType::U32, policy, shift); +} + +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit")) +BOOST_DATA_TEST_CASE(RunSmall, SmallShapes() * boost::unit_test::data::make({ ConvertPolicy::SATURATE, ConvertPolicy::WRAP }) + * boost::unit_test::data::xrange(0, 7, 1), + shape, policy, shift) +{ + // Compute function + CLTensor dst = compute_depth_convert(shape, DataType::U16, DataType::U32, policy, shift); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_depth_convert(shape, DataType::U16, DataType::U32, policy, shift, 0); + + // Validate output + validate(CLAccessor(dst), ref_dst); +} + +BOOST_TEST_DECORATOR(*boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(RunLarge, LargeShapes() * boost::unit_test::data::make({ ConvertPolicy::SATURATE, ConvertPolicy::WRAP }) + * boost::unit_test::data::xrange(0, 7, 1), + shape, policy, shift) +{ + // Compute function + CLTensor dst = compute_depth_convert(shape, DataType::U16, DataType::U32, policy, shift); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_depth_convert(shape, DataType::U16, DataType::U32, policy, shift, 0); + + // Validate output + validate(CLAccessor(dst), ref_dst); +} +BOOST_AUTO_TEST_SUITE_END() + +BOOST_AUTO_TEST_SUITE(S16_to_U8) +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(Configuration, (SmallShapes() + LargeShapes()) * boost::unit_test::data::make({ ConvertPolicy::SATURATE, ConvertPolicy::WRAP }) + * boost::unit_test::data::xrange(0, 7, 1), + shape, policy, shift) +{ + // Compute configure and validate region/padding + compute_configure_validate(shape, DataType::S16, DataType::U8, policy, shift); +} + +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit")) +BOOST_DATA_TEST_CASE(RunSmall, SmallShapes() * boost::unit_test::data::make({ ConvertPolicy::SATURATE, ConvertPolicy::WRAP }) + * boost::unit_test::data::xrange(0, 7, 1), + shape, policy, shift) +{ + // Compute function + CLTensor dst = compute_depth_convert(shape, DataType::S16, DataType::U8, policy, shift); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_depth_convert(shape, DataType::S16, DataType::U8, policy, shift, 0); + + // Validate output + validate(CLAccessor(dst), ref_dst); +} + +BOOST_TEST_DECORATOR(*boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(RunLarge, LargeShapes() * boost::unit_test::data::make({ ConvertPolicy::SATURATE, 
ConvertPolicy::WRAP }) + * boost::unit_test::data::xrange(0, 7, 1), + shape, policy, shift) +{ + // Compute function + CLTensor dst = compute_depth_convert(shape, DataType::S16, DataType::U8, policy, shift); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_depth_convert(shape, DataType::S16, DataType::U8, policy, shift, 0); + + // Validate output + validate(CLAccessor(dst), ref_dst); +} +BOOST_AUTO_TEST_SUITE_END() + +BOOST_AUTO_TEST_SUITE(S16_to_S32) +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(Configuration, (SmallShapes() + LargeShapes()) * boost::unit_test::data::make({ ConvertPolicy::SATURATE, ConvertPolicy::WRAP }) + * boost::unit_test::data::xrange(0, 7, 1), + shape, policy, shift) +{ + // Compute configure and validate region/padding + compute_configure_validate(shape, DataType::S16, DataType::S32, policy, shift); +} + +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit")) +BOOST_DATA_TEST_CASE(RunSmall, SmallShapes() * boost::unit_test::data::make({ ConvertPolicy::SATURATE, ConvertPolicy::WRAP }) + * boost::unit_test::data::xrange(0, 7, 1), + shape, policy, shift) +{ + // Compute function + CLTensor dst = compute_depth_convert(shape, DataType::S16, DataType::S32, policy, shift); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_depth_convert(shape, DataType::S16, DataType::S32, policy, shift, 0); + + // Validate output + validate(CLAccessor(dst), ref_dst); +} + +BOOST_TEST_DECORATOR(*boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(RunLarge, LargeShapes() * boost::unit_test::data::make({ ConvertPolicy::SATURATE, ConvertPolicy::WRAP }) + * boost::unit_test::data::xrange(0, 7, 1), + shape, policy, shift) +{ + // Compute function + CLTensor dst = compute_depth_convert(shape, DataType::S16, DataType::S32, policy, shift); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_depth_convert(shape, DataType::S16, DataType::S32, policy, shift, 0); + + // Validate output + validate(CLAccessor(dst), ref_dst); +} +BOOST_AUTO_TEST_SUITE_END() + +BOOST_AUTO_TEST_SUITE_END() +BOOST_AUTO_TEST_SUITE_END() +#endif diff --git a/tests/validation/CL/FillBorder.cpp b/tests/validation/CL/FillBorder.cpp new file mode 100644 index 0000000000..42b9064982 --- /dev/null +++ b/tests/validation/CL/FillBorder.cpp @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "CL/CLAccessor.h" +#include "CL/Helper.h" +#include "Globals.h" +#include "TensorLibrary.h" +#include "TypePrinter.h" +#include "Utils.h" +#include "validation/Datasets.h" +#include "validation/Validation.h" + +#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/CLScheduler.h" +#include "arm_compute/runtime/CL/CLTensor.h" +#include "arm_compute/runtime/CL/CLTensorAllocator.h" + +#include "boost_wrapper.h" + +#include +#include + +using namespace arm_compute; +using namespace arm_compute::test; +using namespace arm_compute::test::cl; +using namespace arm_compute::test::validation; + +#ifndef DOXYGEN_SKIP_THIS +BOOST_AUTO_TEST_SUITE(CL) + +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(FillBorder, BorderModes() * boost::unit_test::data::make({ PaddingSize{ 0 }, PaddingSize{ 1, 0, 1, 2 }, PaddingSize{ 10 } }), border_mode, padding) +{ + constexpr uint8_t border_value = 42U; + constexpr uint8_t tensor_value = 89U; + BorderSize border_size{ 5 }; + + // Create tensors + CLTensor src = create_tensor(TensorShape{ 10U, 10U, 2U }, DataType::U8); + + src.info()->extend_padding(padding); + + // Allocate tensor + src.allocator()->allocate(); + + // Check padding is as required + validate(src.info()->padding(), padding); + + // Fill tensor with constant value + std::uniform_int_distribution distribution{ tensor_value, tensor_value }; + library->fill(CLAccessor(src), distribution, 0); + + // Create and configure kernel + CLFillBorderKernel fill_border; + fill_border.configure(&src, border_size, border_mode, border_value); + + // Run kernel + fill_border.run(fill_border.window(), CLScheduler::get().queue()); + + // Validate border + border_size.limit(padding); + validate(CLAccessor(src), border_size, border_mode, &border_value); + + // Validate tensor + validate(CLAccessor(src), &tensor_value); +} + +BOOST_AUTO_TEST_SUITE_END() +#endif diff --git a/tests/validation/CL/Threshold.cpp b/tests/validation/CL/Threshold.cpp new file mode 100644 index 0000000000..a8c77ec10a --- /dev/null +++ b/tests/validation/CL/Threshold.cpp @@ -0,0 +1,154 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "CL/CLAccessor.h" +#include "CL/Helper.h" +#include "Globals.h" +#include "TensorLibrary.h" +#include "TypePrinter.h" +#include "Utils.h" +#include "dataset/ThresholdDataset.h" +#include "validation/Datasets.h" +#include "validation/Reference.h" +#include "validation/Validation.h" + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/CLTensor.h" +#include "arm_compute/runtime/CL/CLTensorAllocator.h" +#include "arm_compute/runtime/CL/functions/CLThreshold.h" + +#include "boost_wrapper.h" + +#include +#include + +using namespace arm_compute; +using namespace arm_compute::test; +using namespace arm_compute::test::cl; +using namespace arm_compute::test::validation; + +namespace +{ +/** Compute Threshold function. + * + * @param[in] shape Shape of the input and output tensors. + * @param[in] threshold Threshold. When the threshold type is RANGE, this is used as the lower threshold. + * @param[in] false_value value to set when the condition is not respected. + * @param[in] true_value value to set when the condition is respected. + * @param[in] type Thresholding type. Either RANGE or BINARY. + * @param[in] upper Upper threshold. Only used when the thresholding type is RANGE. + * + * @return Computed output tensor. + */ +CLTensor compute_threshold(const TensorShape &shape, uint8_t threshold, uint8_t false_value, uint8_t true_value, ThresholdType type, uint8_t upper) +{ + // Create tensors + CLTensor src = create_tensor(shape, DataType::U8); + CLTensor dst = create_tensor(shape, DataType::U8); + + // Create and configure function + CLThreshold thrsh; + thrsh.configure(&src, &dst, threshold, false_value, true_value, type, upper); + + // Allocate tensors + src.allocator()->allocate(); + dst.allocator()->allocate(); + + BOOST_TEST(!src.info()->is_resizable()); + BOOST_TEST(!dst.info()->is_resizable()); + + // Fill tensors + library->fill_tensor_uniform(CLAccessor(src), 0); + + // Compute function + thrsh.run(); + + return dst; +} +} // namespace + +#ifndef DOXYGEN_SKIP_THIS +BOOST_AUTO_TEST_SUITE(CL) +BOOST_AUTO_TEST_SUITE(Threshold) + +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(Configuration, + (SmallShapes() + LargeShapes()) * ThresholdDataset(), + shape, threshold_conf) +{ + // Create tensors + CLTensor src = create_tensor(shape, DataType::U8); + CLTensor dst = create_tensor(shape, DataType::U8); + + BOOST_TEST(src.info()->is_resizable()); + BOOST_TEST(dst.info()->is_resizable()); + + // Create and configure function + CLThreshold cl_threshold; + cl_threshold.configure(&src, &dst, threshold_conf.threshold, threshold_conf.false_value, threshold_conf.true_value, threshold_conf.type, threshold_conf.upper); + + // Validate valid region + const ValidRegion valid_region = shape_to_valid_region(shape); + validate(src.info()->valid_region(), valid_region); + validate(dst.info()->valid_region(), valid_region); + + // Validate padding + const PaddingSize padding(0, required_padding(shape.x(), 16), 0, 0); + validate(src.info()->padding(), padding); + validate(dst.info()->padding(), padding); +} + +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit")) +BOOST_DATA_TEST_CASE(RunSmall, + 
SmallShapes() * ThresholdDataset(), + shape, threshold_conf) +{ + // Compute function + CLTensor dst = compute_threshold(shape, threshold_conf.threshold, threshold_conf.false_value, threshold_conf.true_value, threshold_conf.type, threshold_conf.upper); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_threshold(shape, threshold_conf.threshold, threshold_conf.false_value, threshold_conf.true_value, threshold_conf.type, threshold_conf.upper); + + // Validate output + validate(CLAccessor(dst), ref_dst); +} + +BOOST_TEST_DECORATOR(*boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(RunLarge, + LargeShapes() * ThresholdDataset(), + shape, threshold_conf) +{ + // Compute function + CLTensor dst = compute_threshold(shape, threshold_conf.threshold, threshold_conf.false_value, threshold_conf.true_value, threshold_conf.type, threshold_conf.upper); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_threshold(shape, threshold_conf.threshold, threshold_conf.false_value, threshold_conf.true_value, threshold_conf.type, threshold_conf.upper); + + // Validate output + validate(CLAccessor(dst), ref_dst); +} + +BOOST_AUTO_TEST_SUITE_END() +BOOST_AUTO_TEST_SUITE_END() +#endif diff --git a/tests/validation/CMakeLists.txt b/tests/validation/CMakeLists.txt new file mode 100644 index 0000000000..3d8f56610b --- /dev/null +++ b/tests/validation/CMakeLists.txt @@ -0,0 +1,96 @@ +# Copyright (c) 2017 ARM Limited. +# +# SPDX-License-Identifier: MIT +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to +# deal in the Software without restriction, including without limitation the +# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +# sell copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
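+
+# Note: the targets declared below import prebuilt shared libraries (OpenVX, vxu and an
+# OpenCL stub) so the validation binary can link against them without building them here.
+# IMPORTED_NO_SONAME is set on the OpenCL stub because the library file carries no
+# SONAME entry, which makes CMake link it by its full path (an inference from the
+# properties below, not from project documentation).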
+cmake_minimum_required (VERSION 3.1) + +add_library(openvx SHARED IMPORTED) +set_target_properties(openvx PROPERTIES + IMPORTED_LOCATION "${CMAKE_SOURCE_DIR}/../3rdparty/linux/armv7a/libopenvx.so" +) + +add_library(vxu SHARED IMPORTED) +set_target_properties(vxu PROPERTIES + IMPORTED_LOCATION "${CMAKE_SOURCE_DIR}/../3rdparty/linux/armv7a/libvxu.so" +) + +add_library(OpenCL SHARED IMPORTED) +set_target_properties(OpenCL PROPERTIES + IMPORTED_LOCATION "${CMAKE_SOURCE_DIR}/../build/opencl-1.2-stubs/libOpenCL.so" + IMPORTED_NO_SONAME 1 +) + +add_definitions(-DBOOST) + +set(ARM_COMPUTE_TARGETS_TO_VALIDATE "all" CACHE STRING "Semicolon-separated list of targets to include in validation.") + +set(ARM_COMPUTE_ALL_TARGETS + NEON + CL + UNIT + VX +) + +if(ARM_COMPUTE_TARGETS_TO_VALIDATE STREQUAL "all") + set(ARM_COMPUTE_TARGETS_TO_VALIDATE ${ARM_COMPUTE_ALL_TARGETS}) +endif() + +list(REMOVE_DUPLICATES ARM_COMPUTE_TARGETS_TO_VALIDATE) + +foreach(TARGET ${ARM_COMPUTE_TARGETS_TO_VALIDATE}) + list(FIND ARM_COMPUTE_ALL_TARGETS ${TARGET} idx) + + if(${idx} LESS 0) + message(FATAL_ERROR "The target '${TARGET}' does not exist. It should be one of\n${ARM_COMPUTE_ALL_TARGETS}") + else() + add_subdirectory(${TARGET}) + endif() +endforeach() + +set(arm_compute_test_validation_SOURCE_FILES + ${CMAKE_CURRENT_SOURCE_DIR}/main.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/Datasets.h + ${CMAKE_CURRENT_SOURCE_DIR}/Reference.h + ${CMAKE_CURRENT_SOURCE_DIR}/Reference.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/ReferenceCPP.h + ${CMAKE_CURRENT_SOURCE_DIR}/ReferenceCPP.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/Validation.h + ${CMAKE_CURRENT_SOURCE_DIR}/Validation.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/ValidationProgramOptions.h + ${CMAKE_CURRENT_SOURCE_DIR}/ValidationUserConfiguration.h +) + +add_library(arm_compute_test_validation OBJECT + ${arm_compute_test_validation_SOURCE_FILES} +) + +add_executable(arm_compute_validation + $ + ${arm_compute_test_validation_TARGET_OBJECTS} + $ + $ +) + +target_link_libraries(arm_compute_validation + boost_unit_test_framework + boost_program_options + arm_compute + ${arm_compute_test_validation_TARGET_LIBRARIES} +) diff --git a/tests/validation/Datasets.h b/tests/validation/Datasets.h new file mode 100644 index 0000000000..ae76fb6be3 --- /dev/null +++ b/tests/validation/Datasets.h @@ -0,0 +1,238 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_TEST_VALIDATION_DATASETS_H__ +#define __ARM_COMPUTE_TEST_VALIDATION_DATASETS_H__ + +#include "dataset/ActivationFunctionDataset.h" +#include "dataset/BatchNormalizationLayerDataset.h" +#include "dataset/BorderModeDataset.h" +#include "dataset/ConvertPolicyDataset.h" +#include "dataset/ConvolutionLayerDataset.h" +#include "dataset/DataTypeDatasets.h" +#include "dataset/FullyConnectedLayerDataset.h" +#include "dataset/GEMMDataset.h" +#include "dataset/ImageDatasets.h" +#include "dataset/InterpolationPolicyDataset.h" +#include "dataset/NormalizationTypeDataset.h" +#include "dataset/PoolingLayerDataset.h" +#include "dataset/RoundingPolicyDataset.h" +#include "dataset/ShapeDatasets.h" +#include "dataset/ThresholdDataset.h" + +#include "boost_wrapper.h" + +using namespace boost::unit_test::data::monomorphic; + +namespace boost +{ +namespace unit_test +{ +namespace data +{ +namespace monomorphic +{ +/// Register the data set with Boost +template <> +struct is_dataset : boost::mpl::true_ +{ +}; + +/// Register the data set with Boost +template <> +struct is_dataset : boost::mpl::true_ +{ +}; + +/// Register the data set with Boost +template <> +struct is_dataset : boost::mpl::true_ +{ +}; + +/// Register the data set with Boost +template <> +struct is_dataset : boost::mpl::true_ +{ +}; + +/// Register the data set with Boost +template <> +struct is_dataset : boost::mpl::true_ +{ +}; + +/// Register the data set with Boost +template <> +struct is_dataset : boost::mpl::true_ +{ +}; + +/// Register the data set with Boost +template <> +struct is_dataset : boost::mpl::true_ +{ +}; + +// Register the data set with Boost +template <> +struct is_dataset : boost::mpl::true_ +{ +}; + +/// Register the data set with Boost +template <> +struct is_dataset : boost::mpl::true_ +{ +}; + +/// Register the data set with Boost +template <> +struct is_dataset : boost::mpl::true_ +{ +}; + +/// Register the data set with Boost +template <> +struct is_dataset : boost::mpl::true_ +{ +}; + +/// Register the data set with Boost +template <> +struct is_dataset : boost::mpl::true_ +{ +}; + +/// Register the data set with Boost +template <> +struct is_dataset : boost::mpl::true_ +{ +}; + +/// Register the data set with Boost +template <> +struct is_dataset : boost::mpl::true_ +{ +}; + +/// Register the data set with Boost +template <> +struct is_dataset : boost::mpl::true_ +{ +}; + +/// Register the data set with Boost +template <> +struct is_dataset : boost::mpl::true_ +{ +}; + +/// Register the data set with Boost +template <> +struct is_dataset : boost::mpl::true_ +{ +}; + +/// Register the data set with Boost +template <> +struct is_dataset : boost::mpl::true_ +{ +}; + +/// Register the data set with Boost +template <> +struct is_dataset : boost::mpl::true_ +{ +}; + +/// Register the data set with Boost +template <> +struct is_dataset : boost::mpl::true_ +{ +}; + +/// Register the data set with Boost +template <> +struct is_dataset : boost::mpl::true_ +{ +}; + +/// Register the data set with Boost +template <> +struct is_dataset : boost::mpl::true_ +{ +}; + +/// Register the data set with Boost +template <> +struct is_dataset : boost::mpl::true_ +{ +}; + +/// Register the data set with Boost +template <> +struct is_dataset : boost::mpl::true_ +{ +}; + +/// Register the data set with Boost +template <> +struct is_dataset : boost::mpl::true_ +{ +}; + +/// Register the data set with Boost +template <> +struct is_dataset : boost::mpl::true_ +{ +}; + +/// Register the data set with Boost +template 
<> +struct is_dataset : boost::mpl::true_ +{ +}; + +/// Register the data set with Boost +template <> +struct is_dataset : boost::mpl::true_ +{ +}; + +/// Register the data set with Boost +template <> +struct is_dataset : boost::mpl::true_ +{ +}; + +/// Register the data set with Boost +template <> +struct is_dataset : boost::mpl::true_ +{ +}; +} +} +} +} +#endif diff --git a/tests/validation/FixedPoint.h b/tests/validation/FixedPoint.h new file mode 100644 index 0000000000..380bad04a1 --- /dev/null +++ b/tests/validation/FixedPoint.h @@ -0,0 +1,975 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_TEST_VALIDATION_FIXEDPOINT_H__ +#define __ARM_COMPUTE_TEST_VALIDATION_FIXEDPOINT_H__ + +#include "Utils.h" + +#include +#include +#include +#include +#include +#include + +namespace arm_compute +{ +namespace test +{ +namespace fixed_point_arithmetic +{ +namespace detail +{ +// Forward declare structs +struct functions; +template +struct constant_expr; +} + +/** Fixed point traits */ +namespace traits +{ +// Promote types +// *INDENT-OFF* +// clang-format off +template struct promote { }; +template <> struct promote { using type = uint16_t; }; +template <> struct promote { using type = int16_t; }; +template <> struct promote { using type = uint32_t; }; +template <> struct promote { using type = int32_t; }; +template <> struct promote { using type = uint64_t; }; +template <> struct promote { using type = int64_t; }; +template <> struct promote { using type = uint64_t; }; +template <> struct promote { using type = int64_t; }; +// clang-format on +// *INDENT-ON* +} + +/** Strongly typed enum class representing the overflow policy */ +enum class OverflowPolicy +{ + WRAP, /**< Wrap policy */ + SATURATE /**< Saturate policy */ +}; +/** Strongly typed enum class representing the rounding policy */ +enum class RoundingPolicy +{ + TO_ZERO, /**< Round to zero policy */ + TO_NEAREST_EVEN /**< Round to nearest even policy */ +}; + +/** Arbitrary fixed-point arithmetic class */ +template +class fixed_point +{ +public: + // Static Checks + static_assert(std::is_integral::value, "Type is not an integer"); + + // Friends + friend struct detail::functions; + friend struct detail::constant_expr; + + /** Constructor (from different fixed point type) + * + * @param[in] val Fixed point + * @param[in] p Fixed point precision + */ + template + fixed_point(fixed_point val, uint8_t p) + : 
_value(0), _fixed_point_position(p)
+    {
+        assert(p > 0 && p < std::numeric_limits::digits);
+        T v = 0;
+
+        if(std::numeric_limits::digits < std::numeric_limits::digits)
+        {
+            val.rescale(p);
+            v = detail::constant_expr::saturate_cast(val.raw());
+        }
+        else
+        {
+            auto v_cast = static_cast>(val);
+            v_cast.rescale(p);
+            v = v_cast.raw();
+        }
+        _value = static_cast(v);
+    }
+    /** Constructor (from integer)
+     *
+     * @param[in] val    Integer value to be represented as fixed point
+     * @param[in] p      Fixed point precision
+     * @param[in] is_raw If true, val is a raw fixed point value, else an integer
+     */
+    template ::value>::type>
+    fixed_point(U val, uint8_t p, bool is_raw = false)
+        : _value(val << p), _fixed_point_position(p)
+    {
+        if(is_raw)
+        {
+            _value = val;
+        }
+    }
+    /** Constructor (from float)
+     *
+     * @param[in] val Float value to be represented as fixed point
+     * @param[in] p   Fixed point precision
+     */
+    fixed_point(float val, uint8_t p)
+        : _value(detail::constant_expr::to_fixed(val, p)), _fixed_point_position(p)
+    {
+        assert(p > 0 && p < std::numeric_limits::digits);
+    }
+    /** Constructor (from float string)
+     *
+     * @param[in] str Float string to be represented as fixed point
+     * @param[in] p   Fixed point precision
+     */
+    fixed_point(std::string str, uint8_t p)
+        : _value(detail::constant_expr::to_fixed(arm_compute::test::cpp11::stof(str), p)), _fixed_point_position(p)
+    {
+        assert(p > 0 && p < std::numeric_limits::digits);
+    }
+    /** Default copy assignment operator */
+    fixed_point &operator=(const fixed_point &) = default;
+    /** Default move assignment operator */
+    fixed_point &operator=(fixed_point &&) = default;
+    /** Default copy constructor */
+    fixed_point(const fixed_point &) = default;
+    /** Default move constructor */
+    fixed_point(fixed_point &&) = default;
+
+    /** Float conversion operator
+     *
+     * @return Float representation of fixed point
+     */
+    operator float() const
+    {
+        return detail::constant_expr::to_float(_value, _fixed_point_position);
+    }
+    /** Integer conversion operator
+     *
+     * @return Integer representation of fixed point
+     */
+    template ::value>::type>
+    operator U() const
+    {
+        return detail::constant_expr::to_int(_value, _fixed_point_position);
+    }
+    /** Convert to a fixed point of a different underlying type with the same precision
+     *
+     * @note Down-conversion might fail.
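+     *       For example (given the saturate_cast applied below), converting a fixed point
+     *       backed by int16_t to one backed by int8_t clamps raw values outside the int8_t
+     *       range instead of wrapping them.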
+ */ + template + operator fixed_point() + { + U val = static_cast(_value); + if(std::numeric_limits::digits < std::numeric_limits::digits) + { + val = detail::constant_expr::saturate_cast(_value); + } + return fixed_point(val, _fixed_point_position, true); + } + + /** Arithmetic += assignment operator + * + * @param[in] rhs Fixed point operand + * + * @return Reference to this fixed point + */ + template + fixed_point &operator+=(const fixed_point &rhs) + { + fixed_point val(rhs, _fixed_point_position); + _value += val.raw(); + return *this; + } + /** Arithmetic -= assignment operator + * + * @param[in] rhs Fixed point operand + * + * @return Reference to this fixed point + */ + template + fixed_point &operator-=(const fixed_point &rhs) + { + fixed_point val(rhs, _fixed_point_position); + _value -= val.raw(); + return *this; + } + + /** Raw value accessor + * + * @return Raw fixed point value + */ + T raw() const + { + return _value; + } + /** Precision accessor + * + * @return Precision of fixed point + */ + uint8_t precision() const + { + return _fixed_point_position; + } + /** Rescale a fixed point to a new precision + * + * @param[in] p New fixed point precision + */ + void rescale(uint8_t p) + { + assert(p > 0 && p < std::numeric_limits::digits); + + if(p > _fixed_point_position) + { + _value <<= (p - _fixed_point_position); + } + else if(p < _fixed_point_position) + { + _value >>= (_fixed_point_position - p); + } + + _fixed_point_position = p; + } + +private: + T _value; /**< Fixed point raw value */ + uint8_t _fixed_point_position; /**< Fixed point precision */ +}; + +namespace detail +{ +/** Count the number of leading zero bits in the given value. + * + * @param[in] value Input value. + * + * @return Number of leading zero bits. + */ +template +constexpr int clz(T value) +{ + using unsigned_T = typename std::make_unsigned::type; + // __builtin_clz is available for int. Need to correct reported number to + // match the original type. + return __builtin_clz(value) - (32 - std::numeric_limits::digits); +} + +template +struct constant_expr +{ + /** Calculate representation of 1 in fixed point given a fixed point precision + * + * @param[in] p Fixed point precision + * + * @return Representation of value 1 in fixed point. + */ + static constexpr T fixed_one(uint8_t p) + { + return (1 << p); + } + /** Calculate fixed point precision step given a fixed point precision + * + * @param[in] p Fixed point precision + * + * @return Fixed point precision step + */ + static constexpr float fixed_step(uint8_t p) + { + return (1.0f / static_cast(1 << p)); + } + + /** Convert a fixed point value to float given its precision. + * + * @param[in] val Fixed point value + * @param[in] p Fixed point precision + * + * @return Float representation of the fixed point number + */ + static constexpr float to_float(T val, uint8_t p) + { + return static_cast(val * fixed_step(p)); + } + /** Convert a fixed point value to integer given its precision. + * + * @param[in] val Fixed point value + * @param[in] p Fixed point precision + * + * @return Integer of the fixed point number + */ + static constexpr T to_int(T val, uint8_t p) + { + return val >> p; + } + /** Convert a single precision floating point value to a fixed point representation given its precision. + * + * @param[in] val Floating point value + * @param[in] p Fixed point precision + * + * @return The raw fixed point representation + */ + static constexpr T to_fixed(float val, uint8_t p) + { + return static_cast(val * fixed_one(p) + ((val >= 0) ? 
0.5 : -0.5)); + } + /** Clamp value between two ranges + * + * @param[in] val Value to clamp + * @param[in] min Minimum value to clamp to + * @param[in] max Maximum value to clamp to + * + * @return clamped value + */ + static constexpr T clamp(T val, T min, T max) + { + return std::min(std::max(val, min), max); + } + /** Saturate given number + * + * @param[in] val Value to saturate + * + * @return Saturated value + */ + template + static constexpr T saturate_cast(U val) + { + return static_cast(std::min(std::max(val, static_cast(std::numeric_limits::min())), static_cast(std::numeric_limits::max()))); + } +}; +struct functions +{ + /** Output stream operator + * + * @param[in] s Output stream + * @param[in] x Fixed point value + * + * @return Reference output to updated stream + */ + template + static std::basic_ostream &write(std::basic_ostream &s, fixed_point &x) + { + return s << static_cast(x); + } + /** Signbit of a fixed point number. + * + * @param[in] x Fixed point number + * + * @return True if negative else false. + */ + template + static bool signbit(fixed_point x) + { + return ((x._value >> std::numeric_limits::digits) != 0); + } + /** Checks if two fixed point numbers are equal + * + * @param[in] x First fixed point operand + * @param[in] y Second fixed point operand + * + * @return True if fixed points are equal else false + */ + template + static bool isequal(fixed_point x, fixed_point y) + { + uint8_t p = std::min(x._fixed_point_position, y._fixed_point_position); + x.rescale(p); + y.rescale(p); + return (x._value == y._value); + } + /** Checks if two fixed point number are not equal + * + * @param[in] x First fixed point operand + * @param[in] y Second fixed point operand + * + * @return True if fixed points are not equal else false + */ + template + static bool isnotequal(fixed_point x, fixed_point y) + { + return !isequal(x, y); + } + /** Checks if one fixed point is greater than the other + * + * @param[in] x First fixed point operand + * @param[in] y Second fixed point operand + * + * @return True if fixed point is greater than other + */ + template + static bool isgreater(fixed_point x, fixed_point y) + { + uint8_t p = std::min(x._fixed_point_position, y._fixed_point_position); + x.rescale(p); + y.rescale(p); + return (x._value > y._value); + } + /** Checks if one fixed point is greater or equal than the other + * + * @param[in] x First fixed point operand + * @param[in] y Second fixed point operand + * + * @return True if fixed point is greater or equal than other + */ + template + static bool isgreaterequal(fixed_point x, fixed_point y) + { + uint8_t p = std::min(x._fixed_point_position, y._fixed_point_position); + x.rescale(p); + y.rescale(p); + return (x._value >= y._value); + } + /** Checks if one fixed point is less than the other + * + * @param[in] x First fixed point operand + * @param[in] y Second fixed point operand + * + * @return True if fixed point is less than other + */ + template + static bool isless(fixed_point x, fixed_point y) + { + uint8_t p = std::min(x._fixed_point_position, y._fixed_point_position); + x.rescale(p); + y.rescale(p); + return (x._value < y._value); + } + /** Checks if one fixed point is less or equal than the other + * + * @param[in] x First fixed point operand + * @param[in] y Second fixed point operand + * + * @return True if fixed point is less or equal than other + */ + template + static bool islessequal(fixed_point x, fixed_point y) + { + uint8_t p = std::min(x._fixed_point_position, y._fixed_point_position); + x.rescale(p); 
+ y.rescale(p); + return (x._value <= y._value); + } + /** Checks if one fixed point is less or greater than the other + * + * @param[in] x First fixed point operand + * @param[in] y Second fixed point operand + * + * @return True if fixed point is less or greater than other + */ + template + static bool islessgreater(fixed_point x, fixed_point y) + { + return isnotequal(x, y); + } + /** Clamp fixed point to specific range. + * + * @param[in] x Fixed point operand + * @param[in] min Minimum value to clamp to + * @param[in] max Maximum value to clamp to + * + * @return Clamped result + */ + template + static fixed_point clamp(fixed_point x, T min, T max) + { + return fixed_point(constant_expr::clamp(x._value, min, max), x._fixed_point_position, true); + } + /** Negate number + * + * @param[in] x Fixed point operand + * + * @return Negated fixed point result + */ + template + static fixed_point negate(fixed_point x) + { + using promoted_T = typename traits::promote::type; + promoted_T val = -x._value; + if(OP == OverflowPolicy::SATURATE) + { + val = constant_expr::saturate_cast(val); + } + return fixed_point(static_cast(val), x._fixed_point_position, true); + } + /** Perform addition among two fixed point numbers + * + * @param[in] x First fixed point operand + * @param[in] y Second fixed point operand + * + * @return Result fixed point with precision equal to minimum precision of both operands + */ + template + static fixed_point add(fixed_point x, fixed_point y) + { + uint8_t p = std::min(x._fixed_point_position, y._fixed_point_position); + x.rescale(p); + y.rescale(p); + if(OP == OverflowPolicy::SATURATE) + { + using type = typename traits::promote::type; + type val = static_cast(x._value) + static_cast(y._value); + val = constant_expr::saturate_cast(val); + return fixed_point(static_cast(val), p, true); + } + else + { + return fixed_point(x._value + y._value, p, true); + } + } + /** Perform subtraction among two fixed point numbers + * + * @param[in] x First fixed point operand + * @param[in] y Second fixed point operand + * + * @return Result fixed point with precision equal to minimum precision of both operands + */ + template + static fixed_point sub(fixed_point x, fixed_point y) + { + uint8_t p = std::min(x._fixed_point_position, y._fixed_point_position); + x.rescale(p); + y.rescale(p); + if(OP == OverflowPolicy::SATURATE) + { + using type = typename traits::promote::type; + type val = static_cast(x._value) - static_cast(y._value); + val = constant_expr::saturate_cast(val); + return fixed_point(static_cast(val), p, true); + } + else + { + return fixed_point(x._value - y._value, p, true); + } + } + /** Perform multiplication among two fixed point numbers + * + * @param[in] x First fixed point operand + * @param[in] y Second fixed point operand + * + * @return Result fixed point with precision equal to minimum precision of both operands + */ + template + static fixed_point mul(fixed_point x, fixed_point y) + { + using promoted_T = typename traits::promote::type; + uint8_t p_min = std::min(x._fixed_point_position, y._fixed_point_position); + uint8_t p_max = std::max(x._fixed_point_position, y._fixed_point_position); + promoted_T round_factor = (1 << (p_max - 1)); + promoted_T val = ((static_cast(x._value) * static_cast(y._value)) + round_factor) >> p_max; + if(OP == OverflowPolicy::SATURATE) + { + val = constant_expr::saturate_cast(val); + } + return fixed_point(static_cast(val), p_min, true); + } + /** Perform division among two fixed point numbers + * + * @param[in] x First fixed 
point operand + * @param[in] y Second fixed point operand + * + * @return Result fixed point with precision equal to minimum precision of both operands + */ + template + static fixed_point div(fixed_point x, fixed_point y) + { + using promoted_T = typename traits::promote::type; + uint8_t p = std::min(x._fixed_point_position, y._fixed_point_position); + promoted_T denom = static_cast(y._value); + if(denom != 0) + { + promoted_T val = (static_cast(x._value) << std::max(x._fixed_point_position, y._fixed_point_position)) / denom; + if(OP == OverflowPolicy::SATURATE) + { + val = constant_expr::saturate_cast(val); + } + return fixed_point(static_cast(val), p, true); + } + else + { + T val = (x._value < 0) ? std::numeric_limits::min() : std::numeric_limits::max(); + return fixed_point(val, p, true); + } + } + /** Shift left + * + * @param[in] x Fixed point operand + * @param[in] shift Shift value + * + * @return Shifted value + */ + template + static fixed_point shift_left(fixed_point x, size_t shift) + { + using promoted_T = typename traits::promote::type; + promoted_T val = static_cast(x._value) << shift; + if(OP == OverflowPolicy::SATURATE) + { + val = constant_expr::saturate_cast(val); + } + return fixed_point(static_cast(val), x._fixed_point_position, true); + } + /** Shift right + * + * @param[in] x Fixed point operand + * @param[in] shift Shift value + * + * @return Shifted value + */ + template + static fixed_point shift_right(fixed_point x, size_t shift) + { + return fixed_point(x._value >> shift, x._fixed_point_position, true); + } + /** Calculate absolute value + * + * @param[in] x Fixed point operand + * + * @return Absolute value of operand + */ + template + static fixed_point abs(fixed_point x) + { + using promoted_T = typename traits::promote::type; + T val = (x._value < 0) ? constant_expr::saturate_cast(-static_cast(x._value)) : x._value; + return fixed_point(val, x._fixed_point_position, true); + } + /** Calculate the logarithm of a fixed point number + * + * @param[in] x Fixed point operand + * + * @return Logarithm value of operand + */ + template + static fixed_point log(fixed_point x) + { + uint8_t p = x._fixed_point_position; + auto const_one = fixed_point(static_cast(1), p); + + // Logarithm of 1 is zero and logarithm of negative values is not defined in R, so return 0. + // Also, log(x) == -log(1/x) for 0 < x < 1. + if(isequal(x, const_one) || islessequal(x, fixed_point(static_cast(0), p))) + { + return fixed_point(static_cast(0), p, true); + } + else if(isless(x, const_one)) + { + return mul(log(div(const_one, x)), fixed_point(-1, p)); + } + + // Remove even powers of 2 + T shift_val = 31 - __builtin_clz(x._value >> p); + x = shift_right(x, shift_val); + x = sub(x, const_one); + + // Constants + auto ln2 = fixed_point(0.6931471, p); + auto A = fixed_point(1.4384189, p); + auto B = fixed_point(-0.67719, p); + auto C = fixed_point(0.3218538, p); + auto D = fixed_point(-0.0832229, p); + + // Polynomial expansion + auto sum = add(mul(x, D), C); + sum = add(mul(x, sum), B); + sum = add(mul(x, sum), A); + sum = mul(x, sum); + + return mul(add(sum, fixed_point(static_cast(shift_val), p)), ln2); + } + /** Calculate the exponential of a fixed point number. 
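+ * (In the code below, floor(x) / ln(2) is computed as scaled_int_part and applied as a
+ * bit shift, while exp of the fractional remainder frac_part is approximated by a
+ * fourth-order polynomial.)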
+ * + * exp(x) = exp(floor(x)) * exp(x - floor(x)) + * = pow(2, floor(x) / ln(2)) * exp(x - floor(x)) + * = exp(x - floor(x)) << (floor(x) / ln(2)) + * + * @param[in] x Fixed point operand + * + * @return Exponential value of operand + */ + template + static fixed_point exp(fixed_point x) + { + uint8_t p = x._fixed_point_position; + // Constants + auto const_one = fixed_point(1, p); + auto ln2 = fixed_point(0.6931471, p); + auto inv_ln2 = fixed_point(1.442695, p); + auto A = fixed_point(0.9978546, p); + auto B = fixed_point(0.4994721, p); + auto C = fixed_point(0.1763723, p); + auto D = fixed_point(0.0435108, p); + + T scaled_int_part = detail::constant_expr::to_int(mul(x, inv_ln2)._value, p); + + // Polynomial expansion + auto frac_part = sub(x, mul(ln2, fixed_point(scaled_int_part, p))); + auto taylor = add(mul(frac_part, D), C); + taylor = add(mul(frac_part, taylor), B); + taylor = add(mul(frac_part, taylor), A); + taylor = mul(frac_part, taylor); + taylor = add(taylor, const_one); + + // Saturate value + if(static_cast(clz(taylor.raw())) <= scaled_int_part) + { + return fixed_point(std::numeric_limits::max(), p, true); + } + + return (scaled_int_part < 0) ? shift_right(taylor, -scaled_int_part) : shift_left(taylor, scaled_int_part); + } + /** Calculate the inverse square root of a fixed point number + * + * @param[in] x Fixed point operand + * + * @return Inverse square root value of operand + */ + template + static fixed_point inv_sqrt(fixed_point x) + { + const uint8_t p = x._fixed_point_position; + int8_t shift = std::numeric_limits::digits - (p + detail::clz(x._value)); + + shift += std::numeric_limits::is_signed ? 1 : 0; + + const auto three_half = fixed_point(1.5f, p); + fixed_point a = shift < 0 ? shift_left(x, -shift) : shift_right(x, shift); + const fixed_point x_half = shift_right(a, 1); + + // We need three iterations to find the result + for(int i = 0; i < 3; ++i) + { + a = mul(a, sub(three_half, mul(x_half, mul(a, a)))); + } + + return (shift < 0) ? shift_left(a, -shift >> 1) : shift_right(a, shift >> 1); + } + /** Calculate the hyperbolic tangent of a fixed point number + * + * @param[in] x Fixed point operand + * + * @return Hyperbolic tangent of the operand + */ + template + static fixed_point tanh(fixed_point x) + { + uint8_t p = x._fixed_point_position; + // Constants + auto const_one = fixed_point(1, p); + auto const_two = fixed_point(2, p); + + auto exp2x = exp(const_two * x); + auto num = exp2x - const_one; + auto den = exp2x + const_one; + auto tanh = num / den; + + return tanh; + } + /** Calculate the a-th power of a fixed point number. 
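+ * @note Only meaningful for strictly positive x: log() in this file returns 0 for
+ * non-positive inputs, so pow(x, a) then degenerates to exp(0) = 1.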
+ * + * The power is computed as x^a = e^(log(x) * a) + * + * @param[in] x Fixed point operand + * @param[in] a Fixed point exponent + * + * @return a-th power of the operand + */ + template + static fixed_point pow(fixed_point x, fixed_point a) + { + return exp(log(x) * a); + } +}; + +template +bool operator==(const fixed_point &lhs, const fixed_point &rhs) +{ + return functions::isequal(lhs, rhs); +} +template +bool operator!=(const fixed_point &lhs, const fixed_point &rhs) +{ + return !operator==(lhs, rhs); +} +template +bool operator<(const fixed_point &lhs, const fixed_point &rhs) +{ + return functions::isless(lhs, rhs); +} +template +bool operator>(const fixed_point &lhs, const fixed_point &rhs) +{ + return operator<(rhs, lhs); +} +template +bool operator<=(const fixed_point &lhs, const fixed_point &rhs) +{ + return !operator>(lhs, rhs); +} +template +bool operator>=(const fixed_point &lhs, const fixed_point &rhs) +{ + return !operator<(lhs, rhs); +} +template +fixed_point operator+(const fixed_point &lhs, const fixed_point &rhs) +{ + return functions::add(lhs, rhs); +} +template +fixed_point operator-(const fixed_point &lhs, const fixed_point &rhs) +{ + return functions::sub(lhs, rhs); +} +template +fixed_point operator-(const fixed_point &rhs) +{ + return functions::negate(rhs); +} +template +fixed_point operator*(fixed_point x, fixed_point y) +{ + return functions::mul(x, y); +} +template +fixed_point operator/(fixed_point x, fixed_point y) +{ + return functions::div(x, y); +} +template +fixed_point operator>>(fixed_point x, size_t shift) +{ + return functions::shift_right(x, shift); +} +template +fixed_point operator<<(fixed_point x, size_t shift) +{ + return functions::shift_left(x, shift); +} +template +std::basic_ostream &operator<<(std::basic_ostream &s, fixed_point x) +{ + return functions::write(s, x); +} +template +inline fixed_point min(fixed_point x, fixed_point y) +{ + return x > y ? y : x; +} +template +inline fixed_point max(fixed_point x, fixed_point y) +{ + return x > y ? 
x : y; +} +template +inline fixed_point add(fixed_point x, fixed_point y) +{ + return functions::add(x, y); +} +template +inline fixed_point sub(fixed_point x, fixed_point y) +{ + return functions::sub(x, y); +} +template +inline fixed_point mul(fixed_point x, fixed_point y) +{ + return functions::mul(x, y); +} +template +inline fixed_point div(fixed_point x, fixed_point y) +{ + return functions::div(x, y); +} +template +inline fixed_point abs(fixed_point x) +{ + return functions::abs(x); +} +template +inline fixed_point clamp(fixed_point x, T min, T max) +{ + return functions::clamp(x, min, max); +} +template +inline fixed_point exp(fixed_point x) +{ + return functions::exp(x); +} +template +inline fixed_point log(fixed_point x) +{ + return functions::log(x); +} +template +inline fixed_point inv_sqrt(fixed_point x) +{ + return functions::inv_sqrt(x); +} +template +inline fixed_point tanh(fixed_point x) +{ + return functions::tanh(x); +} +template +inline fixed_point pow(fixed_point x, fixed_point a) +{ + return functions::pow(x, a); +} +} // namespace detail + +// Expose operators +using detail::operator==; +using detail::operator!=; +using detail::operator<; +using detail::operator>; +using detail::operator<=; +using detail::operator>=; +using detail::operator+; +using detail::operator-; +using detail::operator*; +using detail::operator/; +using detail::operator>>; +using detail::operator<<; + +// Expose additional functions +using detail::min; +using detail::max; +using detail::add; +using detail::sub; +using detail::mul; +using detail::div; +using detail::abs; +using detail::clamp; +using detail::exp; +using detail::log; +using detail::inv_sqrt; +using detail::tanh; +using detail::pow; +// TODO: floor +// TODO: ceil +// TODO: sqrt +} // namespace fixed_point_arithmetic +} // namespace test +} // namespace arm_compute +#endif /*__ARM_COMPUTE_TEST_VALIDATION_FIXEDPOINT_H__ */ diff --git a/tests/validation/Helpers.h b/tests/validation/Helpers.h new file mode 100644 index 0000000000..cbaea4b894 --- /dev/null +++ b/tests/validation/Helpers.h @@ -0,0 +1,123 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
diff --git a/tests/validation/Helpers.h b/tests/validation/Helpers.h
new file mode 100644
index 0000000000..cbaea4b894
--- /dev/null
+++ b/tests/validation/Helpers.h
@@ -0,0 +1,123 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_TEST_VALIDATION_HELPERS_H__
+#define __ARM_COMPUTE_TEST_VALIDATION_HELPERS_H__
+
+#include "Types.h"
+
+#include <limits>
+#include <type_traits>
+#include <utility>
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+/** Helper function to get the testing range for each activation layer.
+ *
+ * @param[in] activation           Activation function to test.
+ * @param[in] fixed_point_position (Optional) Number of bits for the fractional part. Defaults to 1.
+ *
+ * @return A pair containing the lower and upper testing bounds for a given function.
+ */
+template <typename T>
+std::pair<T, T> get_activation_layer_test_bounds(ActivationLayerInfo::ActivationFunction activation, int fixed_point_position = 1)
+{
+    bool is_float = std::is_floating_point<T>::value;
+    std::pair<T, T> bounds;
+
+    // Set initial values
+    if(is_float)
+    {
+        bounds = std::make_pair(-255.f, 255.f);
+    }
+    else
+    {
+        bounds = std::make_pair(std::numeric_limits<T>::lowest(), std::numeric_limits<T>::max());
+    }
+
+    // Reduce testing ranges
+    switch(activation)
+    {
+        case ActivationLayerInfo::ActivationFunction::LOGISTIC:
+        case ActivationLayerInfo::ActivationFunction::SOFT_RELU:
+            // Reduce range as exponent overflows
+            if(is_float)
+            {
+                bounds.first  = -40.f;
+                bounds.second = 40.f;
+            }
+            else
+            {
+                bounds.first  = -(1 << (fixed_point_position));
+                bounds.second = 1 << (fixed_point_position);
+            }
+            break;
+        case ActivationLayerInfo::ActivationFunction::TANH:
+            // Reduce range as exponent overflows
+            if(!is_float)
+            {
+                bounds.first  = -(1 << (fixed_point_position));
+                bounds.second = 1 << (fixed_point_position);
+            }
+            break;
+        case ActivationLayerInfo::ActivationFunction::SQRT:
+            // Reduce range as sqrt should take a non-negative number
+            bounds.first = (is_float) ? 0 : 1 << (fixed_point_position);
+            break;
+        default:
+            break;
+    }
+    return bounds;
+}
+
+/** Helper function to get the testing range for batch normalization layer.
+ *
+ * @param[in] fixed_point_position (Optional) Number of bits for the fractional part. Defaults to 1.
+ *
+ * @return A pair containing the lower and upper testing bounds.
+ */
+template <typename T>
+std::pair<T, T> get_batchnormalization_layer_test_bounds(int fixed_point_position = 1)
+{
+    bool is_float = std::is_floating_point<T>::value;
+    std::pair<T, T> bounds;
+
+    // Set initial values
+    if(is_float)
+    {
+        bounds = std::make_pair(-1.f, 1.f);
+    }
+    else
+    {
+        bounds = std::make_pair(1, 1 << (fixed_point_position));
+    }
+
+    return bounds;
+}
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif //__ARM_COMPUTE_TEST_VALIDATION_HELPERS_H__
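A note on the fixed-point branches above: the bounds are raw QS8 integer values, so the pair (-(1 << fixed_point_position), 1 << fixed_point_position) corresponds to real values of exactly ±1.0 once the fractional bits are taken into account. A self-contained sketch of that conversion (the helper name is illustrative, not part of this patch):

    #include <cstdint>
    #include <iostream>

    // Illustrative only: convert a raw QS8 value to its real equivalent.
    float qs8_to_float(int8_t raw, int fixed_point_position)
    {
        return static_cast<float>(raw) / static_cast<float>(1 << fixed_point_position);
    }

    int main()
    {
        // With 3 fractional bits, raw bounds +/-(1 << 3) = +/-8 map to +/-1.0,
        // which keeps exp()/log() inputs small enough to avoid overflow.
        std::cout << qs8_to_float(8, 3) << '\n';  // 1.0
        std::cout << qs8_to_float(-8, 3) << '\n'; // -1.0
        return 0;
    }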
diff --git a/tests/validation/NEON/AbsoluteDifference.cpp b/tests/validation/NEON/AbsoluteDifference.cpp
new file mode 100644
index 0000000000..b7f45d2384
--- /dev/null
+++ b/tests/validation/NEON/AbsoluteDifference.cpp
@@ -0,0 +1,201 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "Globals.h"
+#include "NEON/Helper.h"
+#include "NEON/NEAccessor.h"
+#include "TensorLibrary.h"
+#include "TypePrinter.h"
+#include "Utils.h"
+#include "validation/Datasets.h"
+#include "validation/Reference.h"
+#include "validation/Validation.h"
+
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/functions/NEAbsoluteDifference.h"
+#include "arm_compute/runtime/Tensor.h"
+#include "arm_compute/runtime/TensorAllocator.h"
+
+#include "boost_wrapper.h"
+
+#include <random>
+#include <string>
+
+using namespace arm_compute;
+using namespace arm_compute::test;
+using namespace arm_compute::test::neon;
+using namespace arm_compute::test::validation;
+
+namespace
+{
+/** Compute Neon absolute difference function.
+ *
+ * @param[in] shape  Shape of the input and output tensors.
+ * @param[in] dt_in0 Data type of first input tensor.
+ * @param[in] dt_in1 Data type of second input tensor.
+ * @param[in] dt_out Data type of the output tensor.
+ *
+ * @return Computed output tensor.
+ */
+Tensor compute_absolute_difference(const TensorShape &shape, DataType dt_in0, DataType dt_in1, DataType dt_out)
+{
+    // Create tensors
+    Tensor src1 = create_tensor(shape, dt_in0);
+    Tensor src2 = create_tensor(shape, dt_in1);
+    Tensor dst  = create_tensor(shape, dt_out);
+
+    // Create and configure function
+    NEAbsoluteDifference abs_d;
+    abs_d.configure(&src1, &src2, &dst);
+
+    // Allocate tensors
+    src1.allocator()->allocate();
+    src2.allocator()->allocate();
+    dst.allocator()->allocate();
+
+    BOOST_TEST(!src1.info()->is_resizable());
+    BOOST_TEST(!src2.info()->is_resizable());
+    BOOST_TEST(!dst.info()->is_resizable());
+
+    // Fill tensors
+    library->fill_tensor_uniform(NEAccessor(src1), 0);
+    library->fill_tensor_uniform(NEAccessor(src2), 1);
+
+    // Compute function
+    abs_d.run();
+
+    return dst;
+}
+
+void validate_configuration(const Tensor &src1, const Tensor &src2, Tensor &dst, TensorShape shape)
+{
+    BOOST_TEST(src1.info()->is_resizable());
+    BOOST_TEST(src2.info()->is_resizable());
+    BOOST_TEST(dst.info()->is_resizable());
+
+    // Create and configure function
+    NEAbsoluteDifference abs_d;
+    abs_d.configure(&src1, &src2, &dst);
+
+    // Validate valid region
+    const ValidRegion valid_region = shape_to_valid_region(shape);
+    validate(src1.info()->valid_region(), valid_region);
+    validate(src2.info()->valid_region(), valid_region);
+    validate(dst.info()->valid_region(), valid_region);
+
+    // Validate padding
+    const PaddingSize padding(0, required_padding(shape.x(), 16), 0, 0);
+    validate(src1.info()->padding(), padding);
+    validate(src2.info()->padding(), padding);
+    validate(dst.info()->padding(), padding);
+}
+} // namespace
+
+#ifndef DOXYGEN_SKIP_THIS
+BOOST_AUTO_TEST_SUITE(NEON)
+BOOST_AUTO_TEST_SUITE(AbsoluteDifference)
+
+BOOST_AUTO_TEST_SUITE(U8)
+BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly"))
+BOOST_DATA_TEST_CASE(Configuration, (SmallShapes() + LargeShapes()),
+                     shape)
+{
+    // Create tensors
+    Tensor src1 =
create_tensor(shape, DataType::U8); + Tensor src2 = create_tensor(shape, DataType::U8); + Tensor dst = create_tensor(shape, DataType::U8); + + validate_configuration(src1, src2, dst, shape); +} +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit")) +BOOST_DATA_TEST_CASE(RunSmall, SmallShapes(), + shape) +{ + // Compute function + Tensor dst = compute_absolute_difference(shape, DataType::U8, DataType::U8, DataType::U8); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_absolute_difference(shape, DataType::U8, DataType::U8, DataType::U8); + + // Validate output + validate(NEAccessor(dst), ref_dst); +} +BOOST_TEST_DECORATOR(*boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(RunLarge, LargeShapes(), + shape) +{ + // Compute function + Tensor dst = compute_absolute_difference(shape, DataType::U8, DataType::U8, DataType::U8); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_absolute_difference(shape, DataType::U8, DataType::U8, DataType::U8); + + // Validate output + validate(NEAccessor(dst), ref_dst); +} +BOOST_AUTO_TEST_SUITE_END() + +BOOST_AUTO_TEST_SUITE(S16) +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(Configuration, (SmallShapes() + LargeShapes()) * boost::unit_test::data::make({ DataType::U8, DataType::S16 }), + shape, dt) +{ + // Create tensors + Tensor src1 = create_tensor(shape, dt); + Tensor src2 = create_tensor(shape, DataType::S16); + Tensor dst = create_tensor(shape, DataType::S16); + + validate_configuration(src1, src2, dst, shape); +} +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit")) +BOOST_DATA_TEST_CASE(RunSmall, SmallShapes() * boost::unit_test::data::make({ DataType::U8, DataType::S16 }), + shape, dt) +{ + // Compute function + Tensor dst = compute_absolute_difference(shape, dt, DataType::S16, DataType::S16); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_absolute_difference(shape, dt, DataType::S16, DataType::S16); + + // Validate output + validate(NEAccessor(dst), ref_dst); +} +BOOST_TEST_DECORATOR(*boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(RunLarge, LargeShapes() * boost::unit_test::data::make({ DataType::U8, DataType::S16 }), + shape, dt) +{ + // Compute function + Tensor dst = compute_absolute_difference(shape, dt, DataType::S16, DataType::S16); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_absolute_difference(shape, dt, DataType::S16, DataType::S16); + + // Validate output + validate(NEAccessor(dst), ref_dst); +} +BOOST_AUTO_TEST_SUITE_END() + +BOOST_AUTO_TEST_SUITE_END() +BOOST_AUTO_TEST_SUITE_END() +#endif diff --git a/tests/validation/NEON/Accumulate.cpp b/tests/validation/NEON/Accumulate.cpp new file mode 100644 index 0000000000..e3ea37cd99 --- /dev/null +++ b/tests/validation/NEON/Accumulate.cpp @@ -0,0 +1,146 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "Globals.h" +#include "NEON/Helper.h" +#include "NEON/NEAccessor.h" +#include "TensorLibrary.h" +#include "TypePrinter.h" +#include "Utils.h" +#include "validation/Datasets.h" +#include "validation/Reference.h" +#include "validation/Validation.h" + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/functions/NEAccumulate.h" +#include "arm_compute/runtime/Tensor.h" +#include "arm_compute/runtime/TensorAllocator.h" + +#include "boost_wrapper.h" + +#include +#include + +using namespace arm_compute; +using namespace arm_compute::test; +using namespace arm_compute::test::neon; +using namespace arm_compute::test::validation; + +namespace +{ +/** Compute Neon accumulate function. + * + * @param[in] shape Shape of the input and output tensors. + * + * @return Computed output tensor. 
+ */ +Tensor compute_accumulate(const TensorShape &shape) +{ + // Create tensors + Tensor src = create_tensor(shape, DataType::U8); + Tensor dst = create_tensor(shape, DataType::S16); + + // Create and configure function + NEAccumulate acc; + acc.configure(&src, &dst); + + // Allocate tensors + src.allocator()->allocate(); + dst.allocator()->allocate(); + + BOOST_TEST(!src.info()->is_resizable()); + BOOST_TEST(!dst.info()->is_resizable()); + + // Fill tensors + library->fill_tensor_uniform(NEAccessor(src), 0); + library->fill_tensor_uniform(NEAccessor(dst), 1); + + // Compute function + acc.run(); + + return dst; +} +} // namespace + +#ifndef DOXYGEN_SKIP_THIS +BOOST_AUTO_TEST_SUITE(NEON) +BOOST_AUTO_TEST_SUITE(Accumulate) + +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(Configuration, (SmallShapes() + LargeShapes()), + shape) +{ + // Create tensors + Tensor src = create_tensor(shape, DataType::U8); + Tensor dst = create_tensor(shape, DataType::S16); + + BOOST_TEST(src.info()->is_resizable()); + BOOST_TEST(dst.info()->is_resizable()); + + // Create and configure function + NEAccumulate acc; + acc.configure(&src, &dst); + + // Validate valid region + const ValidRegion valid_region = shape_to_valid_region(shape); + validate(src.info()->valid_region(), valid_region); + validate(dst.info()->valid_region(), valid_region); + + // Validate padding + const PaddingSize padding(0, required_padding(shape.x(), 16), 0, 0); + validate(src.info()->padding(), padding); + validate(dst.info()->padding(), padding); +} + +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit")) +BOOST_DATA_TEST_CASE(RunSmall, SmallShapes(), + shape) +{ + // Compute function + Tensor dst = compute_accumulate(shape); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_accumulate(shape); + + // Validate output + validate(NEAccessor(dst), ref_dst); +} + +BOOST_TEST_DECORATOR(*boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(RunLarge, LargeShapes(), + shape) +{ + // Compute function + Tensor dst = compute_accumulate(shape); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_accumulate(shape); + + // Validate output + validate(NEAccessor(dst), ref_dst); +} + +BOOST_AUTO_TEST_SUITE_END() +BOOST_AUTO_TEST_SUITE_END() +#endif diff --git a/tests/validation/NEON/AccumulateSquared.cpp b/tests/validation/NEON/AccumulateSquared.cpp new file mode 100644 index 0000000000..10263a02e3 --- /dev/null +++ b/tests/validation/NEON/AccumulateSquared.cpp @@ -0,0 +1,147 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "Globals.h" +#include "NEON/Helper.h" +#include "NEON/NEAccessor.h" +#include "TensorLibrary.h" +#include "TypePrinter.h" +#include "Utils.h" +#include "validation/Datasets.h" +#include "validation/Reference.h" +#include "validation/Validation.h" + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/functions/NEAccumulate.h" +#include "arm_compute/runtime/Tensor.h" +#include "arm_compute/runtime/TensorAllocator.h" + +#include "boost_wrapper.h" + +#include +#include + +using namespace arm_compute; +using namespace arm_compute::test; +using namespace arm_compute::test::neon; +using namespace arm_compute::test::validation; + +namespace +{ +/** Compute Neon accumulate squared function. + * + * @param[in] shape Shape of the input and output tensors. + * + * @return Computed output tensor. + */ +Tensor compute_accumulate_squared(const TensorShape &shape, uint32_t shift) +{ + // Create tensors + Tensor src = create_tensor(shape, DataType::U8); + Tensor dst = create_tensor(shape, DataType::S16); + + // Create and configure function + NEAccumulateSquared acc; + acc.configure(&src, shift, &dst); + + // Allocate tensors + src.allocator()->allocate(); + dst.allocator()->allocate(); + + BOOST_TEST(!src.info()->is_resizable()); + BOOST_TEST(!dst.info()->is_resizable()); + + // Fill tensors + // dst tensor filled with non-negative values + library->fill_tensor_uniform(NEAccessor(src), 0); + library->fill_tensor_uniform(NEAccessor(dst), 1, static_cast(0), std::numeric_limits::max()); + + // Compute function + acc.run(); + + return dst; +} +} // namespace + +#ifndef DOXYGEN_SKIP_THIS +BOOST_AUTO_TEST_SUITE(NEON) +BOOST_AUTO_TEST_SUITE(AccumulateSquared) + +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(Configuration, (SmallShapes() + LargeShapes()) * boost::unit_test::data::xrange(0U, 16U), + shape, shift) +{ + // Create tensors + Tensor src = create_tensor(shape, DataType::U8); + Tensor dst = create_tensor(shape, DataType::S16); + + BOOST_TEST(src.info()->is_resizable()); + BOOST_TEST(dst.info()->is_resizable()); + + // Create and configure function + NEAccumulateSquared acc; + acc.configure(&src, shift, &dst); + + // Validate valid region + const ValidRegion valid_region = shape_to_valid_region(shape); + validate(src.info()->valid_region(), valid_region); + validate(dst.info()->valid_region(), valid_region); + + // Validate padding + const PaddingSize padding(0, required_padding(shape.x(), 16), 0, 0); + validate(src.info()->padding(), padding); + validate(dst.info()->padding(), padding); +} + +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit")) +BOOST_DATA_TEST_CASE(RunSmall, SmallShapes() * boost::unit_test::data::xrange(0U, 16U), + shape, shift) +{ + // Compute function + Tensor dst = compute_accumulate_squared(shape, shift); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_accumulate_squared(shape, shift); + + // Validate output + validate(NEAccessor(dst), ref_dst); +} + +BOOST_TEST_DECORATOR(*boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(RunLarge, LargeShapes() * boost::unit_test::data::make({ 0U, 1U, 15U }), + shape, shift) +{ + // Compute 
function + Tensor dst = compute_accumulate_squared(shape, shift); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_accumulate_squared(shape, shift); + + // Validate output + validate(NEAccessor(dst), ref_dst); +} + +BOOST_AUTO_TEST_SUITE_END() +BOOST_AUTO_TEST_SUITE_END() +#endif diff --git a/tests/validation/NEON/AccumulateWeighted.cpp b/tests/validation/NEON/AccumulateWeighted.cpp new file mode 100644 index 0000000000..6d45848647 --- /dev/null +++ b/tests/validation/NEON/AccumulateWeighted.cpp @@ -0,0 +1,146 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "Globals.h" +#include "NEON/Helper.h" +#include "NEON/NEAccessor.h" +#include "TensorLibrary.h" +#include "TypePrinter.h" +#include "Utils.h" +#include "validation/Datasets.h" +#include "validation/Reference.h" +#include "validation/Validation.h" + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/functions/NEAccumulate.h" +#include "arm_compute/runtime/Tensor.h" +#include "arm_compute/runtime/TensorAllocator.h" + +#include "boost_wrapper.h" + +#include +#include + +using namespace arm_compute; +using namespace arm_compute::test; +using namespace arm_compute::test::neon; +using namespace arm_compute::test::validation; + +namespace +{ +/** Compute Neon accumulate weighted function. + * + * @param[in] shape Shape of the input and output tensors. + * + * @return Computed output tensor. 
+ */ +Tensor compute_accumulate_weighted(const TensorShape &shape, float alpha) +{ + // Create tensors + Tensor src = create_tensor(shape, DataType::U8); + Tensor dst = create_tensor(shape, DataType::U8); + + // Create and configure function + NEAccumulateWeighted acc; + acc.configure(&src, alpha, &dst); + + // Allocate tensors + src.allocator()->allocate(); + dst.allocator()->allocate(); + + BOOST_TEST(!src.info()->is_resizable()); + BOOST_TEST(!dst.info()->is_resizable()); + + // Fill tensors + library->fill_tensor_uniform(NEAccessor(src), 0); + library->fill_tensor_uniform(NEAccessor(dst), 1); + + // Compute function + acc.run(); + + return dst; +} +} // namespace + +#ifndef DOXYGEN_SKIP_THIS +BOOST_AUTO_TEST_SUITE(NEON) +BOOST_AUTO_TEST_SUITE(AccumulateWeighted) + +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(Configuration, (SmallShapes() + LargeShapes()) * boost::unit_test::data::make({ 0.f, 0.5f, 1.f }), + shape, alpha) +{ + // Create tensors + Tensor src = create_tensor(shape, DataType::U8); + Tensor dst = create_tensor(shape, DataType::U8); + + BOOST_TEST(src.info()->is_resizable()); + BOOST_TEST(dst.info()->is_resizable()); + + // Create and configure function + NEAccumulateWeighted acc; + acc.configure(&src, alpha, &dst); + + // Validate valid region + const ValidRegion valid_region = shape_to_valid_region(shape); + validate(src.info()->valid_region(), valid_region); + validate(dst.info()->valid_region(), valid_region); + + // Validate padding + const PaddingSize padding(0, required_padding(shape.x(), 16), 0, 0); + validate(src.info()->padding(), padding); + validate(dst.info()->padding(), padding); +} + +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit")) +BOOST_DATA_TEST_CASE(RunSmall, SmallShapes() * boost::unit_test::data::make({ 0.f, 0.5f, 1.f }), + shape, alpha) +{ + // Compute function + Tensor dst = compute_accumulate_weighted(shape, alpha); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_accumulate_weighted(shape, alpha); + + // Validate output + validate(NEAccessor(dst), ref_dst); +} + +BOOST_TEST_DECORATOR(*boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(RunLarge, LargeShapes() * boost::unit_test::data::make({ 0.f, 0.5f, 1.f }), + shape, alpha) +{ + // Compute function + Tensor dst = compute_accumulate_weighted(shape, alpha); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_accumulate_weighted(shape, alpha); + + // Validate output + validate(NEAccessor(dst), ref_dst); +} + +BOOST_AUTO_TEST_SUITE_END() +BOOST_AUTO_TEST_SUITE_END() +#endif diff --git a/tests/validation/NEON/ActivationLayer.cpp b/tests/validation/NEON/ActivationLayer.cpp new file mode 100644 index 0000000000..da304d8087 --- /dev/null +++ b/tests/validation/NEON/ActivationLayer.cpp @@ -0,0 +1,217 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "Globals.h"
+#include "NEON/Helper.h"
+#include "NEON/NEAccessor.h"
+#include "TensorLibrary.h"
+#include "TypePrinter.h"
+#include "Utils.h"
+#include "validation/Datasets.h"
+#include "validation/Helpers.h"
+#include "validation/Reference.h"
+#include "validation/Validation.h"
+
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
+#include "arm_compute/runtime/Tensor.h"
+#include "arm_compute/runtime/TensorAllocator.h"
+
+#include "boost_wrapper.h"
+
+#include <random>
+#include <string>
+#include <tuple>
+
+using namespace arm_compute;
+using namespace arm_compute::test;
+using namespace arm_compute::test::neon;
+using namespace arm_compute::test::validation;
+
+namespace
+{
+/** Define tolerance of the activation layer
+ *
+ * @param[in] activation           The activation function used.
+ * @param[in] fixed_point_position Number of bits for the fractional part.
+ *
+ * @return Tolerance depending on the activation function.
+ */
+float activation_layer_tolerance(ActivationLayerInfo::ActivationFunction activation, int fixed_point_position = 0)
+{
+    switch(activation)
+    {
+        case ActivationLayerInfo::ActivationFunction::LOGISTIC:
+        case ActivationLayerInfo::ActivationFunction::SOFT_RELU:
+        case ActivationLayerInfo::ActivationFunction::SQRT:
+        case ActivationLayerInfo::ActivationFunction::TANH:
+            return (fixed_point_position != 0) ? 5.f : 0.00001f;
+        default:
+            return 0.f;
+    }
+}
+
+/** Compute Neon activation layer function.
+ *
+ * @param[in] shape                Shape of the input and output tensors.
+ * @param[in] dt                   Data type of the tensors.
+ * @param[in] act_info             Activation layer information.
+ * @param[in] fixed_point_position Number of bits for the fractional part of fixed point numbers.
+ *
+ * @return Computed output tensor.
+ */
+Tensor compute_activation_layer(const TensorShape &shape, DataType dt, ActivationLayerInfo act_info, int fixed_point_position = 0)
+{
+    // Create tensors
+    Tensor src = create_tensor(shape, dt, 1, fixed_point_position);
+    Tensor dst = create_tensor(shape, dt, 1, fixed_point_position);
+
+    // Create and configure function
+    NEActivationLayer act_layer;
+    act_layer.configure(&src, &dst, act_info);
+
+    // Allocate tensors
+    src.allocator()->allocate();
+    dst.allocator()->allocate();
+
+    BOOST_TEST(!src.info()->is_resizable());
+    BOOST_TEST(!dst.info()->is_resizable());
+
+    // Fill tensors
+    if(dt == DataType::F32)
+    {
+        float min_bound = 0;
+        float max_bound = 0;
+        std::tie(min_bound, max_bound) = get_activation_layer_test_bounds<float>(act_info.activation());
+        std::uniform_real_distribution<> distribution(min_bound, max_bound);
+        library->fill(NEAccessor(src), distribution, 0);
+    }
+    else
+    {
+        int min_bound = 0;
+        int max_bound = 0;
+        std::tie(min_bound, max_bound) = get_activation_layer_test_bounds<int8_t>(act_info.activation(), fixed_point_position);
+        std::uniform_int_distribution<> distribution(min_bound, max_bound);
+        library->fill(NEAccessor(src), distribution, 0);
+    }
+
+    // Compute function
+    act_layer.run();
+
+    return dst;
+}
+} // namespace
+
+#ifndef DOXYGEN_SKIP_THIS
+BOOST_AUTO_TEST_SUITE(NEON)
+BOOST_AUTO_TEST_SUITE(ActivationLayer)
+
+BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly"))
+BOOST_DATA_TEST_CASE(Configuration, (SmallShapes() + LargeShapes()) * CNNDataTypes(), shape, dt)
+{
+    // Set fixed point position data type allowed
+    int fixed_point_position = (arm_compute::is_data_type_fixed_point(dt)) ? 3 : 0;
+
+    // Create tensors
+    Tensor src = create_tensor(shape, dt, 1, fixed_point_position);
+    Tensor dst = create_tensor(shape, dt, 1, fixed_point_position);
+
+    BOOST_TEST(src.info()->is_resizable());
+    BOOST_TEST(dst.info()->is_resizable());
+
+    // Create and configure function
+    NEActivationLayer act_layer;
+    act_layer.configure(&src, &dst, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::ABS));
+
+    // Validate valid region
+    const ValidRegion valid_region = shape_to_valid_region(shape);
+    validate(src.info()->valid_region(), valid_region);
+    validate(dst.info()->valid_region(), valid_region);
+
+    // Validate padding
+    const PaddingSize padding(0, required_padding(shape.x(), 16), 0, 0);
+    validate(src.info()->padding(), padding);
+    validate(dst.info()->padding(), padding);
+}
+
+BOOST_AUTO_TEST_SUITE(Float)
+BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit"))
+BOOST_DATA_TEST_CASE(RunSmall, SmallShapes() * CNNFloatDataTypes() * ActivationFunctions(), shape, dt, act_function)
+{
+    // Create activation layer info
+    ActivationLayerInfo act_info(act_function, 1.f, 1.f);
+
+    // Compute function
+    Tensor dst = compute_activation_layer(shape, dt, act_info);
+
+    // Compute reference
+    RawTensor ref_dst = Reference::compute_reference_activation_layer(shape, dt, act_info);
+
+    // Validate output
+    validate(NEAccessor(dst), ref_dst, activation_layer_tolerance(act_function));
+}
+
+BOOST_TEST_DECORATOR(*boost::unit_test::label("nightly"))
+BOOST_DATA_TEST_CASE(RunLarge, LargeShapes() * CNNFloatDataTypes() * ActivationFunctions(), shape, dt, act_function)
+{
+    // Create activation layer info
+    ActivationLayerInfo act_info(act_function, 1.f, 1.f);
+
+    // Compute function
+    Tensor dst = compute_activation_layer(shape, dt, act_info);
+
+    // Compute reference
+    RawTensor ref_dst = Reference::compute_reference_activation_layer(shape, dt, act_info);
+
+    // Validate output
+    validate(NEAccessor(dst), ref_dst, activation_layer_tolerance(act_function));
+}
+BOOST_AUTO_TEST_SUITE_END()
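Before the quantized cases below, it is worth spelling out the QS8 arithmetic that motivates both the restricted input bounds and the restricted fixed-point positions: with fpp fractional bits an 8-bit signed value covers [-128 * 2^-fpp, 127 * 2^-fpp] in steps of 2^-fpp, so the headroom above exp's output shrinks quickly as fpp grows. A self-contained sketch of that arithmetic (illustrative only, not part of the patch):

    #include <iostream>

    // Illustrative: representable range and resolution of a QS8 value with
    // 'fpp' fractional bits (8-bit signed, two's complement).
    void print_qs8_range(int fpp)
    {
        const float step = 1.0f / (1 << fpp);
        const float lo   = -128.0f * step;
        const float hi   = 127.0f * step;
        std::cout << "fpp=" << fpp << " range=[" << lo << ", " << hi << "] step=" << step << '\n';
    }

    int main()
    {
        // fpp=3 gives range [-16, 15.875] with step 0.125: exp(x) already
        // saturates for x > ln(15.875) ~ 2.76, hence the ~[-1, 1] input
        // bounds produced by get_activation_layer_test_bounds.
        for(int fpp = 1; fpp <= 7; ++fpp)
        {
            print_qs8_range(fpp);
        }
        return 0;
    }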
+
+/** @note We test for fixed point precision [3,5] because [1,2] and [6,7] ranges
+ *  cause overflow issues in most of the transcendental functions.
+ */
+BOOST_AUTO_TEST_SUITE(Quantized)
+BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit"))
+BOOST_DATA_TEST_CASE(RunSmall, SmallShapes() * ActivationFunctions() * boost::unit_test::data::xrange(3, 6, 1),
+                     shape, act_function, fixed_point_position)
+{
+    // Create activation layer info
+    ActivationLayerInfo act_info(act_function, 1.f, 1.f);
+
+    // Compute function
+    Tensor dst = compute_activation_layer(shape, DataType::QS8, act_info, fixed_point_position);
+
+    // Compute reference
+    RawTensor ref_dst = Reference::compute_reference_activation_layer(shape, DataType::QS8, act_info, fixed_point_position);
+
+    // Validate output
+    validate(NEAccessor(dst), ref_dst, activation_layer_tolerance(act_function, fixed_point_position));
+}
+BOOST_AUTO_TEST_SUITE_END()
+
+BOOST_AUTO_TEST_SUITE_END()
+BOOST_AUTO_TEST_SUITE_END()
+#endif
diff --git a/tests/validation/NEON/ArithmeticAddition.cpp b/tests/validation/NEON/ArithmeticAddition.cpp
new file mode 100644
index 0000000000..5654a426fd
--- /dev/null
+++ b/tests/validation/NEON/ArithmeticAddition.cpp
@@ -0,0 +1,228 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "Globals.h"
+#include "NEON/Helper.h"
+#include "NEON/NEAccessor.h"
+#include "TensorLibrary.h"
+#include "TypePrinter.h"
+#include "Utils.h"
+#include "validation/Datasets.h"
+#include "validation/Reference.h"
+#include "validation/Validation.h"
+
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h"
+#include "arm_compute/runtime/Tensor.h"
+#include "arm_compute/runtime/TensorAllocator.h"
+
+#include "boost_wrapper.h"
+
+#include <random>
+#include <string>
+
+using namespace arm_compute;
+using namespace arm_compute::test;
+using namespace arm_compute::test::neon;
+using namespace arm_compute::test::validation;
+
+namespace
+{
+/** Compute Neon arithmetic addition function.
+ *
+ * @param[in] shape  Shape of the input and output tensors.
+ * @param[in] dt_in0 Data type of first input tensor.
+ * @param[in] dt_in1 Data type of second input tensor. + * @param[in] dt_out Data type of the output tensor. + * @param[in] policy Overflow policy of the operation. + * + * @return Computed output tensor. + */ +Tensor compute_arithmetic_addition(const TensorShape &shape, DataType dt_in0, DataType dt_in1, DataType dt_out, ConvertPolicy policy) +{ + // Create tensors + Tensor src1 = create_tensor(shape, dt_in0); + Tensor src2 = create_tensor(shape, dt_in1); + Tensor dst = create_tensor(shape, dt_out); + + // Create and configure function + NEArithmeticAddition add; + add.configure(&src1, &src2, &dst, policy); + + // Allocate tensors + src1.allocator()->allocate(); + src2.allocator()->allocate(); + dst.allocator()->allocate(); + + BOOST_TEST(!src1.info()->is_resizable()); + BOOST_TEST(!src2.info()->is_resizable()); + BOOST_TEST(!dst.info()->is_resizable()); + + // Fill tensors + library->fill_tensor_uniform(NEAccessor(src1), 0); + library->fill_tensor_uniform(NEAccessor(src2), 1); + + // Compute function + add.run(); + + return dst; +} + +void validate_configuration(const Tensor &src1, const Tensor &src2, Tensor &dst, TensorShape shape, ConvertPolicy policy) +{ + BOOST_TEST(src1.info()->is_resizable()); + BOOST_TEST(src2.info()->is_resizable()); + BOOST_TEST(dst.info()->is_resizable()); + + // Create and configure function + NEArithmeticAddition add; + add.configure(&src1, &src2, &dst, policy); + + // Validate valid region + const ValidRegion valid_region = shape_to_valid_region(shape); + validate(src1.info()->valid_region(), valid_region); + validate(src2.info()->valid_region(), valid_region); + validate(dst.info()->valid_region(), valid_region); + + // Validate padding + const PaddingSize padding(0, required_padding(shape.x(), 16), 0, 0); + validate(src1.info()->padding(), padding); + validate(src2.info()->padding(), padding); + validate(dst.info()->padding(), padding); +} +} // namespace + +#ifndef DOXYGEN_SKIP_THIS +BOOST_AUTO_TEST_SUITE(NEON) +BOOST_AUTO_TEST_SUITE(ArithmeticAddition) + +BOOST_AUTO_TEST_SUITE(U8) +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(Configuration, (SmallShapes() + LargeShapes()) * boost::unit_test::data::make({ ConvertPolicy::SATURATE, ConvertPolicy::WRAP }), + shape, policy) +{ + // Create tensors + Tensor src1 = create_tensor(shape, DataType::U8); + Tensor src2 = create_tensor(shape, DataType::U8); + Tensor dst = create_tensor(shape, DataType::U8); + + validate_configuration(src1, src2, dst, shape, policy); +} +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit")) +BOOST_DATA_TEST_CASE(RunSmall, SmallShapes() * boost::unit_test::data::make({ ConvertPolicy::SATURATE, ConvertPolicy::WRAP }), + shape, policy) +{ + // Compute function + Tensor dst = compute_arithmetic_addition(shape, DataType::U8, DataType::U8, DataType::U8, policy); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_arithmetic_addition(shape, DataType::U8, DataType::U8, DataType::U8, policy); + + // Validate output + validate(NEAccessor(dst), ref_dst); +} +BOOST_AUTO_TEST_SUITE_END() + +BOOST_AUTO_TEST_SUITE(S16) +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(Configuration, (SmallShapes() + LargeShapes()) * boost::unit_test::data::make({ DataType::U8, DataType::S16 }) * boost::unit_test::data::make({ ConvertPolicy::SATURATE, ConvertPolicy::WRAP }), + shape, dt, policy) +{ + // Create tensors + Tensor src1 = 
create_tensor(shape, dt); + Tensor src2 = create_tensor(shape, DataType::S16); + Tensor dst = create_tensor(shape, DataType::S16); + + validate_configuration(src1, src2, dst, shape, policy); +} +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit")) +BOOST_DATA_TEST_CASE(RunSmall, SmallShapes() * boost::unit_test::data::make({ DataType::U8, DataType::S16 }) * boost::unit_test::data::make({ ConvertPolicy::SATURATE, ConvertPolicy::WRAP }), + shape, dt, policy) +{ + // Compute function + Tensor dst = compute_arithmetic_addition(shape, dt, DataType::S16, DataType::S16, policy); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_arithmetic_addition(shape, dt, DataType::S16, DataType::S16, policy); + + // Validate output + validate(NEAccessor(dst), ref_dst); +} +BOOST_TEST_DECORATOR(*boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(RunLarge, LargeShapes() * boost::unit_test::data::make({ DataType::U8, DataType::S16 }) * boost::unit_test::data::make({ ConvertPolicy::SATURATE, ConvertPolicy::WRAP }), + shape, dt, policy) +{ + // Compute function + Tensor dst = compute_arithmetic_addition(shape, dt, DataType::S16, DataType::S16, policy); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_arithmetic_addition(shape, dt, DataType::S16, DataType::S16, policy); + + // Validate output + validate(NEAccessor(dst), ref_dst); +} +BOOST_AUTO_TEST_SUITE_END() + +BOOST_AUTO_TEST_SUITE(F32) +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(Configuration, (SmallShapes() + LargeShapes()) * boost::unit_test::data::make({ ConvertPolicy::SATURATE, ConvertPolicy::WRAP }), + shape, policy) +{ + // Create tensors + Tensor src1 = create_tensor(shape, DataType::F32); + Tensor src2 = create_tensor(shape, DataType::F32); + Tensor dst = create_tensor(shape, DataType::F32); + + validate_configuration(src1, src2, dst, shape, policy); +} +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit")) +BOOST_DATA_TEST_CASE(RunSmall, SmallShapes(), shape) +{ + // Compute function + Tensor dst = compute_arithmetic_addition(shape, DataType::F32, DataType::F32, DataType::F32, ConvertPolicy::WRAP); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_arithmetic_addition(shape, DataType::F32, DataType::F32, DataType::F32, ConvertPolicy::WRAP); + + // Validate output + validate(NEAccessor(dst), ref_dst); +} +BOOST_TEST_DECORATOR(*boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(RunLarge, LargeShapes() * boost::unit_test::data::make({ ConvertPolicy::SATURATE, ConvertPolicy::WRAP }), + shape, policy) +{ + // Compute function + Tensor dst = compute_arithmetic_addition(shape, DataType::F32, DataType::F32, DataType::F32, policy); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_arithmetic_addition(shape, DataType::F32, DataType::F32, DataType::F32, policy); + + // Validate output + validate(NEAccessor(dst), ref_dst); +} +BOOST_AUTO_TEST_SUITE_END() + +BOOST_AUTO_TEST_SUITE_END() +BOOST_AUTO_TEST_SUITE_END() +#endif diff --git a/tests/validation/NEON/ArithmeticSubtraction.cpp b/tests/validation/NEON/ArithmeticSubtraction.cpp new file mode 100644 index 0000000000..9c0e9131e0 --- /dev/null +++ b/tests/validation/NEON/ArithmeticSubtraction.cpp @@ -0,0 +1,228 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "Globals.h" +#include "NEON/Helper.h" +#include "NEON/NEAccessor.h" +#include "TensorLibrary.h" +#include "TypePrinter.h" +#include "Utils.h" +#include "validation/Datasets.h" +#include "validation/Reference.h" +#include "validation/Validation.h" + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h" +#include "arm_compute/runtime/Tensor.h" +#include "arm_compute/runtime/TensorAllocator.h" + +#include "boost_wrapper.h" + +#include +#include + +using namespace arm_compute; +using namespace arm_compute::test; +using namespace arm_compute::test::neon; +using namespace arm_compute::test::validation; + +namespace +{ +/** Compute Neon arithmetic subtraction function. + * + * @param[in] shape Shape of the input and output tensors. + * @param[in] dt_in0 Data type of first input tensor. + * @param[in] dt_in1 Data type of second input tensor. + * @param[in] dt_out Data type of the output tensor. + * @param[in] policy Overflow policy of the operation. + * + * @return Computed output tensor. 
+ */ +Tensor compute_arithmetic_subtraction(const TensorShape &shape, DataType dt_in0, DataType dt_in1, DataType dt_out, ConvertPolicy policy) +{ + // Create tensors + Tensor src1 = create_tensor(shape, dt_in0); + Tensor src2 = create_tensor(shape, dt_in1); + Tensor dst = create_tensor(shape, dt_out); + + // Create and configure function + NEArithmeticSubtraction sub; + sub.configure(&src1, &src2, &dst, policy); + + // Allocate tensors + src1.allocator()->allocate(); + src2.allocator()->allocate(); + dst.allocator()->allocate(); + + BOOST_TEST(!src1.info()->is_resizable()); + BOOST_TEST(!src2.info()->is_resizable()); + BOOST_TEST(!dst.info()->is_resizable()); + + // Fill tensors + library->fill_tensor_uniform(NEAccessor(src1), 0); + library->fill_tensor_uniform(NEAccessor(src2), 1); + + // Compute function + sub.run(); + + return dst; +} + +void validate_configuration(const Tensor &src1, const Tensor &src2, Tensor &dst, TensorShape shape, ConvertPolicy policy) +{ + BOOST_TEST(src1.info()->is_resizable()); + BOOST_TEST(src2.info()->is_resizable()); + BOOST_TEST(dst.info()->is_resizable()); + + // Create and configure function + NEArithmeticSubtraction sub; + sub.configure(&src1, &src2, &dst, policy); + + // Validate valid region + const ValidRegion valid_region = shape_to_valid_region(shape); + validate(src1.info()->valid_region(), valid_region); + validate(src2.info()->valid_region(), valid_region); + validate(dst.info()->valid_region(), valid_region); + + // Validate padding + const PaddingSize padding(0, required_padding(shape.x(), 16), 0, 0); + validate(src1.info()->padding(), padding); + validate(src2.info()->padding(), padding); + validate(dst.info()->padding(), padding); +} +} // namespace + +#ifndef DOXYGEN_SKIP_THIS +BOOST_AUTO_TEST_SUITE(NEON) +BOOST_AUTO_TEST_SUITE(ArithmeticSubtraction) + +BOOST_AUTO_TEST_SUITE(U8) +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(Configuration, (SmallShapes() + LargeShapes()) * boost::unit_test::data::make({ ConvertPolicy::SATURATE, ConvertPolicy::WRAP }), + shape, policy) +{ + // Create tensors + Tensor src1 = create_tensor(shape, DataType::U8); + Tensor src2 = create_tensor(shape, DataType::U8); + Tensor dst = create_tensor(shape, DataType::U8); + + validate_configuration(src1, src2, dst, shape, policy); +} +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit")) +BOOST_DATA_TEST_CASE(RunSmall, SmallShapes() * boost::unit_test::data::make({ ConvertPolicy::SATURATE, ConvertPolicy::WRAP }), + shape, policy) +{ + // Compute function + Tensor dst = compute_arithmetic_subtraction(shape, DataType::U8, DataType::U8, DataType::U8, policy); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_arithmetic_subtraction(shape, DataType::U8, DataType::U8, DataType::U8, policy); + + // Validate output + validate(NEAccessor(dst), ref_dst); +} +BOOST_AUTO_TEST_SUITE_END() + +BOOST_AUTO_TEST_SUITE(S16) +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(Configuration, (SmallShapes() + LargeShapes()) * boost::unit_test::data::make({ DataType::U8, DataType::S16 }) * boost::unit_test::data::make({ ConvertPolicy::SATURATE, ConvertPolicy::WRAP }), + shape, dt, policy) +{ + // Create tensors + Tensor src1 = create_tensor(shape, dt); + Tensor src2 = create_tensor(shape, DataType::S16); + Tensor dst = create_tensor(shape, DataType::S16); + + validate_configuration(src1, src2, dst, shape, policy); +} 
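The ConvertPolicy axis exercised throughout these cases only matters on overflow; as a self-contained scalar illustration of WRAP versus SATURATE for U8 subtraction (a model of the policy semantics, not the library's kernel code):

    #include <algorithm>
    #include <cstdint>
    #include <iostream>

    // Illustrative only: scalar models of the two overflow policies for U8.
    uint8_t sub_wrap(uint8_t a, uint8_t b)
    {
        return static_cast<uint8_t>(a - b); // modular arithmetic, wraps past 0
    }

    uint8_t sub_saturate(uint8_t a, uint8_t b)
    {
        const int r = static_cast<int>(a) - static_cast<int>(b);
        return static_cast<uint8_t>(std::max(r, 0)); // clamps at 0 instead of wrapping
    }

    int main()
    {
        std::cout << int(sub_wrap(50, 100)) << '\n';     // 206 (wrapped)
        std::cout << int(sub_saturate(50, 100)) << '\n'; // 0   (clamped)
        return 0;
    }

The reference implementation is computed with the same policy as the function under test, so both paths are checked for bit-exact agreement rather than against mathematically exact results.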
+BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit")) +BOOST_DATA_TEST_CASE(RunSmall, SmallShapes() * boost::unit_test::data::make({ DataType::U8, DataType::S16 }) * boost::unit_test::data::make({ ConvertPolicy::SATURATE, ConvertPolicy::WRAP }), + shape, dt, policy) +{ + // Compute function + Tensor dst = compute_arithmetic_subtraction(shape, dt, DataType::S16, DataType::S16, policy); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_arithmetic_subtraction(shape, dt, DataType::S16, DataType::S16, policy); + + // Validate output + validate(NEAccessor(dst), ref_dst); +} +BOOST_TEST_DECORATOR(*boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(RunLarge, LargeShapes() * boost::unit_test::data::make({ DataType::U8, DataType::S16 }) * boost::unit_test::data::make({ ConvertPolicy::SATURATE, ConvertPolicy::WRAP }), + shape, dt, policy) +{ + // Compute function + Tensor dst = compute_arithmetic_subtraction(shape, dt, DataType::S16, DataType::S16, policy); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_arithmetic_subtraction(shape, dt, DataType::S16, DataType::S16, policy); + + // Validate output + validate(NEAccessor(dst), ref_dst); +} +BOOST_AUTO_TEST_SUITE_END() + +BOOST_AUTO_TEST_SUITE(F32) +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(Configuration, (SmallShapes() + LargeShapes()) * boost::unit_test::data::make({ ConvertPolicy::SATURATE, ConvertPolicy::WRAP }), + shape, policy) +{ + // Create tensors + Tensor src1 = create_tensor(shape, DataType::F32); + Tensor src2 = create_tensor(shape, DataType::F32); + Tensor dst = create_tensor(shape, DataType::F32); + + validate_configuration(src1, src2, dst, shape, policy); +} +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit")) +BOOST_DATA_TEST_CASE(RunSmall, SmallShapes(), shape) +{ + // Compute function + Tensor dst = compute_arithmetic_subtraction(shape, DataType::F32, DataType::F32, DataType::F32, ConvertPolicy::WRAP); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_arithmetic_subtraction(shape, DataType::F32, DataType::F32, DataType::F32, ConvertPolicy::WRAP); + + // Validate output + validate(NEAccessor(dst), ref_dst); +} +BOOST_TEST_DECORATOR(*boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(RunLarge, LargeShapes() * boost::unit_test::data::make({ ConvertPolicy::SATURATE, ConvertPolicy::WRAP }), + shape, policy) +{ + // Compute function + Tensor dst = compute_arithmetic_subtraction(shape, DataType::F32, DataType::F32, DataType::F32, policy); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_arithmetic_subtraction(shape, DataType::F32, DataType::F32, DataType::F32, policy); + + // Validate output + validate(NEAccessor(dst), ref_dst); +} +BOOST_AUTO_TEST_SUITE_END() + +BOOST_AUTO_TEST_SUITE_END() +BOOST_AUTO_TEST_SUITE_END() +#endif diff --git a/tests/validation/NEON/BatchNormalizationLayer.cpp b/tests/validation/NEON/BatchNormalizationLayer.cpp new file mode 100644 index 0000000000..7656b2f392 --- /dev/null +++ b/tests/validation/NEON/BatchNormalizationLayer.cpp @@ -0,0 +1,195 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "NEON/Helper.h"
+#include "NEON/NEAccessor.h"
+#include "TypePrinter.h"
+#include "dataset/BatchNormalizationLayerDataset.h"
+#include "tests/validation/Helpers.h"
+#include "validation/Datasets.h"
+#include "validation/Reference.h"
+#include "validation/Validation.h"
+
+#include "arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h"
+
+#include <random>
+#include <tuple>
+
+using namespace arm_compute;
+using namespace arm_compute::test;
+using namespace arm_compute::test::neon;
+using namespace arm_compute::test::validation;
+
+namespace
+{
+const float tolerance_f = 1e-05; /**< Tolerance value for comparing reference's output against floating point implementation's output */
+const float tolerance_q = 3;     /**< Tolerance value for comparing reference's output against quantized implementation's output */
+
+/** Compute Neon batch normalization function.
+ *
+ * @param[in] shape0               Shape of the input and output tensors.
+ * @param[in] shape1               Shape of the mean, variance, beta and gamma tensors.
+ * @param[in] dt                   Data type of input and output tensors.
+ * @param[in] epsilon              Small value added to the variance for numerical stability.
+ * @param[in] fixed_point_position (Optional) Number of bits for the fractional part of fixed point numbers.
+ *
+ * @return Computed output tensor.
+ */
+Tensor compute_reference_batch_normalization_layer(const TensorShape &shape0, const TensorShape &shape1, DataType dt, float epsilon, int fixed_point_position = 0)
+{
+    // Create tensors
+    Tensor src   = create_tensor(shape0, dt, 1, fixed_point_position);
+    Tensor dst   = create_tensor(shape0, dt, 1, fixed_point_position);
+    Tensor mean  = create_tensor(shape1, dt, 1, fixed_point_position);
+    Tensor var   = create_tensor(shape1, dt, 1, fixed_point_position);
+    Tensor beta  = create_tensor(shape1, dt, 1, fixed_point_position);
+    Tensor gamma = create_tensor(shape1, dt, 1, fixed_point_position);
+
+    // Create and configure function
+    NEBatchNormalizationLayer norm;
+    norm.configure(&src, &dst, &mean, &var, &beta, &gamma, epsilon);
+
+    // Allocate tensors
+    src.allocator()->allocate();
+    dst.allocator()->allocate();
+    mean.allocator()->allocate();
+    var.allocator()->allocate();
+    beta.allocator()->allocate();
+    gamma.allocator()->allocate();
+
+    BOOST_TEST(!src.info()->is_resizable());
+    BOOST_TEST(!dst.info()->is_resizable());
+    BOOST_TEST(!mean.info()->is_resizable());
+    BOOST_TEST(!var.info()->is_resizable());
+    BOOST_TEST(!beta.info()->is_resizable());
+    BOOST_TEST(!gamma.info()->is_resizable());
+
+    // Fill tensors
+    if(dt == DataType::F32)
+    {
+        float min_bound = 0.f;
+        float max_bound = 0.f;
+        std::tie(min_bound, max_bound) = get_batchnormalization_layer_test_bounds<float>();
+        std::uniform_real_distribution<> distribution(min_bound, max_bound);
+        std::uniform_real_distribution<> distribution_var(0, max_bound);
+        library->fill(NEAccessor(src), distribution, 0);
+        library->fill(NEAccessor(mean), distribution, 1);
+        library->fill(NEAccessor(var), distribution_var, 0);
+        library->fill(NEAccessor(beta), distribution, 3);
+        library->fill(NEAccessor(gamma), distribution, 4);
+    }
+    else
+    {
+        int min_bound = 0;
+        int max_bound = 0;
+        std::tie(min_bound, max_bound) = get_batchnormalization_layer_test_bounds<int8_t>(fixed_point_position);
+        std::uniform_int_distribution<> distribution(min_bound, max_bound);
+        std::uniform_int_distribution<> distribution_var(0, max_bound);
+        library->fill(NEAccessor(src), distribution, 0);
+        library->fill(NEAccessor(mean), distribution, 1);
+        library->fill(NEAccessor(var), distribution_var, 0);
+        library->fill(NEAccessor(beta), distribution, 3);
+        library->fill(NEAccessor(gamma), distribution, 4);
+    }
+
+    // Compute function
+    norm.run();
+
+    return dst;
+}
+} // namespace
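For reference, the transform being validated here is standard batch normalization; a scalar sketch (illustrative only, the tested kernels operate per channel across whole tensors):

    #include <cmath>

    // Scalar sketch of the batch-normalization transform: each input x is
    // normalized with its channel's statistics, then scaled and shifted by
    // the learned gamma/beta.
    float batch_norm(float x, float mean, float var, float beta, float gamma, float epsilon)
    {
        const float x_hat = (x - mean) / std::sqrt(var + epsilon);
        return gamma * x_hat + beta;
    }

epsilon (obj.epsilon in the datasets below) plays the usual numerical-stability role: it keeps the denominator away from zero for channels with tiny variance, which is also why the var tensor above is filled with non-negative values only.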
+
+#ifndef DOXYGEN_SKIP_THIS
+BOOST_AUTO_TEST_SUITE(NEON)
+BOOST_AUTO_TEST_SUITE(BatchNormalizationLayer)
+
+BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly"))
+BOOST_DATA_TEST_CASE(Configuration, RandomBatchNormalizationLayerDataset() * (boost::unit_test::data::make(DataType::F32) + boost::unit_test::data::make(DataType::QS8)), obj, dt)
+{
+    // Set fixed point position data type allowed
+    int fixed_point_position = (arm_compute::is_data_type_fixed_point(dt)) ? 3 : 0;
+
+    // Create tensors
+    Tensor src   = create_tensor(obj.shape0, dt, 1, fixed_point_position);
+    Tensor dst   = create_tensor(obj.shape0, dt, 1, fixed_point_position);
+    Tensor mean  = create_tensor(obj.shape1, dt, 1, fixed_point_position);
+    Tensor var   = create_tensor(obj.shape1, dt, 1, fixed_point_position);
+    Tensor beta  = create_tensor(obj.shape1, dt, 1, fixed_point_position);
+    Tensor gamma = create_tensor(obj.shape1, dt, 1, fixed_point_position);
+
+    BOOST_TEST(src.info()->is_resizable());
+    BOOST_TEST(dst.info()->is_resizable());
+    BOOST_TEST(mean.info()->is_resizable());
+    BOOST_TEST(var.info()->is_resizable());
+    BOOST_TEST(beta.info()->is_resizable());
+    BOOST_TEST(gamma.info()->is_resizable());
+
+    // Create and configure function
+    NEBatchNormalizationLayer norm;
+    norm.configure(&src, &dst, &mean, &var, &beta, &gamma, obj.epsilon);
+
+    // Validate valid region
+    const ValidRegion valid_region     = shape_to_valid_region(obj.shape0);
+    const ValidRegion valid_region_vec = shape_to_valid_region(obj.shape1);
+    validate(src.info()->valid_region(), valid_region);
+    validate(dst.info()->valid_region(), valid_region);
+    validate(mean.info()->valid_region(), valid_region_vec);
+    validate(var.info()->valid_region(), valid_region_vec);
+    validate(beta.info()->valid_region(), valid_region_vec);
+    validate(gamma.info()->valid_region(), valid_region_vec);
+}
+
+BOOST_AUTO_TEST_SUITE(Float)
+BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit"))
+BOOST_DATA_TEST_CASE(Random,
+                     RandomBatchNormalizationLayerDataset() * boost::unit_test::data::make(DataType::F32),
+                     obj, dt)
+{
+    // Compute function
+    Tensor dst = compute_reference_batch_normalization_layer(obj.shape0, obj.shape1, dt, obj.epsilon);
+
+    // Compute reference
+    RawTensor ref_dst = Reference::compute_reference_batch_normalization_layer(obj.shape0, obj.shape1, dt, obj.epsilon);
+
+    // Validate output
+    validate(NEAccessor(dst), ref_dst, tolerance_f, 0);
+}
+BOOST_AUTO_TEST_SUITE_END()
+
+BOOST_AUTO_TEST_SUITE(Quantized)
+BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit"))
+BOOST_DATA_TEST_CASE(Random,
+                     RandomBatchNormalizationLayerDataset() * boost::unit_test::data::make(DataType::QS8) * boost::unit_test::data::xrange(1, 6),
+                     obj, dt, fixed_point_position)
+{
+    // Compute function
+    Tensor dst = compute_reference_batch_normalization_layer(obj.shape0, obj.shape1, dt, obj.epsilon, fixed_point_position);
+
+    // Compute reference
+    RawTensor ref_dst = Reference::compute_reference_batch_normalization_layer(obj.shape0, obj.shape1, dt, obj.epsilon, fixed_point_position);
+
+    // Validate output
+    validate(NEAccessor(dst), ref_dst, tolerance_q, 0);
+}
+BOOST_AUTO_TEST_SUITE_END()
+
+BOOST_AUTO_TEST_SUITE_END()
+BOOST_AUTO_TEST_SUITE_END()
+#endif
diff --git a/tests/validation/NEON/BitwiseAnd.cpp b/tests/validation/NEON/BitwiseAnd.cpp
new file mode 100644
index 0000000000..8c0eda992f
--- /dev/null
+++ b/tests/validation/NEON/BitwiseAnd.cpp
@@ -0,0 +1,218 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "Globals.h" +#include "NEON/Helper.h" +#include "NEON/NEAccessor.h" +#include "TensorLibrary.h" +#include "TypePrinter.h" +#include "Utils.h" +#include "validation/Datasets.h" +#include "validation/Reference.h" +#include "validation/Validation.h" + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/functions/NEBitwiseAnd.h" +#include "arm_compute/runtime/SubTensor.h" +#include "arm_compute/runtime/Tensor.h" +#include "arm_compute/runtime/TensorAllocator.h" + +#include "boost_wrapper.h" + +#include <random> +#include <string> + +using namespace arm_compute; +using namespace arm_compute::test; +using namespace arm_compute::test::neon; +using namespace arm_compute::test::validation; + +namespace +{ +/** Compute Neon bitwise and function. + * + * @param[in] shape Shape of the input and output tensors. + * + * @return Computed output tensor. + */ +Tensor compute_bitwise_and(const TensorShape &shape) +{ + // Create tensors + Tensor src1 = create_tensor(shape, DataType::U8); + Tensor src2 = create_tensor(shape, DataType::U8); + Tensor dst = create_tensor(shape, DataType::U8); + + // Create and configure function + NEBitwiseAnd band; + band.configure(&src1, &src2, &dst); + + // Allocate tensors + src1.allocator()->allocate(); + src2.allocator()->allocate(); + dst.allocator()->allocate(); + + BOOST_TEST(!src1.info()->is_resizable()); + BOOST_TEST(!src2.info()->is_resizable()); + BOOST_TEST(!dst.info()->is_resizable()); + + // Fill tensors + library->fill_tensor_uniform(NEAccessor(src1), 0); + library->fill_tensor_uniform(NEAccessor(src2), 1); + + // Compute function + band.run(); + + return dst; +} + +/** Compute Neon bitwise and function that splits the input and output into two subtensors. + * + * @param[in] shape Shape of the input and output tensors. + * + * @return Computed output tensor.
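+ *
+ * @note Illustrative sketch only (not executed by the suite): splitting along the
+ *       Z dimension yields two sub-tensors that alias halves of the parent tensor, e.g.
+ * @code
+ * TensorShape half = shape;
+ * half.set(2, shape.z() / 2);                            // halve the Z dimension
+ * SubTensor lo(&src, half, Coordinates());               // first half
+ * SubTensor hi(&src, half, Coordinates(0, 0, half.z())); // second half
+ * @endcode
+ *       Running one NEBitwiseAnd per half must match a single run over the full
+ *       tensor, which is what the RunSubTensor case checks.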
+ */ +Tensor compute_bitwise_and_subtensor(const TensorShape &shape) +{ + // Create tensors + Tensor src1 = create_tensor(shape, DataType::U8); + Tensor src2 = create_tensor(shape, DataType::U8); + Tensor dst = create_tensor(shape, DataType::U8); + + // Create SubTensors + int coord_z = shape.z() / 2; + TensorShape sub_shape = shape; + sub_shape.set(2, coord_z); + + SubTensor src1_sub1(&src1, sub_shape, Coordinates()); + SubTensor src1_sub2(&src1, sub_shape, Coordinates(0, 0, coord_z)); + SubTensor src2_sub1(&src2, sub_shape, Coordinates()); + SubTensor src2_sub2(&src2, sub_shape, Coordinates(0, 0, coord_z)); + SubTensor dst_sub1(&dst, sub_shape, Coordinates()); + SubTensor dst_sub2(&dst, sub_shape, Coordinates(0, 0, coord_z)); + + // Create and configure function + NEBitwiseAnd band1, band2; + band1.configure(&src1_sub1, &src2_sub1, &dst_sub1); + band2.configure(&src1_sub2, &src2_sub2, &dst_sub2); + + // Allocate tensors + src1.allocator()->allocate(); + src2.allocator()->allocate(); + dst.allocator()->allocate(); + + BOOST_TEST(!src1.info()->is_resizable()); + BOOST_TEST(!src2.info()->is_resizable()); + BOOST_TEST(!dst.info()->is_resizable()); + + // Fill tensors + std::uniform_int_distribution<> distribution(0, 255); + library->fill(NEAccessor(src1), distribution, 0); + library->fill(NEAccessor(src2), distribution, 1); + + // Compute function + band1.run(); + band2.run(); + + return dst; +} +} // namespace + +#ifndef DOXYGEN_SKIP_THIS +BOOST_AUTO_TEST_SUITE(NEON) +BOOST_AUTO_TEST_SUITE(BitwiseAnd) + +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(Configuration, SmallShapes() + LargeShapes(), shape) +{ + // Create tensors + Tensor src1 = create_tensor(shape, DataType::U8); + Tensor src2 = create_tensor(shape, DataType::U8); + Tensor dst = create_tensor(shape, DataType::U8); + + BOOST_TEST(src1.info()->is_resizable()); + BOOST_TEST(src2.info()->is_resizable()); + BOOST_TEST(dst.info()->is_resizable()); + + // Create and configure function + NEBitwiseAnd band; + band.configure(&src1, &src2, &dst); + + // Validate valid region + const ValidRegion valid_region = shape_to_valid_region(shape); + validate(src1.info()->valid_region(), valid_region); + validate(src2.info()->valid_region(), valid_region); + validate(dst.info()->valid_region(), valid_region); + + // Validate padding + const PaddingSize padding(0, required_padding(shape.x(), 16), 0, 0); + validate(src1.info()->padding(), padding); + validate(src2.info()->padding(), padding); + validate(dst.info()->padding(), padding); +} + +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit")) +BOOST_DATA_TEST_CASE(RunSmall, SmallShapes(), shape) +{ + // Compute function + Tensor dst = compute_bitwise_and(shape); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_bitwise_and(shape); + + // Validate output + validate(NEAccessor(dst), ref_dst); +} + +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit")) +BOOST_AUTO_TEST_CASE(RunSubTensor) +{ + // Create shape + TensorShape shape(27U, 35U, 8U, 2U); + + // Compute function + Tensor dst = compute_bitwise_and_subtensor(shape); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_bitwise_and(shape); + + // Validate output + validate(NEAccessor(dst), ref_dst); +} + +BOOST_TEST_DECORATOR(*boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(RunLarge, LargeShapes(), shape) +{ + // Compute function + Tensor dst = compute_bitwise_and(shape); + + // Compute reference + 
RawTensor ref_dst = Reference::compute_reference_bitwise_and(shape); + + // Validate output + validate(NEAccessor(dst), ref_dst); +} + +BOOST_AUTO_TEST_SUITE_END() +BOOST_AUTO_TEST_SUITE_END() +#endif diff --git a/tests/validation/NEON/BitwiseNot.cpp b/tests/validation/NEON/BitwiseNot.cpp new file mode 100644 index 0000000000..cb0a1fd0b5 --- /dev/null +++ b/tests/validation/NEON/BitwiseNot.cpp @@ -0,0 +1,142 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "Globals.h" +#include "NEON/Helper.h" +#include "NEON/NEAccessor.h" +#include "TensorLibrary.h" +#include "TypePrinter.h" +#include "Utils.h" +#include "validation/Datasets.h" +#include "validation/Reference.h" +#include "validation/Validation.h" + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/functions/NEBitwiseNot.h" +#include "arm_compute/runtime/Tensor.h" +#include "arm_compute/runtime/TensorAllocator.h" + +#include "boost_wrapper.h" + +#include <random> +#include <string> + +using namespace arm_compute; +using namespace arm_compute::test; +using namespace arm_compute::test::neon; +using namespace arm_compute::test::validation; + +namespace +{ +/** Compute Neon bitwise not function. + * + * @param[in] shape Shape of the input and output tensors. + * + * @return Computed output tensor.
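+ *
+ * @note A minimal stand-alone usage sketch (illustrative only; the shape and
+ *       names are arbitrary, not part of the test):
+ * @code
+ * Tensor in, out;
+ * in.allocator()->init(TensorInfo(TensorShape(16U, 16U), 1, DataType::U8));
+ * out.allocator()->init(TensorInfo(TensorShape(16U, 16U), 1, DataType::U8));
+ * NEBitwiseNot bnot;
+ * bnot.configure(&in, &out); // out[i] = ~in[i] element-wise
+ * in.allocator()->allocate();
+ * out.allocator()->allocate();
+ * bnot.run();
+ * @endcode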
+ */ +Tensor compute_bitwise_not(const TensorShape &shape) +{ + // Create tensors + Tensor src = create_tensor(shape, DataType::U8); + Tensor dst = create_tensor(shape, DataType::U8); + + // Create and configure function + NEBitwiseNot bnot; + bnot.configure(&src, &dst); + + // Allocate tensors + src.allocator()->allocate(); + dst.allocator()->allocate(); + + BOOST_TEST(!src.info()->is_resizable()); + BOOST_TEST(!dst.info()->is_resizable()); + + // Fill tensors + library->fill_tensor_uniform(NEAccessor(src), 0); + + // Compute function + bnot.run(); + + return dst; +} +} // namespace + +#ifndef DOXYGEN_SKIP_THIS +BOOST_AUTO_TEST_SUITE(NEON) +BOOST_AUTO_TEST_SUITE(BitwiseNot) + +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(Configuration, SmallShapes() + LargeShapes(), shape) +{ + // Create tensors + Tensor src = create_tensor(shape, DataType::U8); + Tensor dst = create_tensor(shape, DataType::U8); + + BOOST_TEST(src.info()->is_resizable()); + BOOST_TEST(dst.info()->is_resizable()); + + // Create and configure function + NEBitwiseNot bnot; + bnot.configure(&src, &dst); + + // Validate valid region + const ValidRegion valid_region = shape_to_valid_region(shape); + validate(src.info()->valid_region(), valid_region); + validate(dst.info()->valid_region(), valid_region); + + // Validate padding + const PaddingSize padding(0, required_padding(shape.x(), 16), 0, 0); + validate(src.info()->padding(), padding); + validate(dst.info()->padding(), padding); +} + +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit")) +BOOST_DATA_TEST_CASE(RunSmall, SmallShapes(), shape) +{ + // Compute function + Tensor dst = compute_bitwise_not(shape); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_bitwise_not(shape); + + // Validate output + validate(NEAccessor(dst), ref_dst); +} + +BOOST_TEST_DECORATOR(*boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(RunLarge, LargeShapes(), shape) +{ + // Compute function + Tensor dst = compute_bitwise_not(shape); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_bitwise_not(shape); + + // Validate output + validate(NEAccessor(dst), ref_dst); +} + +BOOST_AUTO_TEST_SUITE_END() +BOOST_AUTO_TEST_SUITE_END() +#endif diff --git a/tests/validation/NEON/BitwiseOr.cpp b/tests/validation/NEON/BitwiseOr.cpp new file mode 100644 index 0000000000..cb853d3fd4 --- /dev/null +++ b/tests/validation/NEON/BitwiseOr.cpp @@ -0,0 +1,150 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "Globals.h" +#include "NEON/Helper.h" +#include "NEON/NEAccessor.h" +#include "TensorLibrary.h" +#include "TypePrinter.h" +#include "Utils.h" +#include "validation/Datasets.h" +#include "validation/Reference.h" +#include "validation/Validation.h" + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/functions/NEBitwiseOr.h" +#include "arm_compute/runtime/Tensor.h" +#include "arm_compute/runtime/TensorAllocator.h" + +#include "boost_wrapper.h" + +#include <random> +#include <string> + +using namespace arm_compute; +using namespace arm_compute::test; +using namespace arm_compute::test::neon; +using namespace arm_compute::test::validation; + +namespace +{ +/** Compute Neon bitwise Or function. + * + * @param[in] shape Shape of the input and output tensors. + * + * @return Computed output tensor. + */ +Tensor compute_bitwise_or(const TensorShape &shape) +{ + // Create tensors + Tensor src1 = create_tensor(shape, DataType::U8); + Tensor src2 = create_tensor(shape, DataType::U8); + Tensor dst = create_tensor(shape, DataType::U8); + + // Create and configure function + NEBitwiseOr bor; + bor.configure(&src1, &src2, &dst); + + // Allocate tensors + src1.allocator()->allocate(); + src2.allocator()->allocate(); + dst.allocator()->allocate(); + + BOOST_TEST(!src1.info()->is_resizable()); + BOOST_TEST(!src2.info()->is_resizable()); + BOOST_TEST(!dst.info()->is_resizable()); + + // Fill tensors + library->fill_tensor_uniform(NEAccessor(src1), 0); + library->fill_tensor_uniform(NEAccessor(src2), 1); + + // Compute function + bor.run(); + + return dst; +} +} // namespace + +#ifndef DOXYGEN_SKIP_THIS +BOOST_AUTO_TEST_SUITE(NEON) +BOOST_AUTO_TEST_SUITE(BitwiseOr) + +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(Configuration, SmallShapes() + LargeShapes(), shape) +{ + // Create tensors + Tensor src1 = create_tensor(shape, DataType::U8); + Tensor src2 = create_tensor(shape, DataType::U8); + Tensor dst = create_tensor(shape, DataType::U8); + + BOOST_TEST(src1.info()->is_resizable()); + BOOST_TEST(src2.info()->is_resizable()); + BOOST_TEST(dst.info()->is_resizable()); + + // Create and configure function + NEBitwiseOr bor; + bor.configure(&src1, &src2, &dst); + + // Validate valid region + const ValidRegion valid_region = shape_to_valid_region(shape); + validate(src1.info()->valid_region(), valid_region); + validate(src2.info()->valid_region(), valid_region); + validate(dst.info()->valid_region(), valid_region); + + // Validate padding + const PaddingSize padding(0, required_padding(shape.x(), 16), 0, 0); + validate(src1.info()->padding(), padding); + validate(src2.info()->padding(), padding); + validate(dst.info()->padding(), padding); +} + +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit")) +BOOST_DATA_TEST_CASE(RunSmall, SmallShapes(), shape) +{ + // Compute function + Tensor dst = compute_bitwise_or(shape); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_bitwise_or(shape); + + // Validate output + validate(NEAccessor(dst), ref_dst); +} + +BOOST_TEST_DECORATOR(*boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(RunLarge, LargeShapes(), shape) +{ + // Compute
function + Tensor dst = compute_bitwise_or(shape); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_bitwise_or(shape); + + // Validate output + validate(NEAccessor(dst), ref_dst); +} + +BOOST_AUTO_TEST_SUITE_END() +BOOST_AUTO_TEST_SUITE_END() +#endif diff --git a/tests/validation/NEON/BitwiseXor.cpp b/tests/validation/NEON/BitwiseXor.cpp new file mode 100644 index 0000000000..1715b04609 --- /dev/null +++ b/tests/validation/NEON/BitwiseXor.cpp @@ -0,0 +1,150 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "Globals.h" +#include "NEON/Helper.h" +#include "NEON/NEAccessor.h" +#include "TensorLibrary.h" +#include "TypePrinter.h" +#include "Utils.h" +#include "validation/Datasets.h" +#include "validation/Reference.h" +#include "validation/Validation.h" + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/functions/NEBitwiseXor.h" +#include "arm_compute/runtime/Tensor.h" +#include "arm_compute/runtime/TensorAllocator.h" + +#include "boost_wrapper.h" + +#include <random> +#include <string> + +using namespace arm_compute; +using namespace arm_compute::test; +using namespace arm_compute::test::neon; +using namespace arm_compute::test::validation; + +namespace +{ +/** Compute Neon bitwise xor function. + * + * @param[in] shape Shape of the input and output tensors. + * + * @return Computed output tensor.
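+ *
+ * @note Reference semantics being validated (sketch): the output is the
+ *       element-wise exclusive OR of the two inputs,
+ * @code
+ * // for every element i:
+ * dst[i] = src1[i] ^ src2[i];
+ * @endcode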
+ */ +Tensor compute_bitwise_xor(const TensorShape &shape) +{ + // Create tensors + Tensor src1 = create_tensor(shape, DataType::U8); + Tensor src2 = create_tensor(shape, DataType::U8); + Tensor dst = create_tensor(shape, DataType::U8); + + // Create and configure function + NEBitwiseXor bxor; + bxor.configure(&src1, &src2, &dst); + + // Allocate tensors + src1.allocator()->allocate(); + src2.allocator()->allocate(); + dst.allocator()->allocate(); + + BOOST_TEST(!src1.info()->is_resizable()); + BOOST_TEST(!src2.info()->is_resizable()); + BOOST_TEST(!dst.info()->is_resizable()); + + // Fill tensors + library->fill_tensor_uniform(NEAccessor(src1), 0); + library->fill_tensor_uniform(NEAccessor(src2), 1); + + // Compute function + bxor.run(); + + return dst; +} +} // namespace + +#ifndef DOXYGEN_SKIP_THIS +BOOST_AUTO_TEST_SUITE(NEON) +BOOST_AUTO_TEST_SUITE(BitwiseXor) + +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(Configuration, SmallShapes() + LargeShapes(), shape) +{ + // Create tensors + Tensor src1 = create_tensor(shape, DataType::U8); + Tensor src2 = create_tensor(shape, DataType::U8); + Tensor dst = create_tensor(shape, DataType::U8); + + BOOST_TEST(src1.info()->is_resizable()); + BOOST_TEST(src2.info()->is_resizable()); + BOOST_TEST(dst.info()->is_resizable()); + + // Create and configure function + NEBitwiseXor bxor; + bxor.configure(&src1, &src2, &dst); + + // Validate valid region + const ValidRegion valid_region = shape_to_valid_region(shape); + validate(src1.info()->valid_region(), valid_region); + validate(src2.info()->valid_region(), valid_region); + validate(dst.info()->valid_region(), valid_region); + + // Validate padding + const PaddingSize padding(0, required_padding(shape.x(), 16), 0, 0); + validate(src1.info()->padding(), padding); + validate(src2.info()->padding(), padding); + validate(dst.info()->padding(), padding); +} + +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit")) +BOOST_DATA_TEST_CASE(RunSmall, SmallShapes(), shape) +{ + // Compute function + Tensor dst = compute_bitwise_xor(shape); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_bitwise_xor(shape); + + // Validate output + validate(NEAccessor(dst), ref_dst); +} + +BOOST_TEST_DECORATOR(*boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(RunLarge, LargeShapes(), shape) +{ + // Compute function + Tensor dst = compute_bitwise_xor(shape); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_bitwise_xor(shape); + + // Validate output + validate(NEAccessor(dst), ref_dst); +} + +BOOST_AUTO_TEST_SUITE_END() +BOOST_AUTO_TEST_SUITE_END() +#endif diff --git a/tests/validation/NEON/Box3x3.cpp b/tests/validation/NEON/Box3x3.cpp new file mode 100644 index 0000000000..5da015c73a --- /dev/null +++ b/tests/validation/NEON/Box3x3.cpp @@ -0,0 +1,145 @@ +/* + * Copyright (c) 2017 ARM Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "Globals.h" +#include "NEON/Helper.h" +#include "NEON/NEAccessor.h" +#include "TensorLibrary.h" +#include "TypePrinter.h" +#include "Utils.h" +#include "validation/Datasets.h" +#include "validation/Reference.h" +#include "validation/Validation.h" + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/functions/NEBox3x3.h" +#include "arm_compute/runtime/SubTensor.h" +#include "arm_compute/runtime/Tensor.h" +#include "arm_compute/runtime/TensorAllocator.h" + +#include "boost_wrapper.h" + +#include <random> +#include <string> + +using namespace arm_compute; +using namespace arm_compute::test; +using namespace arm_compute::test::neon; +using namespace arm_compute::test::validation; + +namespace +{ +/** Compute Neon 3-by-3 box filter. + * + * @param[in] shape Shape of the input and output tensors. + * + * @return Computed output tensor.
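+ *
+ * @note Reference semantics (sketch): each output pixel is the mean of its
+ *       3x3 neighbourhood,
+ * @code
+ * // dst(x, y) = (1 / 9) * sum of src(x + i, y + j) for i, j in [-1, 1]
+ * @endcode
+ *       With BorderMode::UNDEFINED the one-pixel border is left unspecified,
+ *       which is why the cases below validate against
+ *       shape_to_valid_region_undefined_border(shape, BorderSize(1)).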
+ */ +Tensor compute_box3x3(const TensorShape &shape) +{ + // Create tensors + Tensor src = create_tensor(shape, DataType::U8); + Tensor dst = create_tensor(shape, DataType::U8); + + // Create and configure function + NEBox3x3 band; + band.configure(&src, &dst, BorderMode::UNDEFINED); + + // Allocate tensors + src.allocator()->allocate(); + dst.allocator()->allocate(); + + BOOST_TEST(!src.info()->is_resizable()); + BOOST_TEST(!dst.info()->is_resizable()); + + // Fill tensors + library->fill_tensor_uniform(NEAccessor(src), 0); + + // Compute function + band.run(); + + return dst; +} +} // namespace + +#ifndef DOXYGEN_SKIP_THIS +BOOST_AUTO_TEST_SUITE(NEON) +BOOST_AUTO_TEST_SUITE(Box3x3) + +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(Configuration, SmallShapes() + LargeShapes(), shape) +{ + // Create tensors + Tensor src = create_tensor(shape, DataType::U8); + Tensor dst = create_tensor(shape, DataType::U8); + + BOOST_TEST(src.info()->is_resizable()); + BOOST_TEST(dst.info()->is_resizable()); + + // Create and configure function + NEBox3x3 band; + band.configure(&src, &dst, BorderMode::UNDEFINED); + + // Validate valid region + const ValidRegion src_valid_region = shape_to_valid_region(shape); + const ValidRegion dst_valid_region = shape_to_valid_region_undefined_border(shape, BorderSize(1)); + validate(src.info()->valid_region(), src_valid_region); + validate(dst.info()->valid_region(), dst_valid_region); + + // Validate padding + const PaddingSize read_padding(0, required_padding_undefined_border_read(shape.x(), 16, 8), 0, 0); + const PaddingSize write_padding(0, required_padding_undefined_border_write(shape.x(), 8, 1), 0, 0); + validate(src.info()->padding(), read_padding); + validate(dst.info()->padding(), write_padding); +} + +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit")) +BOOST_DATA_TEST_CASE(RunSmall, SmallShapes(), shape) +{ + // Compute function + Tensor dst = compute_box3x3(shape); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_box3x3(shape); + + // Validate output + validate(NEAccessor(dst), ref_dst, shape_to_valid_region_undefined_border(shape, BorderSize(1))); +} + +BOOST_TEST_DECORATOR(*boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(RunLarge, LargeShapes(), shape) +{ + // Compute function + Tensor dst = compute_box3x3(shape); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_box3x3(shape); + + // Validate output + validate(NEAccessor(dst), ref_dst, shape_to_valid_region_undefined_border(shape, BorderSize(1))); +} + +BOOST_AUTO_TEST_SUITE_END() +BOOST_AUTO_TEST_SUITE_END() +#endif diff --git a/tests/validation/NEON/CMakeLists.txt b/tests/validation/NEON/CMakeLists.txt new file mode 100644 index 0000000000..52678f345b --- /dev/null +++ b/tests/validation/NEON/CMakeLists.txt @@ -0,0 +1,55 @@ +# Copyright (c) 2017 ARM Limited. 
+# +# SPDX-License-Identifier: MIT +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to +# deal in the Software without restriction, including without limitation the +# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +# sell copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. +cmake_minimum_required (VERSION 3.1) + +set(arm_compute_test_validation_NEON_SOURCE_FILES + ${CMAKE_SOURCE_DIR}/NEON/Helper.h + ${CMAKE_SOURCE_DIR}/NEON/NEAccessor.h + ${CMAKE_CURRENT_SOURCE_DIR}/AbsoluteDifference.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/Accumulate.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/AccumulateSquared.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/AccumulateWeighted.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/ArithmeticAddition.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/ArithmeticSubtraction.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/BitwiseAnd.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/BitwiseNot.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/BitwiseOr.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/BitwiseXor.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/Box3x3.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/Fixedpoint/Exp_QS8.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/Fixedpoint/Invsqrt_QS8.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/Fixedpoint/Log_QS8.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/Fixedpoint/Reciprocal_QS8.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/NormalizationLayer.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/PixelWiseMultiplication.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/IntegralImage.cpp +) + +add_library(arm_compute_test_validation_NEON OBJECT + ${arm_compute_test_validation_NEON_SOURCE_FILES} +) + +set(arm_compute_test_validation_TARGET_OBJECTS + ${arm_compute_test_validation_TARGET_OBJECTS} + $<TARGET_OBJECTS:arm_compute_test_validation_NEON> + PARENT_SCOPE +) diff --git a/tests/validation/NEON/ConvolutionLayer.cpp b/tests/validation/NEON/ConvolutionLayer.cpp new file mode 100644 index 0000000000..a1dbe38bbf --- /dev/null +++ b/tests/validation/NEON/ConvolutionLayer.cpp @@ -0,0 +1,200 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "NEON/Helper.h" +#include "NEON/NEAccessor.h" +#include "TypePrinter.h" +#include "dataset/ConvolutionLayerDataset.h" +#include "validation/Datasets.h" +#include "validation/Reference.h" +#include "validation/Validation.h" + +#include "arm_compute/core/Error.h" +#include "arm_compute/runtime/NEON/functions/NEConvolutionLayer.h" + +#include <random> + +using namespace arm_compute; +using namespace arm_compute::test; +using namespace arm_compute::test::neon; +using namespace arm_compute::test::validation; + +namespace +{ +const float tolerance_f32 = 1e-03f; /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 */ +const float tolerance_qs8 = 3.0f; /**< Tolerance value for comparing reference's output against implementation's output for DataType::QS8 */ + +Tensor compute_convolution_layer(const TensorShape &input_shape, const TensorShape &weights_shape, const TensorShape &bias_shape, const TensorShape &output_shape, DataType dt, + const PadStrideInfo &conv_info, int fixed_point_position) +{ + // Create tensors + Tensor src = create_tensor(input_shape, dt, 1, fixed_point_position); + Tensor weights = create_tensor(weights_shape, dt, 1, fixed_point_position); + Tensor bias = create_tensor(bias_shape, dt, 1, fixed_point_position); + Tensor dst = create_tensor(output_shape, dt, 1, fixed_point_position); + + // Create and configure function + NEConvolutionLayer conv; + conv.configure(&src, &weights, &bias, &dst, conv_info); + + // Allocate tensors + src.allocator()->allocate(); + weights.allocator()->allocate(); + bias.allocator()->allocate(); + dst.allocator()->allocate(); + + BOOST_TEST(!src.info()->is_resizable()); + BOOST_TEST(!weights.info()->is_resizable()); + BOOST_TEST(!bias.info()->is_resizable()); + BOOST_TEST(!dst.info()->is_resizable()); + + // Fill tensors + if(dt == DataType::F32) + { + std::uniform_real_distribution<> distribution(-1.0f, 1.0f); + library->fill(NEAccessor(src), distribution, 0); + library->fill(NEAccessor(weights), distribution, 1); + library->fill(NEAccessor(bias), distribution, 2); + } + else + { + library->fill_tensor_uniform(NEAccessor(src), 0); + library->fill_tensor_uniform(NEAccessor(weights), 1); + library->fill_tensor_uniform(NEAccessor(bias), 2); + } + + // Compute NEConvolutionLayer function + conv.run(); + + return dst; +} +} // namespace + +#ifndef DOXYGEN_SKIP_THIS +BOOST_AUTO_TEST_SUITE(NEON) +BOOST_AUTO_TEST_SUITE(ConvolutionLayer) +BOOST_AUTO_TEST_SUITE(GEMM) + +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(Configuration, + AlexNetConvolutionLayerDataset() * boost::unit_test::data::make({ DataType::F32, DataType::QS8 }), + conv_set, dt) +{ + // Set the fixed point position for fixed point data types + int fixed_point_position = (dt == DataType::F32) ?
0 : 3; + + // Create tensors + Tensor src = create_tensor(conv_set.src_shape, dt, 1, fixed_point_position); + Tensor weights = create_tensor(conv_set.weights_shape, dt, 1, fixed_point_position); + Tensor bias = create_tensor(conv_set.bias_shape, dt, 1, fixed_point_position); + Tensor dst = create_tensor(conv_set.dst_shape, dt, 1, fixed_point_position); + + BOOST_TEST(src.info()->is_resizable()); + BOOST_TEST(weights.info()->is_resizable()); + BOOST_TEST(bias.info()->is_resizable()); + BOOST_TEST(dst.info()->is_resizable()); + + // Create and configure function + NEConvolutionLayer conv; + conv.configure(&src, &weights, &bias, &dst, conv_set.info); + + // Validate valid region + const ValidRegion src_valid_region = shape_to_valid_region(conv_set.src_shape); + const ValidRegion weights_valid_region = shape_to_valid_region(conv_set.weights_shape); + const ValidRegion bias_valid_region = shape_to_valid_region(conv_set.bias_shape); + const ValidRegion dst_valid_region = shape_to_valid_region(conv_set.dst_shape); + + validate(src.info()->valid_region(), src_valid_region); + validate(weights.info()->valid_region(), weights_valid_region); + validate(bias.info()->valid_region(), bias_valid_region); + validate(dst.info()->valid_region(), dst_valid_region); +} + +BOOST_AUTO_TEST_SUITE(Float) +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit")) +BOOST_DATA_TEST_CASE(SmallConvolutionLayer, + SmallConvolutionLayerDataset() * boost::unit_test::data::make(DataType::F32), + conv_set, dt) +{ + // Compute function + Tensor dst = compute_convolution_layer(conv_set.src_shape, conv_set.weights_shape, conv_set.bias_shape, conv_set.dst_shape, dt, conv_set.info, 0); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_convolution_layer(conv_set.src_shape, conv_set.weights_shape, conv_set.bias_shape, conv_set.dst_shape, dt, conv_set.info, 0); + + // Validate output + validate(NEAccessor(dst), ref_dst, tolerance_f32); +} + +BOOST_TEST_DECORATOR(*boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(LargeConvolutionLayer, + AlexNetConvolutionLayerDataset() * boost::unit_test::data::make(DataType::F32), + conv_set, dt) +{ + // Compute function + Tensor dst = compute_convolution_layer(conv_set.src_shape, conv_set.weights_shape, conv_set.bias_shape, conv_set.dst_shape, dt, conv_set.info, 0); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_convolution_layer(conv_set.src_shape, conv_set.weights_shape, conv_set.bias_shape, conv_set.dst_shape, dt, conv_set.info, 0); + + // Validate output + validate(NEAccessor(dst), ref_dst, tolerance_f32); +} +BOOST_AUTO_TEST_SUITE_END() + +BOOST_AUTO_TEST_SUITE(Quantized) +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit")) +BOOST_DATA_TEST_CASE(SmallConvolutionLayer, + SmallConvolutionLayerDataset() * boost::unit_test::data::make(DataType::QS8) * boost::unit_test::data::xrange(4, 7), + conv_set, dt, fixed_point_position) +{ + // Compute function + Tensor dst = compute_convolution_layer(conv_set.src_shape, conv_set.weights_shape, conv_set.bias_shape, conv_set.dst_shape, dt, conv_set.info, fixed_point_position); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_convolution_layer(conv_set.src_shape, conv_set.weights_shape, conv_set.bias_shape, conv_set.dst_shape, dt, conv_set.info, fixed_point_position); + + // Validate output + validate(NEAccessor(dst), ref_dst, tolerance_qs8); +} + +BOOST_TEST_DECORATOR(*boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(LargeConvolutionLayer, + 
AlexNetConvolutionLayerDataset() * boost::unit_test::data::make(DataType::QS8) * boost::unit_test::data::xrange(4, 7), + conv_set, dt, fixed_point_position) +{ + // Compute function + Tensor dst = compute_convolution_layer(conv_set.src_shape, conv_set.weights_shape, conv_set.bias_shape, conv_set.dst_shape, dt, conv_set.info, fixed_point_position); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_convolution_layer(conv_set.src_shape, conv_set.weights_shape, conv_set.bias_shape, conv_set.dst_shape, dt, conv_set.info, fixed_point_position); + + // Validate output + validate(NEAccessor(dst), ref_dst, tolerance_qs8); +} +BOOST_AUTO_TEST_SUITE_END() + +BOOST_AUTO_TEST_SUITE_END() +BOOST_AUTO_TEST_SUITE_END() +BOOST_AUTO_TEST_SUITE_END() +#endif \ No newline at end of file diff --git a/tests/validation/NEON/ConvolutionLayerDirect.cpp b/tests/validation/NEON/ConvolutionLayerDirect.cpp new file mode 100644 index 0000000000..4e36e331bd --- /dev/null +++ b/tests/validation/NEON/ConvolutionLayerDirect.cpp @@ -0,0 +1,219 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "Globals.h" +#include "NEON/Helper.h" +#include "NEON/NEAccessor.h" +#include "TensorLibrary.h" +#include "TypePrinter.h" +#include "Utils.h" +#include "validation/Datasets.h" +#include "validation/Reference.h" +#include "validation/Validation.h" + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h" +#include "arm_compute/runtime/Tensor.h" +#include "arm_compute/runtime/TensorAllocator.h" + +#include "boost_wrapper.h" + +#include <random> +#include <string> +#include <tuple> + +using namespace arm_compute; +using namespace arm_compute::test; +using namespace arm_compute::test::neon; +using namespace arm_compute::test::validation; + +namespace +{ +const float tolerance_fp = 1e-3f; /**< Tolerance for floating point tests */ +const float tolerance_qs8 = 1; /**< Tolerance for fixed point tests */ + +/** Compute NEON direct convolution layer function. + * + * @param[in] src_shape Shape of the input tensor. + * @param[in] weights_shape Shape of the weights. + * @param[in] bias_shape Shape of the bias tensor. + * @param[in] dst_shape Shape of the output tensor. + * @param[in] dt Data type of input, convolution matrix and output tensors. + * @param[in] conv_info Padding and stride information.
+ * @param[in] fixed_point_position (Optional) Number of bits for the fractional part of the fixed point numbers. + * + * @return Computed output tensor. + */ +Tensor compute_convolution_layer(const TensorShape &src_shape, const TensorShape &weights_shape, const TensorShape &bias_shape, const TensorShape &dst_shape, + DataType dt, PadStrideInfo conv_info, int fixed_point_position = 0) +{ + // Create tensors + Tensor src = create_tensor(src_shape, dt, 1, fixed_point_position); + Tensor weights = create_tensor(weights_shape, dt, 1, fixed_point_position); + Tensor bias = create_tensor(bias_shape, dt, 1, fixed_point_position); + Tensor dst = create_tensor(dst_shape, dt, 1, fixed_point_position); + + // Create and configure function + NEDirectConvolutionLayer conv_layer; + conv_layer.configure(&src, &weights, &bias, &dst, conv_info); + + // Allocate tensors + src.allocator()->allocate(); + weights.allocator()->allocate(); + bias.allocator()->allocate(); + dst.allocator()->allocate(); + + BOOST_TEST(!src.info()->is_resizable()); + BOOST_TEST(!weights.info()->is_resizable()); + BOOST_TEST(!bias.info()->is_resizable()); + BOOST_TEST(!dst.info()->is_resizable()); + + // Fill tensors + if(dt == DataType::F32) + { + std::uniform_real_distribution<> distribution(-1.f, 1.f); + library->fill(NEAccessor(src), distribution, 0); + library->fill(NEAccessor(weights), distribution, 1); + library->fill(NEAccessor(bias), distribution, 2); + } + else + { + library->fill_tensor_uniform(NEAccessor(src), 0); + library->fill_tensor_uniform(NEAccessor(weights), 1); + library->fill_tensor_uniform(NEAccessor(bias), 2); + } + + // Compute function + conv_layer.run(); + + return dst; +} + +TensorShape get_output_shape(TensorShape in_shape, TensorShape kernel_shape, const PadStrideInfo &conv_info) +{ + TensorShape out_shape(in_shape); + const std::pair<unsigned int, unsigned int> scaled_dims = arm_compute::scaled_dimensions(in_shape.x(), + in_shape.y(), + kernel_shape.x(), + conv_info.stride().first, conv_info.stride().second, + conv_info.pad().first, conv_info.pad().second, + conv_info.round()); + out_shape.set(0, scaled_dims.first); + out_shape.set(1, scaled_dims.second); + out_shape.set(2, kernel_shape[3]); + return out_shape; +} + +} // namespace + +#ifndef DOXYGEN_SKIP_THIS +BOOST_AUTO_TEST_SUITE(NEON) +BOOST_AUTO_TEST_SUITE(ConvolutionLayer) +BOOST_AUTO_TEST_SUITE(Direct) + +BOOST_AUTO_TEST_SUITE(Float) +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit")) +BOOST_DATA_TEST_CASE(W1x1, + DirectConvolutionShapes() * CNNFloatDataTypes() * boost::unit_test::data::xrange(1, 3, 1) * boost::unit_test::data::xrange(1, 3, 1) * boost::unit_test::data::make({ 1, 4, 8, 16 }), + input_shape, dt, sx, sy, num_kernels) +{ + const unsigned int kernel_size = 1; + const PadStrideInfo conv_info(sx, sy, 0, 0, DimensionRoundingType::FLOOR); + const TensorShape w_shape(kernel_size, kernel_size, input_shape.z(), static_cast<unsigned int>(num_kernels)); + const TensorShape b_shape(static_cast<unsigned int>(num_kernels)); + const TensorShape d_shape(get_output_shape(input_shape, w_shape, conv_info)); + + Tensor dst = compute_convolution_layer(input_shape, w_shape, b_shape, d_shape, dt, conv_info); + + RawTensor ref = Reference::compute_reference_convolution_layer(input_shape, w_shape, b_shape, d_shape, dt, conv_info, 0); + + // Validate output + validate(NEAccessor(dst), ref); +} + +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit")) +BOOST_DATA_TEST_CASE(W3x3, DirectConvolutionShapes() * CNNFloatDataTypes() * boost::unit_test::data::xrange(1, 3, 1) * boost::unit_test::data::xrange(1,
3, 1) * boost::unit_test::data::xrange(0, 2, + 1) + * boost::unit_test::data::xrange(0, 2, 1) * boost::unit_test::data::make({ 1, 4, 8, 16 }), + input_shape, dt, sx, sy, px, py, num_kernels) +{ + const unsigned int kernel_size = 3; + const PadStrideInfo conv_info(sx, sy, px, py, DimensionRoundingType::FLOOR); + const TensorShape w_shape(kernel_size, kernel_size, input_shape.z(), static_cast<unsigned int>(num_kernels)); + const TensorShape b_shape(static_cast<unsigned int>(num_kernels)); + const TensorShape d_shape(get_output_shape(input_shape, w_shape, conv_info)); + + Tensor dst = compute_convolution_layer(input_shape, w_shape, b_shape, d_shape, dt, conv_info); + + RawTensor ref = Reference::compute_reference_convolution_layer(input_shape, w_shape, b_shape, d_shape, dt, conv_info, 0); + + // Validate output + validate(NEAccessor(dst), ref, tolerance_fp); +} +BOOST_AUTO_TEST_SUITE_END() + +BOOST_AUTO_TEST_SUITE(Quantized) +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit")) +BOOST_DATA_TEST_CASE(W1x1, + DirectConvolutionShapes() * boost::unit_test::data::xrange(1, 3, 1) * boost::unit_test::data::xrange(1, 3, 1) * boost::unit_test::data::make({ 1, 4, 8, 16 }) * boost::unit_test::data::make({ 4, 5 }), + input_shape, sx, sy, num_kernels, fixed_point_position) +{ + const unsigned int kernel_size = 1; + const PadStrideInfo conv_info(sx, sy, 0, 0, DimensionRoundingType::FLOOR); + const TensorShape w_shape(kernel_size, kernel_size, input_shape.z(), static_cast<unsigned int>(num_kernels)); + const TensorShape b_shape(static_cast<unsigned int>(num_kernels)); + const TensorShape d_shape(get_output_shape(input_shape, w_shape, conv_info)); + + Tensor dst = compute_convolution_layer(input_shape, w_shape, b_shape, d_shape, DataType::QS8, conv_info, fixed_point_position); + + RawTensor ref = Reference::compute_reference_convolution_layer(input_shape, w_shape, b_shape, d_shape, DataType::QS8, conv_info, fixed_point_position); + + // Validate output + validate(NEAccessor(dst), ref); +} + +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit")) +BOOST_DATA_TEST_CASE(W3x3, DirectConvolutionShapes() * boost::unit_test::data::xrange(1, 3, 1) * boost::unit_test::data::xrange(1, 3, 1) * boost::unit_test::data::xrange(0, 2, 1) + * boost::unit_test::data::xrange(0, 2, 1) * boost::unit_test::data::make({ 1, 4, 8, 16 }) * boost::unit_test::data::make({ 4, 5 }), + input_shape, sx, sy, px, py, num_kernels, fixed_point_position) +{ + const unsigned int kernel_size = 3; + const PadStrideInfo conv_info(sx, sy, px, py, DimensionRoundingType::FLOOR); + const TensorShape w_shape(kernel_size, kernel_size, input_shape.z(), static_cast<unsigned int>(num_kernels)); + const TensorShape b_shape(static_cast<unsigned int>(num_kernels)); + const TensorShape d_shape(get_output_shape(input_shape, w_shape, conv_info)); + + Tensor dst = compute_convolution_layer(input_shape, w_shape, b_shape, d_shape, DataType::QS8, conv_info, fixed_point_position); + + RawTensor ref = Reference::compute_reference_convolution_layer(input_shape, w_shape, b_shape, d_shape, DataType::QS8, conv_info, fixed_point_position); + + // Validate output + validate(NEAccessor(dst), ref, tolerance_qs8); +} +BOOST_AUTO_TEST_SUITE_END() + +BOOST_AUTO_TEST_SUITE_END() +BOOST_AUTO_TEST_SUITE_END() +BOOST_AUTO_TEST_SUITE_END() +#endif \ No newline at end of file diff --git a/tests/validation/NEON/DepthConvert.cpp b/tests/validation/NEON/DepthConvert.cpp new file mode 100644 index 0000000000..ec0bb7ccc5 --- /dev/null +++ b/tests/validation/NEON/DepthConvert.cpp @@ -0,0 +1,500 @@ +/* + * Copyright (c) 2017 ARM Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "Globals.h" +#include "NEON/Helper.h" +#include "NEON/NEAccessor.h" +#include "TensorLibrary.h" +#include "TypePrinter.h" +#include "Utils.h" +#include "validation/Datasets.h" +#include "validation/Reference.h" +#include "validation/Validation.h" + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/functions/NEDepthConvert.h" +#include "arm_compute/runtime/Tensor.h" +#include "arm_compute/runtime/TensorAllocator.h" + +#include "boost_wrapper.h" + +#include <random> +#include <string> + +using namespace arm_compute; +using namespace arm_compute::test; +using namespace arm_compute::test::neon; +using namespace arm_compute::test::validation; + +namespace +{ +/** Compute Neon depth convert function. + * + * @param[in] shape Shape of the input and output tensors. + * @param[in] dt_in Data type of input tensor. + * @param[in] dt_out Data type of the output tensor. + * @param[in] policy Conversion policy. + * @param[in] shift Value for down/up conversions. Must be 0 <= shift < 8. + * @param[in] fixed_point_position Fixed point position. + * + * @return Computed output tensor. + */ +Tensor compute_depth_convert(const TensorShape &shape, DataType dt_in, DataType dt_out, ConvertPolicy policy, uint32_t shift, uint32_t fixed_point_position) +{ + // Create tensors + Tensor src = create_tensor(shape, dt_in, 1, fixed_point_position); + Tensor dst = create_tensor(shape, dt_out, 1, fixed_point_position); + + // Create and configure function + NEDepthConvert depth_convert; + depth_convert.configure(&src, &dst, policy, shift); + + // Allocate tensors + src.allocator()->allocate(); + dst.allocator()->allocate(); + + BOOST_TEST(!src.info()->is_resizable()); + BOOST_TEST(!dst.info()->is_resizable()); + + // Fill tensors + library->fill_tensor_uniform(NEAccessor(src), 0); + + // Compute function + depth_convert.run(); + + return dst; +} +/** Configure and validate region/padding function. + * + * @param[in] shape Shape of the input and output tensors. + * @param[in] dt_in Data type of input tensor. + * @param[in] dt_out Data type of the output tensor. + * @param[in] policy Conversion policy. + * @param[in] shift Value for down/up conversions. Must be 0 <= shift < 8. + * @param[in] fixed_point_position Fixed point position.
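+ *
+ * @note Conversion semantics being exercised (sketch, assumed from the
+ *       NEDepthConvert interface rather than spelled out here): an
+ *       up-conversion applies the shift after widening, e.g. for U8 -> U16
+ * @code
+ * dst[i] = static_cast<uint16_t>(src[i]) << shift;
+ * @endcode
+ *       while a down-conversion shifts first and then saturates or wraps
+ *       according to the chosen ConvertPolicy.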
+ * + */ + +void compute_configure_validate(const TensorShape &shape, DataType dt_in, DataType dt_out, ConvertPolicy policy, uint32_t shift, uint32_t fixed_point_position) +{ + // Create tensors + Tensor src = create_tensor(shape, dt_in, 1, fixed_point_position); + Tensor dst = create_tensor(shape, dt_out, 1, fixed_point_position); + + BOOST_TEST(src.info()->is_resizable()); + BOOST_TEST(dst.info()->is_resizable()); + + // Create and configure function + NEDepthConvert depth_convert; + depth_convert.configure(&src, &dst, policy, shift); + + // Validate valid region + const ValidRegion valid_region = shape_to_valid_region(shape); + validate(src.info()->valid_region(), valid_region); + validate(dst.info()->valid_region(), valid_region); + + // Validate padding + const PaddingSize padding(0, required_padding(shape.x(), 16), 0, 0); + validate(src.info()->padding(), padding); + validate(dst.info()->padding(), padding); +} +} // namespace + +#ifndef DOXYGEN_SKIP_THIS +BOOST_AUTO_TEST_SUITE(NEON) +BOOST_AUTO_TEST_SUITE(DepthConvert) + +BOOST_AUTO_TEST_SUITE(QS8_to_F32) +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(Configuration, (SmallShapes() + LargeShapes()) * boost::unit_test::data::make({ ConvertPolicy::SATURATE }) + * boost::unit_test::data::xrange(1, 7, 1), + shape, policy, fixed_point_position) +{ + // Compute configure and validate region/padding + compute_configure_validate(shape, DataType::QS8, DataType::F32, policy, 0, fixed_point_position); +} + +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit")) +BOOST_DATA_TEST_CASE(RunSmall, SmallShapes() * boost::unit_test::data::make({ ConvertPolicy::SATURATE }) + * boost::unit_test::data::xrange(1, 7, 1), + shape, policy, fixed_point_position) +{ + // Compute function + Tensor dst = compute_depth_convert(shape, DataType::QS8, DataType::F32, policy, 0, fixed_point_position); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_depth_convert(shape, DataType::QS8, DataType::F32, policy, 0, fixed_point_position); + + // Validate output + validate(NEAccessor(dst), ref_dst); +} + +BOOST_TEST_DECORATOR(*boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(RunLarge, LargeShapes() * boost::unit_test::data::make({ ConvertPolicy::SATURATE }) + * boost::unit_test::data::xrange(1, 7, 1), + shape, policy, fixed_point_position) +{ + // Compute function + Tensor dst = compute_depth_convert(shape, DataType::QS8, DataType::F32, policy, 0, fixed_point_position); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_depth_convert(shape, DataType::QS8, DataType::F32, policy, 0, fixed_point_position); + + // Validate output + validate(NEAccessor(dst), ref_dst); +} + +BOOST_AUTO_TEST_SUITE_END() + +BOOST_AUTO_TEST_SUITE(F32_to_QS8) +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(Configuration, (SmallShapes() + LargeShapes()) * boost::unit_test::data::make({ ConvertPolicy::SATURATE }) + * boost::unit_test::data::xrange(1, 7, 1), + shape, policy, fixed_point_position) +{ + // Compute configure and validate region/padding + compute_configure_validate(shape, DataType::F32, DataType::QS8, policy, 0, fixed_point_position); +} + +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit")) +BOOST_DATA_TEST_CASE(RunSmall, SmallShapes() * boost::unit_test::data::make({ ConvertPolicy::SATURATE }) + * boost::unit_test::data::xrange(1, 7, 1), + shape, policy, 
fixed_point_position) +{ + // Compute function + Tensor dst = compute_depth_convert(shape, DataType::F32, DataType::QS8, policy, 0, fixed_point_position); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_depth_convert(shape, DataType::F32, DataType::QS8, policy, 0, fixed_point_position); + + // Validate output + validate(NEAccessor(dst), ref_dst); +} + +BOOST_TEST_DECORATOR(*boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(RunLarge, LargeShapes() * boost::unit_test::data::make({ ConvertPolicy::SATURATE }) + * boost::unit_test::data::xrange(1, 7, 1), + shape, policy, fixed_point_position) +{ + // Compute function + Tensor dst = compute_depth_convert(shape, DataType::F32, DataType::QS8, policy, 0, fixed_point_position); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_depth_convert(shape, DataType::F32, DataType::QS8, policy, 0, fixed_point_position); + + // Validate output + validate(NEAccessor(dst), ref_dst); +} +BOOST_AUTO_TEST_SUITE_END() + +BOOST_AUTO_TEST_SUITE(U8_to_U16) +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly")) + +BOOST_DATA_TEST_CASE(Configuration, (SmallShapes() + LargeShapes()) * boost::unit_test::data::make({ ConvertPolicy::SATURATE, ConvertPolicy::WRAP }) + * boost::unit_test::data::xrange(0, 7, 1), + shape, policy, shift) +{ + // Compute configure and validate region/padding + compute_configure_validate(shape, DataType::U8, DataType::U16, policy, shift, 0); +} + +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit")) +BOOST_DATA_TEST_CASE(RunSmall, SmallShapes() * boost::unit_test::data::make({ ConvertPolicy::SATURATE, ConvertPolicy::WRAP }) + * boost::unit_test::data::xrange(0, 7, 1), + shape, policy, shift) +{ + // Compute function + Tensor dst = compute_depth_convert(shape, DataType::U8, DataType::U16, policy, shift, 0); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_depth_convert(shape, DataType::U8, DataType::U16, policy, shift, 0); + + // Validate output + validate(NEAccessor(dst), ref_dst); +} +BOOST_TEST_DECORATOR(*boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(RunLarge, LargeShapes() * boost::unit_test::data::make({ ConvertPolicy::SATURATE, ConvertPolicy::WRAP }) + * boost::unit_test::data::xrange(0, 7, 1), + shape, policy, shift) +{ + // Compute function + Tensor dst = compute_depth_convert(shape, DataType::U8, DataType::U16, policy, shift, 0); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_depth_convert(shape, DataType::U8, DataType::U16, policy, shift, 0); + + // Validate output + validate(NEAccessor(dst), ref_dst); +} +BOOST_AUTO_TEST_SUITE_END() + +BOOST_AUTO_TEST_SUITE(U8_to_S16) +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(Configuration, (SmallShapes() + LargeShapes()) * boost::unit_test::data::make({ ConvertPolicy::SATURATE, ConvertPolicy::WRAP }) + * boost::unit_test::data::xrange(0, 7, 1), + shape, policy, shift) +{ + // Compute configure and validate region/padding + compute_configure_validate(shape, DataType::U8, DataType::S16, policy, shift, 0); +} + +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit")) +BOOST_DATA_TEST_CASE(RunSmall, SmallShapes() * boost::unit_test::data::make({ ConvertPolicy::SATURATE, ConvertPolicy::WRAP }) + * boost::unit_test::data::xrange(0, 7, 1), + shape, policy, shift) +{ + // Compute function + Tensor dst = compute_depth_convert(shape, DataType::U8, 
DataType::S16, policy, shift, 0); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_depth_convert(shape, DataType::U8, DataType::S16, policy, shift, 0); + + // Validate output + validate(NEAccessor(dst), ref_dst); +} + +BOOST_TEST_DECORATOR(*boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(RunLarge, LargeShapes() * boost::unit_test::data::make({ ConvertPolicy::SATURATE, ConvertPolicy::WRAP }) + * boost::unit_test::data::xrange(0, 7, 1), + shape, policy, shift) +{ + // Compute function + Tensor dst = compute_depth_convert(shape, DataType::U8, DataType::S16, policy, shift, 0); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_depth_convert(shape, DataType::U8, DataType::S16, policy, shift, 0); + + // Validate output + validate(NEAccessor(dst), ref_dst); +} +BOOST_AUTO_TEST_SUITE_END() + +BOOST_AUTO_TEST_SUITE(U8_to_S32) +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(Configuration, (SmallShapes() + LargeShapes()) * boost::unit_test::data::make({ ConvertPolicy::SATURATE, ConvertPolicy::WRAP }) + * boost::unit_test::data::xrange(0, 7, 1), + shape, policy, shift) +{ + // Compute configure and validate region/padding + compute_configure_validate(shape, DataType::U8, DataType::S32, policy, shift, 0); +} + +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit")) +BOOST_DATA_TEST_CASE(RunSmall, SmallShapes() * boost::unit_test::data::make({ ConvertPolicy::SATURATE, ConvertPolicy::WRAP }) + * boost::unit_test::data::xrange(0, 7, 1), + shape, policy, shift) +{ + // Compute function + Tensor dst = compute_depth_convert(shape, DataType::U8, DataType::S32, policy, shift, 0); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_depth_convert(shape, DataType::U8, DataType::S32, policy, shift, 0); + + // Validate output + validate(NEAccessor(dst), ref_dst); +} + +BOOST_TEST_DECORATOR(*boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(RunLarge, LargeShapes() * boost::unit_test::data::make({ ConvertPolicy::SATURATE, ConvertPolicy::WRAP }) + * boost::unit_test::data::xrange(0, 7, 1), + shape, policy, shift) +{ + // Compute function + Tensor dst = compute_depth_convert(shape, DataType::U8, DataType::S32, policy, shift, 0); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_depth_convert(shape, DataType::U8, DataType::S32, policy, shift, 0); + + // Validate output + validate(NEAccessor(dst), ref_dst); +} +BOOST_AUTO_TEST_SUITE_END() + +BOOST_AUTO_TEST_SUITE(U16_to_U8) +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(Configuration, (SmallShapes() + LargeShapes()) * boost::unit_test::data::make({ ConvertPolicy::SATURATE, ConvertPolicy::WRAP }) + * boost::unit_test::data::xrange(0, 7, 1), + shape, policy, shift) +{ + // Compute configure and validate region/padding + compute_configure_validate(shape, DataType::U16, DataType::U8, policy, shift, 0); +} + +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit")) +BOOST_DATA_TEST_CASE(RunSmall, SmallShapes() * boost::unit_test::data::make({ ConvertPolicy::SATURATE, ConvertPolicy::WRAP }) + * boost::unit_test::data::xrange(0, 7, 1), + shape, policy, shift) +{ + // Compute function + Tensor dst = compute_depth_convert(shape, DataType::U16, DataType::U8, policy, shift, 0); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_depth_convert(shape, DataType::U16, DataType::U8, policy, 
shift, 0); + + // Validate output + validate(NEAccessor(dst), ref_dst); +} + +BOOST_TEST_DECORATOR(*boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(RunLarge, LargeShapes() * boost::unit_test::data::make({ ConvertPolicy::SATURATE, ConvertPolicy::WRAP }) + * boost::unit_test::data::xrange(0, 7, 1), + shape, policy, shift) +{ + // Compute function + Tensor dst = compute_depth_convert(shape, DataType::U16, DataType::U8, policy, shift, 0); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_depth_convert(shape, DataType::U16, DataType::U8, policy, shift, 0); + + // Validate output + validate(NEAccessor(dst), ref_dst); +} +BOOST_AUTO_TEST_SUITE_END() + +BOOST_AUTO_TEST_SUITE(U16_to_U32) +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(Configuration, (SmallShapes() + LargeShapes()) * boost::unit_test::data::make({ ConvertPolicy::SATURATE, ConvertPolicy::WRAP }) + * boost::unit_test::data::xrange(0, 7, 1), + shape, policy, shift) +{ + // Compute configure and validate region/padding + compute_configure_validate(shape, DataType::U16, DataType::U32, policy, shift, 0); +} + +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit")) +BOOST_DATA_TEST_CASE(RunSmall, SmallShapes() * boost::unit_test::data::make({ ConvertPolicy::SATURATE, ConvertPolicy::WRAP }) + * boost::unit_test::data::xrange(0, 7, 1), + shape, policy, shift) +{ + // Compute function + Tensor dst = compute_depth_convert(shape, DataType::U16, DataType::U32, policy, shift, 0); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_depth_convert(shape, DataType::U16, DataType::U32, policy, shift, 0); + + // Validate output + validate(NEAccessor(dst), ref_dst); +} + +BOOST_TEST_DECORATOR(*boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(RunLarge, LargeShapes() * boost::unit_test::data::make({ ConvertPolicy::SATURATE, ConvertPolicy::WRAP }) + * boost::unit_test::data::xrange(0, 7, 1), + shape, policy, shift) +{ + // Compute function + Tensor dst = compute_depth_convert(shape, DataType::U16, DataType::U32, policy, shift, 0); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_depth_convert(shape, DataType::U16, DataType::U32, policy, shift, 0); + + // Validate output + validate(NEAccessor(dst), ref_dst); +} +BOOST_AUTO_TEST_SUITE_END() + +BOOST_AUTO_TEST_SUITE(S16_to_U8) +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(Configuration, (SmallShapes() + LargeShapes()) * boost::unit_test::data::make({ ConvertPolicy::SATURATE, ConvertPolicy::WRAP }) + * boost::unit_test::data::xrange(0, 7, 1), + shape, policy, shift) +{ + // Compute configure and validate region/padding + compute_configure_validate(shape, DataType::S16, DataType::U8, policy, shift, 0); +} + +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit")) +BOOST_DATA_TEST_CASE(RunSmall, SmallShapes() * boost::unit_test::data::make({ ConvertPolicy::SATURATE, ConvertPolicy::WRAP }) + * boost::unit_test::data::xrange(0, 7, 1), + shape, policy, shift) +{ + // Compute function + Tensor dst = compute_depth_convert(shape, DataType::S16, DataType::U8, policy, shift, 0); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_depth_convert(shape, DataType::S16, DataType::U8, policy, shift, 0); + + // Validate output + validate(NEAccessor(dst), ref_dst); +} + +BOOST_TEST_DECORATOR(*boost::unit_test::label("nightly")) 
+BOOST_DATA_TEST_CASE(RunLarge, LargeShapes() * boost::unit_test::data::make({ ConvertPolicy::SATURATE, ConvertPolicy::WRAP }) + * boost::unit_test::data::xrange(0, 7, 1), + shape, policy, shift) +{ + // Compute function + Tensor dst = compute_depth_convert(shape, DataType::S16, DataType::U8, policy, shift, 0); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_depth_convert(shape, DataType::S16, DataType::U8, policy, shift, 0); + + // Validate output + validate(NEAccessor(dst), ref_dst); +} +BOOST_AUTO_TEST_SUITE_END() + +BOOST_AUTO_TEST_SUITE(S16_to_S32) +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(Configuration, (SmallShapes() + LargeShapes()) * boost::unit_test::data::make({ ConvertPolicy::SATURATE, ConvertPolicy::WRAP }) + * boost::unit_test::data::xrange(0, 7, 1), + shape, policy, shift) +{ + // Compute configure and validate region/padding + compute_configure_validate(shape, DataType::S16, DataType::S32, policy, shift, 0); +} + +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit")) +BOOST_DATA_TEST_CASE(RunSmall, SmallShapes() * boost::unit_test::data::make({ ConvertPolicy::SATURATE, ConvertPolicy::WRAP }) + * boost::unit_test::data::xrange(0, 7, 1), + shape, policy, shift) +{ + // Compute function + Tensor dst = compute_depth_convert(shape, DataType::S16, DataType::S32, policy, shift, 0); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_depth_convert(shape, DataType::S16, DataType::S32, policy, shift, 0); + + // Validate output + validate(NEAccessor(dst), ref_dst); +} + +BOOST_TEST_DECORATOR(*boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(RunLarge, LargeShapes() * boost::unit_test::data::make({ ConvertPolicy::SATURATE, ConvertPolicy::WRAP }) + * boost::unit_test::data::xrange(0, 7, 1), + shape, policy, shift) +{ + // Compute function + Tensor dst = compute_depth_convert(shape, DataType::S16, DataType::S32, policy, shift, 0); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_depth_convert(shape, DataType::S16, DataType::S32, policy, shift, 0); + + // Validate output + validate(NEAccessor(dst), ref_dst); +} +BOOST_AUTO_TEST_SUITE_END() + +BOOST_AUTO_TEST_SUITE_END() +BOOST_AUTO_TEST_SUITE_END() +#endif diff --git a/tests/validation/NEON/FillBorder.cpp b/tests/validation/NEON/FillBorder.cpp new file mode 100644 index 0000000000..9fbeb998f5 --- /dev/null +++ b/tests/validation/NEON/FillBorder.cpp @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "Globals.h" +#include "NEON/Helper.h" +#include "NEON/NEAccessor.h" +#include "TensorLibrary.h" +#include "TypePrinter.h" +#include "Utils.h" +#include "validation/Datasets.h" +#include "validation/Validation.h" + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/Tensor.h" +#include "arm_compute/runtime/TensorAllocator.h" + +#include "boost_wrapper.h" + +#include <random> +#include <string> + +using namespace arm_compute; +using namespace arm_compute::test; +using namespace arm_compute::test::neon; +using namespace arm_compute::test::validation; + +#ifndef DOXYGEN_SKIP_THIS +BOOST_AUTO_TEST_SUITE(NEON) + +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(FillBorder, BorderModes() * boost::unit_test::data::make({ PaddingSize{ 0 }, PaddingSize{ 1, 0, 1, 2 }, PaddingSize{ 10 } }), border_mode, padding) +{ + constexpr uint8_t border_value = 42U; + constexpr uint8_t tensor_value = 89U; + BorderSize border_size{ 5 }; + + // Create tensors + Tensor src = create_tensor(TensorShape{ 10U, 10U, 2U }, DataType::U8); + + src.info()->extend_padding(padding); + + // Allocate tensor + src.allocator()->allocate(); + + // Check padding is as required + validate(src.info()->padding(), padding); + + // Fill tensor with constant value + std::uniform_int_distribution<uint8_t> distribution{ tensor_value, tensor_value }; + library->fill(NEAccessor(src), distribution, 0); + + // Create and configure kernel + NEFillBorderKernel fill_border; + fill_border.configure(&src, border_size, border_mode, border_value); + + // Run kernel + fill_border.run(fill_border.window()); + + // Validate border + border_size.limit(padding); + validate(NEAccessor(src), border_size, border_mode, &border_value); + + // Validate tensor + validate(NEAccessor(src), &tensor_value); +} + +BOOST_AUTO_TEST_SUITE_END() +#endif diff --git a/tests/validation/NEON/Fixedpoint/Exp_QS8.cpp b/tests/validation/NEON/Fixedpoint/Exp_QS8.cpp new file mode 100644 index 0000000000..086314fdd3 --- /dev/null +++ b/tests/validation/NEON/Fixedpoint/Exp_QS8.cpp @@ -0,0 +1,124 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "Globals.h" +#include "NEON/Helper.h" +#include "NEON/NEAccessor.h" +#include "TensorLibrary.h" +#include "TypePrinter.h" +#include "Utils.h" +#include "validation/Datasets.h" +#include "validation/ReferenceCPP.h" +#include "validation/Validation.h" + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/NEON/NEFixedPoint.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/Tensor.h" +#include "arm_compute/runtime/TensorAllocator.h" + +#include "boost_wrapper.h" + +#include <random> +#include <string> + +using namespace arm_compute; +using namespace arm_compute::test; +using namespace arm_compute::test::neon; +using namespace arm_compute::test::validation; + +namespace +{ +const float tolerance = 0.0f; /**< Tolerance value for comparing reference's output against implementation's output */ + +/** Compute Neon exponential function for signed 8bit fixed point. + * + * @param[in] shape Shape of the input and output tensors. + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number. + * + * @return Computed output tensor. + */ +Tensor compute_exp_qs8(const TensorShape &shape, int fixed_point_position) +{ + // Create tensors + Tensor src = create_tensor(shape, DataType::QS8, 1, fixed_point_position); + Tensor dst = create_tensor(shape, DataType::QS8, 1, fixed_point_position); + + constexpr unsigned int num_elems_processed_per_iteration = 16; + Window window = calculate_max_window(*src.info(), Steps(num_elems_processed_per_iteration)); + AccessWindowHorizontal input_access(src.info(), 0, num_elems_processed_per_iteration); + AccessWindowHorizontal output_access(dst.info(), 0, num_elems_processed_per_iteration); + + update_window_and_padding(window, input_access, output_access); + output_access.set_valid_region(window, src.info()->valid_region()); + + // Allocate tensors + src.allocator()->allocate(); + dst.allocator()->allocate(); + + BOOST_TEST(!src.info()->is_resizable()); + BOOST_TEST(!dst.info()->is_resizable()); + + // Fill tensors. Keep the range between (1, (1 << (fixed_point_position - 1))) so the result won't + // overflow. E.g.
e^7 = 1096, which cannot be represented in QS8 + std::uniform_int_distribution<> distribution(1, (1 << (fixed_point_position - 1))); + library->fill(NEAccessor(src), distribution, 0); + + Iterator input(&src, window); + Iterator output(&dst, window); + + execute_window_loop(window, [&](const Coordinates & id) + { + qint8x16_t in = vld1q_s8(reinterpret_cast<const qint8_t *>(input.ptr())); + // Use saturated exp + vst1q_s8(reinterpret_cast<qint8_t *>(output.ptr()), vqexpq_qs8(in, fixed_point_position)); + }, + input, output); + + return dst; +} +} // namespace + +#ifndef DOXYGEN_SKIP_THIS +BOOST_AUTO_TEST_SUITE(NEON) +BOOST_AUTO_TEST_SUITE(FixedPoint) +BOOST_AUTO_TEST_SUITE(QS8) +BOOST_AUTO_TEST_SUITE(Exp) + +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(RunSmall, Small1DShape() * boost::unit_test::data::xrange(1, 7), shape, fixed_point_position) +{ + // Compute function + Tensor dst = compute_exp_qs8(shape, fixed_point_position); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_fixed_point_operation(shape, DataType::QS8, DataType::QS8, FixedPointOp::EXP, fixed_point_position); + + // Validate output + validate(NEAccessor(dst), ref_dst, tolerance, 0); +} + +BOOST_AUTO_TEST_SUITE_END() +BOOST_AUTO_TEST_SUITE_END() +BOOST_AUTO_TEST_SUITE_END() +BOOST_AUTO_TEST_SUITE_END() +#endif diff --git a/tests/validation/NEON/Fixedpoint/Invsqrt_QS8.cpp b/tests/validation/NEON/Fixedpoint/Invsqrt_QS8.cpp new file mode 100644 index 0000000000..3308f7d855 --- /dev/null +++ b/tests/validation/NEON/Fixedpoint/Invsqrt_QS8.cpp @@ -0,0 +1,123 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE.
+ */ +#include "Globals.h" +#include "NEON/Helper.h" +#include "NEON/NEAccessor.h" +#include "TensorLibrary.h" +#include "TypePrinter.h" +#include "Utils.h" +#include "validation/Datasets.h" +#include "validation/ReferenceCPP.h" +#include "validation/Validation.h" + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/NEON/NEFixedPoint.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/Tensor.h" +#include "arm_compute/runtime/TensorAllocator.h" + +#include "boost_wrapper.h" + +#include <random> +#include <string> + +using namespace arm_compute; +using namespace arm_compute::test; +using namespace arm_compute::test::neon; +using namespace arm_compute::test::validation; + +namespace +{ +const float tolerance = 3; /**< Tolerance value for comparing reference's output against implementation's output */ + +/** Compute Neon inverse square root function for signed 8bit fixed point. + * + * @param[in] shape Shape of the input and output tensors. + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number. + * + * @return Computed output tensor. + */ +Tensor compute_invsqrt_qs8(const TensorShape &shape, int fixed_point_position) +{ + // Create tensors + Tensor src = create_tensor(shape, DataType::QS8, 1, fixed_point_position); + Tensor dst = create_tensor(shape, DataType::QS8, 1, fixed_point_position); + + constexpr unsigned int num_elems_processed_per_iteration = 16; + Window window = calculate_max_window(*src.info(), Steps(num_elems_processed_per_iteration)); + AccessWindowHorizontal input_access(src.info(), 0, num_elems_processed_per_iteration); + AccessWindowHorizontal output_access(dst.info(), 0, num_elems_processed_per_iteration); + + update_window_and_padding(window, input_access, output_access); + output_access.set_valid_region(window, src.info()->valid_region()); + + // Allocate tensors + src.allocator()->allocate(); + dst.allocator()->allocate(); + + BOOST_TEST(!src.info()->is_resizable()); + BOOST_TEST(!dst.info()->is_resizable()); + + // Fill tensors. Keep the range between (32, 127) so the result won't + // overflow. E.g. for Q2.5 invsqrt(0.001) = 31.6, which cannot be represented.
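+ // For instance, with fixed_point_position = 5 (Q2.5) the raw values 32..127 correspond to real values + // 1.0..~3.97, so invsqrt(x) stays within roughly (0.5, 1.0] and remains representable.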
+ std::uniform_int_distribution<> distribution(32, 127); + library->fill(NEAccessor(src), distribution, 0); + + Iterator input(&src, window); + Iterator output(&dst, window); + + execute_window_loop(window, [&](const Coordinates & id) + { + qint8x16_t in = vld1q_s8(reinterpret_cast<const qint8_t *>(input.ptr())); + vst1q_s8(reinterpret_cast<qint8_t *>(output.ptr()), vinvsqrtq_qs8(in, fixed_point_position)); + }, + input, output); + + return dst; +} +} // namespace + +#ifndef DOXYGEN_SKIP_THIS +BOOST_AUTO_TEST_SUITE(NEON) +BOOST_AUTO_TEST_SUITE(FixedPoint) +BOOST_AUTO_TEST_SUITE(QS8) +BOOST_AUTO_TEST_SUITE(Invsqrt) + +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(Small1DShape, SmallShapes() * boost::unit_test::data::xrange(1, 6), shape, fixed_point_position) +{ + // Compute function + Tensor dst = compute_invsqrt_qs8(shape, fixed_point_position); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_fixed_point_operation(shape, DataType::QS8, DataType::QS8, FixedPointOp::INV_SQRT, fixed_point_position); + + // Validate output + validate(NEAccessor(dst), ref_dst, tolerance, 0); +} + +BOOST_AUTO_TEST_SUITE_END() +BOOST_AUTO_TEST_SUITE_END() +BOOST_AUTO_TEST_SUITE_END() +BOOST_AUTO_TEST_SUITE_END() +#endif diff --git a/tests/validation/NEON/Fixedpoint/Log_QS8.cpp b/tests/validation/NEON/Fixedpoint/Log_QS8.cpp new file mode 100644 index 0000000000..7b734c12b1 --- /dev/null +++ b/tests/validation/NEON/Fixedpoint/Log_QS8.cpp @@ -0,0 +1,123 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE.
+ */ +#include "Globals.h" +#include "NEON/Helper.h" +#include "NEON/NEAccessor.h" +#include "TensorLibrary.h" +#include "TypePrinter.h" +#include "Utils.h" +#include "validation/Datasets.h" +#include "validation/ReferenceCPP.h" +#include "validation/Validation.h" + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/NEON/NEFixedPoint.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/Tensor.h" +#include "arm_compute/runtime/TensorAllocator.h" + +#include "boost_wrapper.h" + +#include <random> +#include <string> + +using namespace arm_compute; +using namespace arm_compute::test; +using namespace arm_compute::test::neon; +using namespace arm_compute::test::validation; + +namespace +{ +const float tolerance = 5; /**< Tolerance value for comparing reference's output against implementation's output */ + +/** Compute Neon logarithm function for signed 8bit fixed point. + * + * @param[in] shape Shape of the input and output tensors. + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number. + * + * @return Computed output tensor. + */ +Tensor compute_log_qs8(const TensorShape &shape, int fixed_point_position) +{ + // Create tensors + Tensor src = create_tensor(shape, DataType::QS8, 1, fixed_point_position); + Tensor dst = create_tensor(shape, DataType::QS8, 1, fixed_point_position); + + constexpr unsigned int num_elems_processed_per_iteration = 16; + Window window = calculate_max_window(*src.info(), Steps(num_elems_processed_per_iteration)); + AccessWindowHorizontal input_access(src.info(), 0, num_elems_processed_per_iteration); + AccessWindowHorizontal output_access(dst.info(), 0, num_elems_processed_per_iteration); + + update_window_and_padding(window, input_access, output_access); + output_access.set_valid_region(window, src.info()->valid_region()); + + // Allocate tensors + src.allocator()->allocate(); + dst.allocator()->allocate(); + + BOOST_TEST(!src.info()->is_resizable()); + BOOST_TEST(!dst.info()->is_resizable()); + + // Fill tensors. Keep the range between ((1 << (fixed_point_position - 1)), 63) so the result won't + // overflow. E.g. for Q2.5 ln(0.001) = -6.9, which cannot be represented.
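+ // For instance, with fixed_point_position = 5 (Q2.5) the raw values 16..63 correspond to real values + // 0.5..~1.97, so ln(x) stays within roughly [-0.69, 0.68], well inside the Q2.5 range of [-4, 4).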
+ std::uniform_int_distribution<> distribution((1 << (fixed_point_position - 1)), 63); + library->fill(NEAccessor(src), distribution, 0); + + Iterator input(&src, window); + Iterator output(&dst, window); + + execute_window_loop(window, [&](const Coordinates & id) + { + qint8x16_t in = vld1q_s8(reinterpret_cast<const qint8_t *>(input.ptr())); + vst1q_s8(reinterpret_cast<qint8_t *>(output.ptr()), vlogq_qs8(in, fixed_point_position)); + }, + input, output); + + return dst; +} +} // namespace + +#ifndef DOXYGEN_SKIP_THIS +BOOST_AUTO_TEST_SUITE(NEON) +BOOST_AUTO_TEST_SUITE(FixedPoint) +BOOST_AUTO_TEST_SUITE(QS8) +BOOST_AUTO_TEST_SUITE(Log) + +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(RunSmall, Small1DShape() * boost::unit_test::data::xrange(3, 6), shape, fixed_point_position) +{ + // Compute function + Tensor dst = compute_log_qs8(shape, fixed_point_position); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_fixed_point_operation(shape, DataType::QS8, DataType::QS8, FixedPointOp::LOG, fixed_point_position); + + // Validate output + validate(NEAccessor(dst), ref_dst, tolerance, 0); +} + +BOOST_AUTO_TEST_SUITE_END() +BOOST_AUTO_TEST_SUITE_END() +BOOST_AUTO_TEST_SUITE_END() +BOOST_AUTO_TEST_SUITE_END() +#endif diff --git a/tests/validation/NEON/Fixedpoint/Reciprocal_QS8.cpp b/tests/validation/NEON/Fixedpoint/Reciprocal_QS8.cpp new file mode 100644 index 0000000000..4c1c782a18 --- /dev/null +++ b/tests/validation/NEON/Fixedpoint/Reciprocal_QS8.cpp @@ -0,0 +1,123 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE.
+ */ +#include "Globals.h" +#include "NEON/Helper.h" +#include "NEON/NEAccessor.h" +#include "TensorLibrary.h" +#include "TypePrinter.h" +#include "Utils.h" +#include "validation/Datasets.h" +#include "validation/ReferenceCPP.h" +#include "validation/Validation.h" + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/NEON/NEFixedPoint.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/Tensor.h" +#include "arm_compute/runtime/TensorAllocator.h" + +#include "boost_wrapper.h" + +#include <random> +#include <string> + +using namespace arm_compute; +using namespace arm_compute::test; +using namespace arm_compute::test::neon; +using namespace arm_compute::test::validation; + +namespace +{ +const float tolerance = 3; /**< Tolerance value for comparing reference's output against implementation's output */ + +/** Compute Neon reciprocal function for signed 8bit fixed point. + * + * @param[in] shape Shape of the input and output tensors. + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number. + * + * @return Computed output tensor. + */ +Tensor compute_reciprocal_qs8(const TensorShape &shape, int fixed_point_position) +{ + // Create tensors + Tensor src = create_tensor(shape, DataType::QS8, 1, fixed_point_position); + Tensor dst = create_tensor(shape, DataType::QS8, 1, fixed_point_position); + + constexpr unsigned int num_elems_processed_per_iteration = 16; + Window window = calculate_max_window(*src.info(), Steps(num_elems_processed_per_iteration)); + AccessWindowHorizontal input_access(src.info(), 0, num_elems_processed_per_iteration); + AccessWindowHorizontal output_access(dst.info(), 0, num_elems_processed_per_iteration); + + update_window_and_padding(window, input_access, output_access); + output_access.set_valid_region(window, src.info()->valid_region()); + + // Allocate tensors + src.allocator()->allocate(); + dst.allocator()->allocate(); + + BOOST_TEST(!src.info()->is_resizable()); + BOOST_TEST(!dst.info()->is_resizable()); + + // Fill tensors. Keep the range between (15, 100) so the result won't + // overflow. E.g. for Q2.5 reciprocal(0.001) = 1000, which cannot be represented.
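+ // For instance, with fixed_point_position = 5 (Q2.5) the raw values 15..100 correspond to real values + // ~0.47..3.125, so reciprocal(x) stays within roughly [0.32, 2.13] and remains representable.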
+ std::uniform_int_distribution<> distribution(15, 100); + library->fill(NEAccessor(src), distribution, 0); + + Iterator input(&src, window); + Iterator output(&dst, window); + + execute_window_loop(window, [&](const Coordinates & id) + { + qint8x16_t in = vld1q_s8(reinterpret_cast<const qint8_t *>(input.ptr())); + vst1q_s8(reinterpret_cast<qint8_t *>(output.ptr()), vrecipq_qs8(in, fixed_point_position)); + }, + input, output); + + return dst; +} +} // namespace + +#ifndef DOXYGEN_SKIP_THIS +BOOST_AUTO_TEST_SUITE(NEON) +BOOST_AUTO_TEST_SUITE(FixedPoint) +BOOST_AUTO_TEST_SUITE(QS8) +BOOST_AUTO_TEST_SUITE(Reciprocal) + +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(RunSmall, Small1DShape() * boost::unit_test::data::xrange(1, 6), shape, fixed_point_position) +{ + // Compute function + Tensor dst = compute_reciprocal_qs8(shape, fixed_point_position); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_fixed_point_operation(shape, DataType::QS8, DataType::QS8, FixedPointOp::RECIPROCAL, fixed_point_position); + + // Validate output + validate(NEAccessor(dst), ref_dst, tolerance, 0); +} + +BOOST_AUTO_TEST_SUITE_END() +BOOST_AUTO_TEST_SUITE_END() +BOOST_AUTO_TEST_SUITE_END() +BOOST_AUTO_TEST_SUITE_END() +#endif diff --git a/tests/validation/NEON/FullyConnectedLayer.cpp b/tests/validation/NEON/FullyConnectedLayer.cpp new file mode 100644 index 0000000000..bda235bd55 --- /dev/null +++ b/tests/validation/NEON/FullyConnectedLayer.cpp @@ -0,0 +1,221 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE.
+ */ +#include "NEON/Helper.h" +#include "NEON/NEAccessor.h" +#include "TypePrinter.h" +#include "dataset/FullyConnectedLayerDataset.h" +#include "validation/Datasets.h" +#include "validation/Reference.h" +#include "validation/Validation.h" + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h" + +#include + +using namespace arm_compute; +using namespace arm_compute::test; +using namespace arm_compute::test::neon; +using namespace arm_compute::test::validation; + +namespace +{ +const float tolerance_f32 = 1e-03f; /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 */ +const float tolerance_qs8 = 1.0f; /**< Tolerance value for comparing reference's output against implementation's output for DataType::QS8 */ + +Tensor compute_fully_connected_layer(const TensorShape &input_shape, const TensorShape &weights_shape, const TensorShape &bias_shape, const TensorShape &output_shape, DataType dt, + bool transpose_weights, int fixed_point_position) +{ + // Create tensors + Tensor src = create_tensor(input_shape, dt, 1, fixed_point_position); + Tensor bias = create_tensor(bias_shape, dt, 1, fixed_point_position); + Tensor dst = create_tensor(output_shape, dt, 1, fixed_point_position); + + // Swap the first and second dimension of weights' shape if transpose_weights is true + TensorShape ws = weights_shape; + if(transpose_weights) + { + const size_t dimx = ws.x(); + ws.set(0, ws.y()); + ws.set(1, dimx); + } + + Tensor weights = create_tensor(ws, dt, 1, fixed_point_position); + + // Create and configure function. + // Note: We pass the weights already transposed + NEFullyConnectedLayer fc; + fc.configure(&src, &weights, &bias, &dst, false); + + // Allocate tensors + src.allocator()->allocate(); + weights.allocator()->allocate(); + bias.allocator()->allocate(); + dst.allocator()->allocate(); + + BOOST_TEST(!src.info()->is_resizable()); + BOOST_TEST(!weights.info()->is_resizable()); + BOOST_TEST(!bias.info()->is_resizable()); + BOOST_TEST(!dst.info()->is_resizable()); + + // Fill tensors + if(dt == DataType::F32) + { + std::uniform_real_distribution<> distribution(-1.0f, 1.0f); + library->fill(NEAccessor(src), distribution, 0); + library->fill(NEAccessor(weights), distribution, 1); + library->fill(NEAccessor(bias), distribution, 2); + } + else + { + library->fill_tensor_uniform(NEAccessor(src), 0); + library->fill_tensor_uniform(NEAccessor(weights), 1); + library->fill_tensor_uniform(NEAccessor(bias), 2); + } + + // Compute NEFullyConnectedLayer function + fc.run(); + + return dst; +} +} // namespace + +#ifndef DOXYGEN_SKIP_THIS +BOOST_AUTO_TEST_SUITE(NEON) +BOOST_AUTO_TEST_SUITE(FullyConnectedLayer) + +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(Configuration, + SmallFullyConnectedLayerDataset() * boost::unit_test::data::make({ DataType::F32, DataType::QS8 }), + fc_set, dt) +{ + // Set fixed point position data type allowed + int fixed_point_position = (dt == DataType::F32) ? 
0 : 3; + + // Create tensors + Tensor src = create_tensor(fc_set.src_shape, dt, 1, fixed_point_position); + Tensor bias = create_tensor(fc_set.bias_shape, dt, 1, fixed_point_position); + Tensor dst = create_tensor(fc_set.dst_shape, dt, 1, fixed_point_position); + + // Swap the first and second dimension of weights' shape if transpose_weights is true + TensorShape ws = fc_set.weights_shape; + if(fc_set.transpose_weights) + { + const size_t dimx = ws.x(); + ws.set(0, ws.y()); + ws.set(1, dimx); + } + + Tensor weights = create_tensor(ws, dt, 1, fixed_point_position); + + BOOST_TEST(src.info()->is_resizable()); + BOOST_TEST(weights.info()->is_resizable()); + BOOST_TEST(bias.info()->is_resizable()); + BOOST_TEST(dst.info()->is_resizable()); + + // Create and configure function. + // Note: We pass the weights already transposed + NEFullyConnectedLayer fc; + fc.configure(&src, &weights, &bias, &dst, false); + + // Validate valid region + const ValidRegion src_valid_region = shape_to_valid_region(fc_set.src_shape); + const ValidRegion weights_valid_region = shape_to_valid_region(ws); + const ValidRegion bias_valid_region = shape_to_valid_region(fc_set.bias_shape); + const ValidRegion dst_valid_region = shape_to_valid_region(fc_set.dst_shape); + + validate(src.info()->valid_region(), src_valid_region); + validate(weights.info()->valid_region(), weights_valid_region); + validate(bias.info()->valid_region(), bias_valid_region); + validate(dst.info()->valid_region(), dst_valid_region); +} + +BOOST_AUTO_TEST_SUITE(Float) +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit")) +BOOST_DATA_TEST_CASE(RunSmall, + SmallFullyConnectedLayerDataset() * boost::unit_test::data::make({ DataType::F32 }), + fc_set, dt) +{ + // Compute function + Tensor dst = compute_fully_connected_layer(fc_set.src_shape, fc_set.weights_shape, fc_set.bias_shape, fc_set.dst_shape, dt, fc_set.transpose_weights, 0); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_fully_connected_layer(fc_set.src_shape, fc_set.weights_shape, fc_set.bias_shape, fc_set.dst_shape, dt, fc_set.transpose_weights, 0); + + // Validate output + validate(NEAccessor(dst), ref_dst, tolerance_f32); +} + +BOOST_TEST_DECORATOR(*boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(RunLarge, + LargeFullyConnectedLayerDataset() * boost::unit_test::data::make({ DataType::F32 }), + fc_set, dt) +{ + // Compute function + Tensor dst = compute_fully_connected_layer(fc_set.src_shape, fc_set.weights_shape, fc_set.bias_shape, fc_set.dst_shape, dt, fc_set.transpose_weights, 0); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_fully_connected_layer(fc_set.src_shape, fc_set.weights_shape, fc_set.bias_shape, fc_set.dst_shape, dt, fc_set.transpose_weights, 0); + + // Validate output + validate(NEAccessor(dst), ref_dst, tolerance_f32); +} +BOOST_AUTO_TEST_SUITE_END() + +BOOST_AUTO_TEST_SUITE(Quantized) +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit")) +BOOST_DATA_TEST_CASE(RunSmall, + SmallFullyConnectedLayerDataset() * boost::unit_test::data::make({ DataType::QS8 }) * boost::unit_test::data::xrange(4, 7), + fc_set, dt, fixed_point_position) +{ + // Compute function + Tensor dst = compute_fully_connected_layer(fc_set.src_shape, fc_set.weights_shape, fc_set.bias_shape, fc_set.dst_shape, dt, fc_set.transpose_weights, fixed_point_position); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_fully_connected_layer(fc_set.src_shape, fc_set.weights_shape, fc_set.bias_shape, fc_set.dst_shape, 
dt, fc_set.transpose_weights, fixed_point_position); + + // Validate output + validate(NEAccessor(dst), ref_dst, tolerance_qs8); +} + +BOOST_TEST_DECORATOR(*boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(RunLarge, + LargeFullyConnectedLayerDataset() * boost::unit_test::data::make({ DataType::QS8 }) * boost::unit_test::data::xrange(4, 7), + fc_set, dt, fixed_point_position) +{ + // Compute function + Tensor dst = compute_fully_connected_layer(fc_set.src_shape, fc_set.weights_shape, fc_set.bias_shape, fc_set.dst_shape, dt, fc_set.transpose_weights, fixed_point_position); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_fully_connected_layer(fc_set.src_shape, fc_set.weights_shape, fc_set.bias_shape, fc_set.dst_shape, dt, fc_set.transpose_weights, fixed_point_position); + + // Validate output + validate(NEAccessor(dst), ref_dst, tolerance_qs8); +} +BOOST_AUTO_TEST_SUITE_END() + +BOOST_AUTO_TEST_SUITE_END() +BOOST_AUTO_TEST_SUITE_END() +#endif diff --git a/tests/validation/NEON/GEMM.cpp b/tests/validation/NEON/GEMM.cpp new file mode 100644 index 0000000000..0172ddeb76 --- /dev/null +++ b/tests/validation/NEON/GEMM.cpp @@ -0,0 +1,203 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "Globals.h" +#include "NEON/Helper.h" +#include "NEON/NEAccessor.h" +#include "TensorLibrary.h" +#include "TypePrinter.h" +#include "Utils.h" +#include "dataset/GEMMDataset.h" +#include "validation/Datasets.h" +#include "validation/Reference.h" +#include "validation/Validation.h" + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/functions/NEGEMM.h" +#include "arm_compute/runtime/Tensor.h" +#include "arm_compute/runtime/TensorAllocator.h" + +#include "boost_wrapper.h" + +#include +#include + +using namespace arm_compute; +using namespace arm_compute::test; +using namespace arm_compute::test::neon; +using namespace arm_compute::test::validation; + +namespace +{ +const float tolerance_f32 = 1e-03f; /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 */ +const float tolerance_qs8 = 1.0f; /**< Tolerance value for comparing reference's output against implementation's output for DataType::QS8 */ + +Tensor compute_gemm(const TensorShape &src_shape1, const TensorShape &src_shape2, const TensorShape &src_shape3, + const TensorShape &out_shape, float alpha, float beta, DataType dt, int fixed_point_position = 0) +{ + // Create tensors + Tensor src1 = create_tensor(src_shape1, dt, 1, fixed_point_position); + Tensor src2 = create_tensor(src_shape2, dt, 1, fixed_point_position); + Tensor src3 = create_tensor(src_shape3, dt, 1, fixed_point_position); + Tensor dst = create_tensor(out_shape, dt, 1, fixed_point_position); + + // Create and configure function + NEGEMM gemm; + gemm.configure(&src1, &src2, &src3, &dst, alpha, beta); + + // Allocate tensors + src1.allocator()->allocate(); + src2.allocator()->allocate(); + src3.allocator()->allocate(); + dst.allocator()->allocate(); + + BOOST_TEST(!src1.info()->is_resizable()); + BOOST_TEST(!src2.info()->is_resizable()); + BOOST_TEST(!src3.info()->is_resizable()); + BOOST_TEST(!dst.info()->is_resizable()); + + // Fill tensors + if(dt == DataType::F32) + { + std::uniform_real_distribution<> distribution(-1.0f, 1.0f); + library->fill(NEAccessor(src1), distribution, 0); + library->fill(NEAccessor(src2), distribution, 1); + library->fill(NEAccessor(src3), distribution, 2); + } + else + { + library->fill_tensor_uniform(NEAccessor(src1), 0); + library->fill_tensor_uniform(NEAccessor(src2), 1); + library->fill_tensor_uniform(NEAccessor(src3), 2); + } + + // Compute function + gemm.run(); + + return dst; +} +} // namespace + +#ifndef DOXYGEN_SKIP_THIS +BOOST_AUTO_TEST_SUITE(NEON) +BOOST_AUTO_TEST_SUITE(GEMM) + +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(Configuration, + SmallGEMMDataset() * boost::unit_test::data::make({ DataType::F32, DataType::QS8 }), + gemm_set, dt) +{ + // Set fixed point position data type allowed + int fixed_point_position = (dt == DataType::F32) ? 
0 : 3; + + // Create tensors + Tensor src1 = create_tensor(gemm_set.shape_a, dt, 1, fixed_point_position); + Tensor src2 = create_tensor(gemm_set.shape_b, dt, 1, fixed_point_position); + Tensor src3 = create_tensor(gemm_set.shape_c, dt, 1, fixed_point_position); + Tensor dst = create_tensor(gemm_set.shape_d, dt, 1, fixed_point_position); + + BOOST_TEST(src1.info()->is_resizable()); + BOOST_TEST(src2.info()->is_resizable()); + BOOST_TEST(src3.info()->is_resizable()); + BOOST_TEST(dst.info()->is_resizable()); + + // Create and configure function + NEGEMM gemm; + gemm.configure(&src1, &src2, &src3, &dst, gemm_set.alpha, gemm_set.beta); + + // Validate valid region + const ValidRegion src1_valid_region = shape_to_valid_region(gemm_set.shape_a); + const ValidRegion src2_valid_region = shape_to_valid_region(gemm_set.shape_b); + const ValidRegion src3_valid_region = shape_to_valid_region(gemm_set.shape_c); + const ValidRegion dst_valid_region = shape_to_valid_region(gemm_set.shape_d); + + validate(src1.info()->valid_region(), src1_valid_region); + validate(src2.info()->valid_region(), src2_valid_region); + validate(src3.info()->valid_region(), src3_valid_region); + validate(dst.info()->valid_region(), dst_valid_region); +} + +BOOST_AUTO_TEST_SUITE(Float) +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit")) +BOOST_DATA_TEST_CASE(SmallGEMM, SmallGEMMDataset() * boost::unit_test::data::make(DataType::F32), + gemm_set, dt) +{ + // Compute reference + RawTensor ref_dst = Reference::compute_reference_gemm(gemm_set.shape_a, gemm_set.shape_b, gemm_set.shape_c, gemm_set.shape_d, gemm_set.alpha, gemm_set.beta, dt); + + // Compute function + Tensor dst = compute_gemm(gemm_set.shape_a, gemm_set.shape_b, gemm_set.shape_c, gemm_set.shape_d, gemm_set.alpha, gemm_set.beta, dt); + + // Validate output + validate(NEAccessor(dst), ref_dst, tolerance_f32); +} + +BOOST_TEST_DECORATOR(*boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(LargeGEMM, LargeGEMMDataset() * boost::unit_test::data::make(DataType::F32), + gemm_set, dt) +{ + // Compute reference + RawTensor ref_dst = Reference::compute_reference_gemm(gemm_set.shape_a, gemm_set.shape_b, gemm_set.shape_c, gemm_set.shape_d, gemm_set.alpha, gemm_set.beta, dt); + + // Compute function + Tensor dst = compute_gemm(gemm_set.shape_a, gemm_set.shape_b, gemm_set.shape_c, gemm_set.shape_d, gemm_set.alpha, gemm_set.beta, dt); + + // Validate output + validate(NEAccessor(dst), ref_dst, tolerance_f32); +} +BOOST_AUTO_TEST_SUITE_END() + +BOOST_AUTO_TEST_SUITE(Quantized) +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit")) +BOOST_DATA_TEST_CASE(SmallGEMM, SmallGEMMDataset() * boost::unit_test::data::make(DataType::QS8) * boost::unit_test::data::xrange(1, 7), + gemm_set, dt, fixed_point_position) +{ + // Compute reference + RawTensor ref_dst = Reference::compute_reference_gemm(gemm_set.shape_a, gemm_set.shape_b, gemm_set.shape_c, gemm_set.shape_d, gemm_set.alpha, gemm_set.beta, dt, fixed_point_position); + + // Compute function + Tensor dst = compute_gemm(gemm_set.shape_a, gemm_set.shape_b, gemm_set.shape_c, gemm_set.shape_d, gemm_set.alpha, gemm_set.beta, dt, fixed_point_position); + + // Validate output + validate(NEAccessor(dst), ref_dst, tolerance_qs8); +} + +BOOST_TEST_DECORATOR(*boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(LargeGEMM, LargeGEMMDataset() * boost::unit_test::data::make(DataType::QS8) * boost::unit_test::data::xrange(1, 7), + gemm_set, dt, fixed_point_position) +{ + // Compute reference + RawTensor ref_dst = 
Reference::compute_reference_gemm(gemm_set.shape_a, gemm_set.shape_b, gemm_set.shape_c, gemm_set.shape_d, gemm_set.alpha, gemm_set.beta, dt, fixed_point_position); + + // Compute function + Tensor dst = compute_gemm(gemm_set.shape_a, gemm_set.shape_b, gemm_set.shape_c, gemm_set.shape_d, gemm_set.alpha, gemm_set.beta, dt, fixed_point_position); + + // Validate output + validate(NEAccessor(dst), ref_dst, tolerance_qs8); +} +BOOST_AUTO_TEST_SUITE_END() + +BOOST_AUTO_TEST_SUITE_END() +BOOST_AUTO_TEST_SUITE_END() +#endif diff --git a/tests/validation/NEON/IntegralImage.cpp b/tests/validation/NEON/IntegralImage.cpp new file mode 100644 index 0000000000..f94af430d1 --- /dev/null +++ b/tests/validation/NEON/IntegralImage.cpp @@ -0,0 +1,145 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "Globals.h" +#include "NEON/Helper.h" +#include "NEON/NEAccessor.h" +#include "TensorLibrary.h" +#include "TypePrinter.h" +#include "Utils.h" +#include "validation/Datasets.h" +#include "validation/Reference.h" +#include "validation/Validation.h" + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/functions/NEIntegralImage.h" +#include "arm_compute/runtime/Tensor.h" +#include "arm_compute/runtime/TensorAllocator.h" + +#include "boost_wrapper.h" + +#include <random> +#include <string> + +using namespace arm_compute; +using namespace arm_compute::test; +using namespace arm_compute::test::neon; +using namespace arm_compute::test::validation; + +namespace +{ +/** Compute Neon integral image function. + * + * @param[in] shape Shape of the input and output tensors. + * + * @return Computed output tensor. 
+ */ +Tensor compute_integral_image(const TensorShape &shape) +{ + // Create tensors + Tensor src = create_tensor(shape, DataType::U8); + Tensor dst = create_tensor(shape, DataType::U32); + + // Create integral image configure function + NEIntegralImage integral_image; + integral_image.configure(&src, &dst); + + // Allocate tensors + src.allocator()->allocate(); + dst.allocator()->allocate(); + + BOOST_TEST(!src.info()->is_resizable()); + BOOST_TEST(!dst.info()->is_resizable()); + + // Fill tensors + library->fill_tensor_uniform(NEAccessor(src), 0); + + // Compute function + integral_image.run(); + + return dst; +} +} // namespace + +#ifndef DOXYGEN_SKIP_THIS +BOOST_AUTO_TEST_SUITE(NEON) +BOOST_AUTO_TEST_SUITE(IntegralImage) + +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(Configuration, SmallShapes() + LargeShapes(), shape) +{ + // Create tensors + Tensor src = create_tensor(shape, DataType::U8); + Tensor dst = create_tensor(shape, DataType::U32); + + BOOST_TEST(src.info()->is_resizable()); + BOOST_TEST(dst.info()->is_resizable()); + + // Create integral image configure function + NEIntegralImage integral_image; + integral_image.configure(&src, &dst); + + // Validate valid region + const ValidRegion valid_region = shape_to_valid_region(shape); + validate(src.info()->valid_region(), valid_region); + validate(dst.info()->valid_region(), valid_region); + + // Validate padding + const PaddingSize src_padding(0, required_padding(shape.x(), 16), 0, 0); + const PaddingSize dst_padding(1, required_padding(shape.x(), 16), 0, 1); + + validate(src.info()->padding(), src_padding); + validate(dst.info()->padding(), dst_padding); +} + +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit")) +BOOST_DATA_TEST_CASE(RunSmall, SmallShapes(), shape) +{ + // Compute function + Tensor dst = compute_integral_image(shape); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_integral_image(shape); + + // Validate output + validate(NEAccessor(dst), ref_dst); +} + +BOOST_TEST_DECORATOR(*boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(RunLarge, LargeShapes(), shape) +{ + // Compute function + Tensor dst = compute_integral_image(shape); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_integral_image(shape); + + // Validate output + validate(NEAccessor(dst), ref_dst); +} + +BOOST_AUTO_TEST_SUITE_END() +BOOST_AUTO_TEST_SUITE_END() +#endif diff --git a/tests/validation/NEON/NormalizationLayer.cpp b/tests/validation/NEON/NormalizationLayer.cpp new file mode 100644 index 0000000000..ff791effa0 --- /dev/null +++ b/tests/validation/NEON/NormalizationLayer.cpp @@ -0,0 +1,152 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "NEON/Helper.h" +#include "NEON/NEAccessor.h" +#include "TypePrinter.h" +#include "validation/Datasets.h" +#include "validation/Reference.h" +#include "validation/Validation.h" + +#include "arm_compute/runtime/NEON/functions/NENormalizationLayer.h" + +#include <random> + +using namespace arm_compute; +using namespace arm_compute::test; +using namespace arm_compute::test::neon; +using namespace arm_compute::test::validation; + +namespace +{ +/** Define tolerance of the normalization layer depending on values data type. + * + * @param[in] dt Data type of the tensors' values. + * + * @return Tolerance depending on the data type. + */ +float normalization_layer_tolerance(DataType dt) +{ + switch(dt) + { + case DataType::QS8: + return 2.0f; + case DataType::F32: + return 1e-05f; + default: + return 0.f; + } +} + +/** Compute Neon normalization layer function. + * + * @param[in] shape Shape of the input and output tensors. + * @param[in] dt Data type of input and output tensors. + * @param[in] norm_info Normalization Layer information. + * @param[in] fixed_point_position (Optional) Fixed point position that expresses the number of bits for the fractional part of the number when the tensor's data type is QS8 or QS16 (default = 0). + * + * @return Computed output tensor. 
+ */ +Tensor compute_normalization_layer(const TensorShape &shape, DataType dt, NormalizationLayerInfo norm_info, int fixed_point_position = 0) +{ + // Create tensors + Tensor src = create_tensor(shape, dt, 1, fixed_point_position); + Tensor dst = create_tensor(shape, dt, 1, fixed_point_position); + + // Create and configure function + NENormalizationLayer norm; + norm.configure(&src, &dst, norm_info); + + // Allocate tensors + src.allocator()->allocate(); + dst.allocator()->allocate(); + + BOOST_TEST(!src.info()->is_resizable()); + BOOST_TEST(!dst.info()->is_resizable()); + + // Fill tensors + if(dt == DataType::QS8) + { + const int8_t one_fixed_point = 1 << fixed_point_position; + const int8_t minus_one_fixed_point = -one_fixed_point; + library->fill_tensor_uniform(NEAccessor(src), 0, minus_one_fixed_point, one_fixed_point); + } + else + { + library->fill_tensor_uniform(NEAccessor(src), 0); + } + + // Compute function + norm.run(); + + return dst; +} +} // namespace + +#ifndef DOXYGEN_SKIP_THIS +BOOST_AUTO_TEST_SUITE(NEON) +BOOST_AUTO_TEST_SUITE(NormalizationLayer) + +BOOST_AUTO_TEST_SUITE(Float) +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit")) +BOOST_DATA_TEST_CASE(RunSmall, + SmallShapes() * DataType::F32 *NormalizationTypes() * boost::unit_test::data::xrange(3, 9, 2) * boost::unit_test::data::make({ 0.5f, 1.0f, 2.0f }), + shape, dt, norm_type, norm_size, beta) +{ + // Provide normalization layer information + NormalizationLayerInfo norm_info(norm_type, norm_size, 5, beta); + + // Compute function + Tensor dst = compute_normalization_layer(shape, dt, norm_info); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_normalization_layer(shape, dt, norm_info); + + // Validate output + validate(NEAccessor(dst), ref_dst, normalization_layer_tolerance(DataType::F32)); +} +BOOST_AUTO_TEST_SUITE_END() + +BOOST_AUTO_TEST_SUITE(Quantized) +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit")) +BOOST_DATA_TEST_CASE(RunSmall, + SmallShapes() * DataType::QS8 *NormalizationTypes() * boost::unit_test::data::xrange(3, 7, 2) * (boost::unit_test::data::xrange(1, 6) * boost::unit_test::data::make({ 0.5f, 1.0f, 2.0f })), + shape, dt, norm_type, norm_size, fixed_point_position, beta) +{ + // Provide normalization layer information + NormalizationLayerInfo norm_info(norm_type, norm_size, 5, beta, 1.f); + + // Compute function + Tensor dst = compute_normalization_layer(shape, dt, norm_info, fixed_point_position); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_normalization_layer(shape, dt, norm_info, fixed_point_position); + + // Validate output + validate(NEAccessor(dst), ref_dst, normalization_layer_tolerance(DataType::QS8)); +} +BOOST_AUTO_TEST_SUITE_END() + +BOOST_AUTO_TEST_SUITE_END() +BOOST_AUTO_TEST_SUITE_END() +#endif diff --git a/tests/validation/NEON/PixelWiseMultiplication.cpp b/tests/validation/NEON/PixelWiseMultiplication.cpp new file mode 100644 index 0000000000..c6c2792126 --- /dev/null +++ b/tests/validation/NEON/PixelWiseMultiplication.cpp @@ -0,0 +1,428 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "Globals.h" +#include "NEON/Helper.h" +#include "NEON/NEAccessor.h" +#include "TensorLibrary.h" +#include "TypePrinter.h" +#include "Utils.h" +#include "validation/Datasets.h" +#include "validation/Reference.h" +#include "validation/Validation.h" + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h" +#include "arm_compute/runtime/Tensor.h" +#include "arm_compute/runtime/TensorAllocator.h" + +#include "boost_wrapper.h" + +#include <random> +#include <string> + +using namespace arm_compute; +using namespace arm_compute::test; +using namespace arm_compute::test::neon; +using namespace arm_compute::test::validation; + +namespace +{ +/** Compute Neon pixel-wise multiplication function. + * + * @param[in] shape Shape of the input and output tensors. + * @param[in] dt_in0 Data type of first input tensor. + * @param[in] dt_in1 Data type of second input tensor. + * @param[in] dt_out Data type of the output tensor. + * @param[in] scale Non-negative scale. + * @param[in] convert_policy Overflow policy of the operation. + * @param[in] rounding_policy Rounding policy of the operation. + * @param[in] fixed_point_position (Optional) Fixed point position that expresses the number of bits for the fractional part of the number (default = 0). + * + * @return Computed output tensor.
+ */ +Tensor compute_pixel_wise_multiplication(const TensorShape &shape, DataType dt_in0, DataType dt_in1, DataType dt_out, float scale, ConvertPolicy convert_policy, RoundingPolicy rounding_policy, + int fixed_point_position = 0) +{ + // Create tensors + Tensor src1 = create_tensor(shape, dt_in0, 1, fixed_point_position); + Tensor src2 = create_tensor(shape, dt_in1, 1, fixed_point_position); + Tensor dst = create_tensor(shape, dt_out, 1, fixed_point_position); + + // Create and configure function + NEPixelWiseMultiplication multiply; + multiply.configure(&src1, &src2, &dst, scale, convert_policy, rounding_policy); + + // Allocate tensors + src1.allocator()->allocate(); + src2.allocator()->allocate(); + dst.allocator()->allocate(); + + BOOST_TEST(!src1.info()->is_resizable()); + BOOST_TEST(!src2.info()->is_resizable()); + BOOST_TEST(!dst.info()->is_resizable()); + + // Fill tensors + library->fill_tensor_uniform(NEAccessor(src1), 0); + library->fill_tensor_uniform(NEAccessor(src2), 1); + + // Compute function + multiply.run(); + + return dst; +} + +void validate_configuration(const Tensor &src1, const Tensor &src2, Tensor &dst, TensorShape shape, float scale, ConvertPolicy convert_policy, RoundingPolicy rounding_policy) +{ + BOOST_TEST(src1.info()->is_resizable()); + BOOST_TEST(src2.info()->is_resizable()); + BOOST_TEST(dst.info()->is_resizable()); + + // Create and configure function + NEPixelWiseMultiplication multiply; + multiply.configure(&src1, &src2, &dst, scale, convert_policy, rounding_policy); + + // Validate valid region + const ValidRegion valid_region = shape_to_valid_region(shape); + validate(src1.info()->valid_region(), valid_region); + validate(src2.info()->valid_region(), valid_region); + validate(dst.info()->valid_region(), valid_region); + + // Validate padding + const PaddingSize padding(0, required_padding(shape.x(), 16), 0, 0); + validate(src1.info()->padding(), padding); + validate(src2.info()->padding(), padding); + validate(dst.info()->padding(), padding); +} +} // namespace + +#ifndef DOXYGEN_SKIP_THIS +BOOST_AUTO_TEST_SUITE(NEON) +BOOST_AUTO_TEST_SUITE(PixelWiseMultiplication) + +BOOST_AUTO_TEST_SUITE(U8) + +BOOST_AUTO_TEST_SUITE(Scale255) +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(Configuration, (SmallShapes() + LargeShapes()) * (1.f / 255.f) * ConvertPolicies() + * RoundingPolicy::TO_NEAREST_UP, + shape, scale, convert_policy, rounding_policy) +{ + // Create tensors + Tensor src1 = create_tensor(shape, DataType::U8); + Tensor src2 = create_tensor(shape, DataType::U8); + Tensor dst = create_tensor(shape, DataType::U8); + + validate_configuration(src1, src2, dst, shape, scale, convert_policy, rounding_policy); +} + +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit")) +BOOST_DATA_TEST_CASE(RunSmall, SmallShapes() * (1.f / 255.f) * ConvertPolicies() * RoundingPolicy::TO_NEAREST_UP, + shape, scale, convert_policy, rounding_policy) +{ + // Compute function + Tensor dst = compute_pixel_wise_multiplication(shape, DataType::U8, DataType::U8, DataType::U8, scale, convert_policy, + rounding_policy); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_pixel_wise_multiplication(shape, DataType::U8, DataType::U8, + DataType::U8, scale, convert_policy, rounding_policy); + + // Validate output + // Allow tolerance value of 1.f to counteract imprecision due to 32-bit float conversion + validate(NEAccessor(dst), ref_dst, 1.f, 0.f, std::numeric_limits<uint8_t>::max()); +}
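The tolerance overload of validate() used above bounds the absolute element-wise difference between the computed and the reference tensor. A minimal sketch of that kind of check, assuming a plain element-wise comparison over float values (illustrative only; the helper name is hypothetical and this is not the library's actual implementation in tests/validation/Validation.cpp):

#include <cassert>
#include <cmath>
#include <cstddef>
#include <vector>

// Hypothetical absolute-tolerance check: every computed element must lie
// within 'tolerance' of its reference counterpart.
bool within_absolute_tolerance(const std::vector<float> &result,
                               const std::vector<float> &reference,
                               float tolerance)
{
    assert(result.size() == reference.size());
    for(std::size_t i = 0; i < result.size(); ++i)
    {
        if(std::fabs(result[i] - reference[i]) > tolerance)
        {
            return false; // the first out-of-tolerance element fails the check
        }
    }
    return true;
}

With a tolerance of 1.f, an 8-bit result may differ from the reference by at most one quantization step, which is why the comment above ties the tolerance to rounding of the 1/255 scale in 32-bit floats.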
+BOOST_TEST_DECORATOR(*boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(RunLarge, LargeShapes() * (1.f / 255.f) * ConvertPolicies() * RoundingPolicy::TO_NEAREST_UP, + shape, scale, convert_policy, rounding_policy) +{ + // Compute function + Tensor dst = compute_pixel_wise_multiplication(shape, DataType::U8, DataType::U8, DataType::U8, scale, convert_policy, + rounding_policy); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_pixel_wise_multiplication(shape, DataType::U8, DataType::U8, + DataType::U8, scale, convert_policy, rounding_policy); + + // Validate output + // Allow tolerance value of 1.f to counteract imprecision due to 32-bit float conversion + validate(NEAccessor(dst), ref_dst, 1.f, 0.f, std::numeric_limits::max()); +} +BOOST_AUTO_TEST_SUITE_END() + +BOOST_AUTO_TEST_SUITE(ScaleOther) +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(Configuration, (SmallShapes() + LargeShapes()) * boost::unit_test::data::make({ 1.f, 1.f / 32768.f }) + * ConvertPolicies() + * RoundingPolicy::TO_ZERO, + shape, scale, convert_policy, rounding_policy) +{ + // Create tensors + Tensor src1 = create_tensor(shape, DataType::U8); + Tensor src2 = create_tensor(shape, DataType::U8); + Tensor dst = create_tensor(shape, DataType::U8); + + validate_configuration(src1, src2, dst, shape, scale, convert_policy, rounding_policy); +} + +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit")) +BOOST_DATA_TEST_CASE(RunSmall, SmallShapes() * boost::unit_test::data::make({ 1.f, 1.f / 32768.f }) * ConvertPolicies() + * RoundingPolicy::TO_ZERO, + shape, scale, convert_policy, rounding_policy) +{ + // Compute function + Tensor dst = compute_pixel_wise_multiplication(shape, DataType::U8, DataType::U8, DataType::U8, scale, convert_policy, + rounding_policy); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_pixel_wise_multiplication(shape, DataType::U8, DataType::U8, + DataType::U8, scale, convert_policy, rounding_policy); + + // Validate output + validate(NEAccessor(dst), ref_dst); +} +BOOST_TEST_DECORATOR(*boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(RunLarge, LargeShapes() * boost::unit_test::data::make({ 1.f, 1.f / 32768.f }) * ConvertPolicies() + * RoundingPolicy::TO_ZERO, + shape, scale, convert_policy, rounding_policy) +{ + // Compute function + Tensor dst = compute_pixel_wise_multiplication(shape, DataType::U8, DataType::U8, DataType::U8, scale, convert_policy, + rounding_policy); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_pixel_wise_multiplication(shape, DataType::U8, DataType::U8, + DataType::U8, scale, convert_policy, rounding_policy); + + // Validate output + validate(NEAccessor(dst), ref_dst); +} +BOOST_AUTO_TEST_SUITE_END() +BOOST_AUTO_TEST_SUITE_END() + +BOOST_AUTO_TEST_SUITE(S16) +BOOST_AUTO_TEST_SUITE(Scale255) +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(Configuration, (SmallShapes() + LargeShapes()) * boost::unit_test::data::make({ DataType::U8, DataType::S16 }) * (1.f / 255.f) * ConvertPolicies() + * RoundingPolicy::TO_NEAREST_UP, + shape, dt, scale, convert_policy, rounding_policy) +{ + // Create tensors + Tensor src1 = create_tensor(shape, dt); + Tensor src2 = create_tensor(shape, DataType::S16); + Tensor dst = create_tensor(shape, DataType::S16); + + validate_configuration(src1, src2, dst, shape, scale, convert_policy, rounding_policy); +} 
+BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit")) +BOOST_DATA_TEST_CASE(RunSmall, SmallShapes() * boost::unit_test::data::make({ DataType::U8, DataType::S16 }) * (1.f / 255.f) * ConvertPolicies() + * RoundingPolicy::TO_NEAREST_UP, + shape, dt, scale, convert_policy, rounding_policy) +{ + // Compute function + Tensor dst = compute_pixel_wise_multiplication(shape, dt, DataType::S16, DataType::S16, scale, convert_policy, rounding_policy); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_pixel_wise_multiplication(shape, dt, DataType::S16, DataType::S16, scale, convert_policy, rounding_policy); + + // Validate output + // Allow tolerance value of 2.f to counteract imprecision due to 32-bit float conversion + validate(NEAccessor(dst), ref_dst, 2.f, 0.f, std::numeric_limits::max()); +} +BOOST_TEST_DECORATOR(*boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(RunLarge, LargeShapes() * boost::unit_test::data::make({ DataType::U8, DataType::S16 }) * (1.f / 255.f) * ConvertPolicies() + * RoundingPolicy::TO_NEAREST_UP, + shape, dt, scale, convert_policy, rounding_policy) +{ + // Compute function + Tensor dst = compute_pixel_wise_multiplication(shape, dt, DataType::S16, DataType::S16, scale, convert_policy, rounding_policy); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_pixel_wise_multiplication(shape, dt, DataType::S16, DataType::S16, + scale, convert_policy, rounding_policy); + + // Validate output + // Allow tolerance value of 2.f to counteract imprecision due to 32-bit float conversion + validate(NEAccessor(dst), ref_dst, 2.f, 0.f, std::numeric_limits::max()); +} +BOOST_AUTO_TEST_SUITE_END() + +BOOST_AUTO_TEST_SUITE(ScaleOther) +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(Configuration, (SmallShapes() + LargeShapes()) * boost::unit_test::data::make({ DataType::U8, DataType::S16 }) * boost::unit_test::data::make({ 1.f, 1.f / 32768.f }) + * ConvertPolicies() + * RoundingPolicy::TO_ZERO, + shape, dt, scale, convert_policy, rounding_policy) +{ + // Create tensors + Tensor src1 = create_tensor(shape, dt); + Tensor src2 = create_tensor(shape, DataType::S16); + Tensor dst = create_tensor(shape, DataType::S16); + + validate_configuration(src1, src2, dst, shape, scale, convert_policy, rounding_policy); +} +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit")) +BOOST_DATA_TEST_CASE(RunSmall, SmallShapes() * boost::unit_test::data::make({ DataType::U8, DataType::S16 }) * boost::unit_test::data::make({ 1.f, 1.f / 32768.f }) * ConvertPolicies() + * RoundingPolicy::TO_ZERO, + shape, dt, scale, convert_policy, rounding_policy) +{ + // Compute function + Tensor dst = compute_pixel_wise_multiplication(shape, dt, DataType::S16, DataType::S16, scale, convert_policy, rounding_policy); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_pixel_wise_multiplication(shape, dt, DataType::S16, DataType::S16, scale, convert_policy, rounding_policy); + + // Validate output + validate(NEAccessor(dst), ref_dst); +} +BOOST_TEST_DECORATOR(*boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(RunLarge, LargeShapes() * boost::unit_test::data::make({ DataType::U8, DataType::S16 }) * boost::unit_test::data::make({ 1.f, 1.f / 32768.f }) * ConvertPolicies() + * RoundingPolicy::TO_ZERO, + shape, dt, scale, convert_policy, rounding_policy) +{ + // Compute function + Tensor dst = compute_pixel_wise_multiplication(shape, dt, DataType::S16, DataType::S16, 
scale, convert_policy, rounding_policy); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_pixel_wise_multiplication(shape, dt, DataType::S16, DataType::S16, + scale, convert_policy, rounding_policy); + + // Validate output + validate(NEAccessor(dst), ref_dst); +} +BOOST_AUTO_TEST_SUITE_END() +BOOST_AUTO_TEST_SUITE_END() + +BOOST_AUTO_TEST_SUITE(F32) +BOOST_AUTO_TEST_SUITE(Scale255) +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(Configuration, (SmallShapes() + LargeShapes()) * (1.f / 255.f) * ConvertPolicies() + * RoundingPolicy::TO_NEAREST_UP, + shape, scale, convert_policy, rounding_policy) +{ + // Create tensors + Tensor src1 = create_tensor(shape, DataType::F32); + Tensor src2 = create_tensor(shape, DataType::F32); + Tensor dst = create_tensor(shape, DataType::F32); + + validate_configuration(src1, src2, dst, shape, scale, convert_policy, rounding_policy); +} +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit")) +BOOST_DATA_TEST_CASE(RunSmall, SmallShapes() * (1.f / 255.f) * ConvertPolicies() + * RoundingPolicy::TO_NEAREST_UP, + shape, scale, convert_policy, rounding_policy) +{ + // Compute function + Tensor dst = compute_pixel_wise_multiplication(shape, DataType::F32, DataType::F32, DataType::F32, scale, convert_policy, rounding_policy); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_pixel_wise_multiplication(shape, DataType::F32, DataType::F32, DataType::F32, scale, convert_policy, rounding_policy); + + // Validate output + // Allow tolerance value of 1.f to counteract imprecision due to 32-bit float conversion + validate(NEAccessor(dst), ref_dst, 1.f, 0.f, std::numeric_limits<float>::max()); +} +BOOST_TEST_DECORATOR(*boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(RunLarge, LargeShapes() * (1.f / 255.f) * ConvertPolicies() + * RoundingPolicy::TO_NEAREST_UP, + shape, scale, convert_policy, rounding_policy) +{ + // Compute function + Tensor dst = compute_pixel_wise_multiplication(shape, DataType::F32, DataType::F32, DataType::F32, scale, convert_policy, rounding_policy); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_pixel_wise_multiplication(shape, DataType::F32, DataType::F32, DataType::F32, + scale, convert_policy, rounding_policy); + + // Validate output + // Allow tolerance value of 1.f to counteract imprecision due to 32-bit float conversion + validate(NEAccessor(dst), ref_dst, 1.f, 0.f, std::numeric_limits<float>::max()); +} +BOOST_AUTO_TEST_SUITE_END() + +BOOST_AUTO_TEST_SUITE(ScaleOther) +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(Configuration, (SmallShapes() + LargeShapes()) * boost::unit_test::data::make({ 1.f, 1.f / 32768.f }) + * ConvertPolicies() + * RoundingPolicy::TO_ZERO, + shape, scale, convert_policy, rounding_policy) +{ + // Create tensors + Tensor src1 = create_tensor(shape, DataType::F32); + Tensor src2 = create_tensor(shape, DataType::F32); + Tensor dst = create_tensor(shape, DataType::F32); + + validate_configuration(src1, src2, dst, shape, scale, convert_policy, rounding_policy); +} +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit")) +BOOST_DATA_TEST_CASE(RunSmall, SmallShapes() * boost::unit_test::data::make({ 1.f, 1.f / 32768.f }) * ConvertPolicies() + * RoundingPolicy::TO_ZERO, + shape, scale, convert_policy, rounding_policy) +{ + // Compute function + Tensor dst = compute_pixel_wise_multiplication(shape,
DataType::F32, DataType::F32, DataType::F32, scale, convert_policy, rounding_policy); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_pixel_wise_multiplication(shape, DataType::F32, DataType::F32, DataType::F32, scale, convert_policy, rounding_policy); + + // Validate output + validate(NEAccessor(dst), ref_dst); +} +BOOST_TEST_DECORATOR(*boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(RunLarge, LargeShapes() * boost::unit_test::data::make({ 1.f, 1.f / 32768.f }) * ConvertPolicies() + * RoundingPolicy::TO_ZERO, + shape, scale, convert_policy, rounding_policy) +{ + // Compute function + Tensor dst = compute_pixel_wise_multiplication(shape, DataType::F32, DataType::F32, DataType::F32, scale, convert_policy, rounding_policy); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_pixel_wise_multiplication(shape, DataType::F32, DataType::F32, DataType::F32, + scale, convert_policy, rounding_policy); + + // Validate output + validate(NEAccessor(dst), ref_dst); +} +BOOST_AUTO_TEST_SUITE_END() +BOOST_AUTO_TEST_SUITE_END() + +BOOST_AUTO_TEST_SUITE(QS8) +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit")) +BOOST_DATA_TEST_CASE(RunSmall, SmallShapes() * DataType::QS8 *ConvertPolicies() * RoundingPolicy::TO_ZERO * boost::unit_test::data::xrange(1, 7), + shape, dt, convert_policy, rounding_policy, fixed_point_position) +{ + // Compute function + Tensor dst = compute_pixel_wise_multiplication(shape, dt, dt, dt, 1.f, convert_policy, rounding_policy, fixed_point_position); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_fixed_point_pixel_wise_multiplication(shape, dt, dt, dt, 1.f, fixed_point_position, convert_policy, rounding_policy); + + // Validate output + validate(NEAccessor(dst), ref_dst); +} +BOOST_AUTO_TEST_SUITE_END() + +BOOST_AUTO_TEST_SUITE_END() +BOOST_AUTO_TEST_SUITE_END() +#endif diff --git a/tests/validation/NEON/Pooling/PoolingLayer.cpp b/tests/validation/NEON/Pooling/PoolingLayer.cpp new file mode 100644 index 0000000000..b15ad1c5e6 --- /dev/null +++ b/tests/validation/NEON/Pooling/PoolingLayer.cpp @@ -0,0 +1,139 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "NEON/Helper.h" +#include "NEON/NEAccessor.h" +#include "TypePrinter.h" +#include "arm_compute/runtime/NEON/functions/NEPoolingLayer.h" +#include "tests/dataset/PoolingLayerDataset.h" +#include "validation/Datasets.h" +#include "validation/Reference.h" +#include "validation/Validation.h" + +#include +#include + +using namespace arm_compute; +using namespace arm_compute::test; +using namespace arm_compute::test::neon; +using namespace arm_compute::test::validation; + +namespace +{ +const float tolerance_q = 0; /**< Tolerance value for comparing reference's output against implementation's output for quantized input */ +const float tolerance_f = 1e-05; /**< Tolerance value for comparing reference's output against implementation's output for float input */ + +/** Compute Neon pooling layer function. + * + * @param[in] shape Shape of the input and output tensors. + * @param[in] dt Data type of input and output tensors. + * @param[in] pool_info Pooling Layer information. + * + * @return Computed output tensor. + */ +Tensor compute_pooling_layer(const TensorShape &shape_in, const TensorShape &shape_out, DataType dt, PoolingLayerInfo pool_info, int fixed_point_position = 0) +{ + // Create tensors + Tensor src = create_tensor(shape_in, dt, 1, fixed_point_position); + Tensor dst = create_tensor(shape_out, dt, 1, fixed_point_position); + + // Create and configure function + NEPoolingLayer pool; + pool.configure(&src, &dst, pool_info); + + // Allocate tensors + src.allocator()->allocate(); + dst.allocator()->allocate(); + + BOOST_TEST(!src.info()->is_resizable()); + BOOST_TEST(!dst.info()->is_resizable()); + + // Fill tensors + int min = 0; + int max = 0; + switch(dt) + { + case DataType::F32: + min = -1; + max = 1; + break; + case DataType::QS8: + min = -(1 << fixed_point_position); + max = (1 << fixed_point_position); + break; + default: + ARM_COMPUTE_ERROR("DataType not supported."); + } + std::uniform_real_distribution<> distribution(min, max); + library->fill(NEAccessor(src), distribution, 0); + + // Compute function + pool.run(); + + return dst; +} +} // namespace + +#ifndef DOXYGEN_SKIP_THIS +BOOST_AUTO_TEST_SUITE(NEON) +BOOST_AUTO_TEST_SUITE(Pooling) +BOOST_AUTO_TEST_SUITE(PoolingLayer) + +BOOST_AUTO_TEST_SUITE(Float) +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit")) +BOOST_DATA_TEST_CASE(RandomDataset, + RandomPoolingLayerDataset() * boost::unit_test::data::make(DataType::F32), + obj, dt) +{ + // Compute function + Tensor dst = compute_pooling_layer(obj.src_shape, obj.dst_shape, dt, obj.info); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_pooling_layer(obj.src_shape, obj.dst_shape, dt, obj.info); + + // Validate output + validate(NEAccessor(dst), ref_dst, tolerance_f, 0); +} +BOOST_AUTO_TEST_SUITE_END() + +BOOST_AUTO_TEST_SUITE(Quantized) +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit")) +BOOST_DATA_TEST_CASE(RandomDataset, + RandomPoolingLayerDataset() * boost::unit_test::data::make(DataType::QS8) * boost::unit_test::data::xrange(1, 5), + obj, dt, fixed_point_position) +{ + // Compute function + Tensor dst = compute_pooling_layer(obj.src_shape, obj.dst_shape, dt, obj.info, fixed_point_position); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_pooling_layer(obj.src_shape, obj.dst_shape, dt, obj.info, fixed_point_position); + + // Validate output + validate(NEAccessor(dst), ref_dst, tolerance_q, 0); +} +BOOST_AUTO_TEST_SUITE_END() + +BOOST_AUTO_TEST_SUITE_END() +BOOST_AUTO_TEST_SUITE_END() 
+BOOST_AUTO_TEST_SUITE_END() +#endif diff --git a/tests/validation/NEON/SoftmaxLayer.cpp b/tests/validation/NEON/SoftmaxLayer.cpp new file mode 100644 index 0000000000..f5c7a21abd --- /dev/null +++ b/tests/validation/NEON/SoftmaxLayer.cpp @@ -0,0 +1,196 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "Globals.h" +#include "NEON/Helper.h" +#include "NEON/NEAccessor.h" +#include "TensorLibrary.h" +#include "TypePrinter.h" +#include "Utils.h" +#include "validation/Datasets.h" +#include "validation/Reference.h" +#include "validation/Validation.h" + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/functions/NESoftmaxLayer.h" +#include "arm_compute/runtime/Tensor.h" +#include "arm_compute/runtime/TensorAllocator.h" + +#include "boost_wrapper.h" + +#include <random> +#include <string> + +using namespace arm_compute; +using namespace arm_compute::test; +using namespace arm_compute::test::neon; +using namespace arm_compute::test::validation; + +namespace +{ +/** Tolerance for float operations */ +const float tolerance = 0.000001f; +/** Tolerance for fixed point operations */ +const float tolerance_fixed_point = 2.f; + +/** Compute Neon softmax layer function. + * + * @param[in] shape Shape of the input and output tensors. + * @param[in] dt Data type of input and output tensors. + * @param[in] fixed_point_position (Optional) Number of bits for the fractional part of fixed point numbers. + * + * @return Computed output tensor.
+ */ +Tensor compute_softmax_layer(const TensorShape &shape, DataType dt, int fixed_point_position = 0) +{ + // Create tensors + Tensor src = create_tensor(shape, dt, 1, fixed_point_position); + Tensor dst = create_tensor(shape, dt, 1, fixed_point_position); + + // Create and configure function + NESoftmaxLayer smx_layer; + smx_layer.configure(&src, &dst); + + // Allocate tensors + src.allocator()->allocate(); + dst.allocator()->allocate(); + + BOOST_TEST(!src.info()->is_resizable()); + BOOST_TEST(!dst.info()->is_resizable()); + + // Fill tensors + if(arm_compute::is_data_type_float(dt)) + { + std::uniform_real_distribution<> distribution(-10, 10); + library->fill(NEAccessor(src), distribution, 0); + } + else + { + int one_fixed = 1 << fixed_point_position; + std::uniform_int_distribution<> distribution(-one_fixed, one_fixed); + library->fill(NEAccessor(src), distribution, 0); + } + + // Compute function + smx_layer.run(); + + return dst; +} +} // namespace + +#ifndef DOXYGEN_SKIP_THIS +BOOST_AUTO_TEST_SUITE(NEON) +BOOST_AUTO_TEST_SUITE(SoftmaxLayer) + +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(Configuration, (SmallShapes() + LargeShapes()) * CNNDataTypes(), shape, dt) +{ + // Set fixed point position data type allowed + int fixed_point_position = (arm_compute::is_data_type_fixed_point(dt)) ? 3 : 0; + + // Create tensors + Tensor src = create_tensor(shape, dt, 1, fixed_point_position); + Tensor dst = create_tensor(shape, dt, 1, fixed_point_position); + + BOOST_TEST(src.info()->is_resizable()); + BOOST_TEST(dst.info()->is_resizable()); + + // Create and configure function + NESoftmaxLayer smx_layer; + smx_layer.configure(&src, &dst); + + // Validate valid region + const ValidRegion valid_region = shape_to_valid_region(shape); + validate(src.info()->valid_region(), valid_region); + validate(dst.info()->valid_region(), valid_region); + + // Validate padding + int step = 16 / arm_compute::data_size_from_type(dt); + const PaddingSize padding(0, required_padding(shape.x(), step), 0, 0); + validate(src.info()->padding(), padding); + validate(dst.info()->padding(), padding); +} + +BOOST_AUTO_TEST_SUITE(Float) +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit")) +BOOST_DATA_TEST_CASE(RunSmall, SmallShapes() * CNNFloatDataTypes(), shape, dt) +{ + // Compute function + Tensor dst = compute_softmax_layer(shape, dt); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_softmax_layer(shape, dt); + + // Validate output + validate(NEAccessor(dst), ref_dst, tolerance); +} + +BOOST_TEST_DECORATOR(*boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(RunLarge, LargeShapes() * CNNFloatDataTypes(), shape, dt) +{ + // Compute function + Tensor dst = compute_softmax_layer(shape, dt); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_softmax_layer(shape, dt); + + // Validate output + validate(NEAccessor(dst), ref_dst, tolerance); +} +BOOST_AUTO_TEST_SUITE_END() + +BOOST_AUTO_TEST_SUITE(Quantized) +// Testing for fixed point position [1,6) as reciprocal limits the maximum fixed point position to 5 +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit")) +BOOST_DATA_TEST_CASE(RunSmall, SmallShapes() * CNNFixedPointDataTypes() * boost::unit_test::data::xrange(1, 6), + shape, dt, fixed_point_position) +{ + // Compute function + Tensor dst = compute_softmax_layer(shape, dt, fixed_point_position); + + // Compute reference + RawTensor ref_dst = 
Reference::compute_reference_softmax_layer(shape, dt, fixed_point_position); + + // Validate output + validate(NEAccessor(dst), ref_dst, tolerance_fixed_point); +} + +BOOST_TEST_DECORATOR(*boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(RunLarge, LargeShapes() * CNNFixedPointDataTypes() * boost::unit_test::data::xrange(1, 6), + shape, dt, fixed_point_position) +{ + // Compute function + Tensor dst = compute_softmax_layer(shape, dt, fixed_point_position); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_softmax_layer(shape, dt, fixed_point_position); + + // Validate output + validate(NEAccessor(dst), ref_dst, tolerance_fixed_point); +} +BOOST_AUTO_TEST_SUITE_END() + +BOOST_AUTO_TEST_SUITE_END() +BOOST_AUTO_TEST_SUITE_END() +#endif diff --git a/tests/validation/NEON/Threshold.cpp b/tests/validation/NEON/Threshold.cpp new file mode 100644 index 0000000000..6ac6f3d26b --- /dev/null +++ b/tests/validation/NEON/Threshold.cpp @@ -0,0 +1,154 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "Globals.h" +#include "NEON/Helper.h" +#include "NEON/NEAccessor.h" +#include "TensorLibrary.h" +#include "TypePrinter.h" +#include "Utils.h" +#include "dataset/ThresholdDataset.h" +#include "validation/Datasets.h" +#include "validation/Reference.h" +#include "validation/Validation.h" + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/functions/NEThreshold.h" +#include "arm_compute/runtime/Tensor.h" +#include "arm_compute/runtime/TensorAllocator.h" + +#include "boost_wrapper.h" + +#include <random> +#include <string> + +using namespace arm_compute; +using namespace arm_compute::test; +using namespace arm_compute::test::neon; +using namespace arm_compute::test::validation; + +namespace +{ +/** Compute Threshold function. + * + * @param[in] shape Shape of the input and output tensors. + * @param[in] threshold Threshold. When the threshold type is RANGE, this is used as the lower threshold. + * @param[in] false_value Value to set when the condition is not respected. + * @param[in] true_value Value to set when the condition is respected. + * @param[in] type Thresholding type. Either RANGE or BINARY. + * @param[in] upper Upper threshold. Only used when the thresholding type is RANGE. + * + * @return Computed output tensor.
+ */ +Tensor compute_threshold(const TensorShape &shape, uint8_t threshold, uint8_t false_value, uint8_t true_value, ThresholdType type, uint8_t upper) +{ + // Create tensors + Tensor src1 = create_tensor(shape, DataType::U8); + Tensor dst = create_tensor(shape, DataType::U8); + + // Create and configure function + NEThreshold thrsh; + thrsh.configure(&src1, &dst, threshold, false_value, true_value, type, upper); + + // Allocate tensors + src1.allocator()->allocate(); + dst.allocator()->allocate(); + + BOOST_TEST(!src1.info()->is_resizable()); + BOOST_TEST(!dst.info()->is_resizable()); + + // Fill tensors + library->fill_tensor_uniform(NEAccessor(src1), 0); + + // Compute function + thrsh.run(); + + return dst; +} +} // namespace + +#ifndef DOXYGEN_SKIP_THIS +BOOST_AUTO_TEST_SUITE(NEON) +BOOST_AUTO_TEST_SUITE(Threshold) + +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(Configuration, + (SmallShapes() + LargeShapes()) * ThresholdDataset(), + shape, thrshConf) +{ + // Create tensors + Tensor src1 = create_tensor(shape, DataType::U8); + Tensor dst = create_tensor(shape, DataType::U8); + + BOOST_TEST(src1.info()->is_resizable()); + BOOST_TEST(dst.info()->is_resizable()); + + // Create and configure function + NEThreshold thrsh; + thrsh.configure(&src1, &dst, thrshConf.threshold, thrshConf.false_value, thrshConf.true_value, thrshConf.type, thrshConf.upper); + + // Validate valid region + const ValidRegion valid_region = shape_to_valid_region(shape); + validate(src1.info()->valid_region(), valid_region); + validate(dst.info()->valid_region(), valid_region); + + // Validate padding + const PaddingSize padding(0, required_padding(shape.x(), 16), 0, 0); + validate(src1.info()->padding(), padding); + validate(dst.info()->padding(), padding); +} + +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit")) +BOOST_DATA_TEST_CASE(RunSmall, + SmallShapes() * ThresholdDataset(), + shape, thrshConf) +{ + // Compute function + Tensor dst = compute_threshold(shape, thrshConf.threshold, thrshConf.false_value, thrshConf.true_value, thrshConf.type, thrshConf.upper); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_threshold(shape, thrshConf.threshold, thrshConf.false_value, thrshConf.true_value, thrshConf.type, thrshConf.upper); + + // Validate output + validate(NEAccessor(dst), ref_dst); +} + +BOOST_TEST_DECORATOR(*boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(RunLarge, + LargeShapes() * ThresholdDataset(), + shape, thrshConf) +{ + // Compute function + Tensor dst = compute_threshold(shape, thrshConf.threshold, thrshConf.false_value, thrshConf.true_value, thrshConf.type, thrshConf.upper); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_threshold(shape, thrshConf.threshold, thrshConf.false_value, thrshConf.true_value, thrshConf.type, thrshConf.upper); + + // Validate output + validate(NEAccessor(dst), ref_dst); +} + +BOOST_AUTO_TEST_SUITE_END() +BOOST_AUTO_TEST_SUITE_END() +#endif diff --git a/tests/validation/Reference.cpp b/tests/validation/Reference.cpp new file mode 100644 index 0000000000..263c57b16b --- /dev/null +++ b/tests/validation/Reference.cpp @@ -0,0 +1,596 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "Reference.h" + +#include "Globals.h" +#include "Helpers.h" +#include "ReferenceCPP.h" +#include "TensorLibrary.h" +#include "validation/Helpers.h" + +#include <random> + +using namespace arm_compute::test; + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +RawTensor Reference::compute_reference_integral_image(const TensorShape &shape) +{ + // Create reference + RawTensor ref_src = library->get(shape, DataType::U8); + RawTensor ref_dst = library->get(shape, DataType::U32); + + // Fill reference + library->fill_tensor_uniform(ref_src, 0); + + // Compute reference + ReferenceCPP::integral_image(ref_src, ref_dst); + + return ref_dst; +} +RawTensor Reference::compute_reference_absolute_difference(const TensorShape &shape, DataType dt_in0, DataType dt_in1, DataType dt_out) +{ + // Create reference + RawTensor ref_src1 = library->get(shape, dt_in0); + RawTensor ref_src2 = library->get(shape, dt_in1); + RawTensor ref_dst = library->get(shape, dt_out); + + // Fill reference + library->fill_tensor_uniform(ref_src1, 0); + library->fill_tensor_uniform(ref_src2, 1); + + // Compute reference + ReferenceCPP::absolute_difference(ref_src1, ref_src2, ref_dst); + + return ref_dst; +} + +RawTensor Reference::compute_reference_accumulate(const TensorShape &shape) +{ + // Create reference + RawTensor ref_src = library->get(shape, DataType::U8); + RawTensor ref_dst = library->get(shape, DataType::S16); + + // Fill reference + library->fill_tensor_uniform(ref_src, 0); + library->fill_tensor_uniform(ref_dst, 1); + + // Compute reference + ReferenceCPP::accumulate(ref_src, ref_dst); + + return ref_dst; +} + +RawTensor Reference::compute_reference_accumulate_squared(const TensorShape &shape, uint32_t shift) +{ + // Create reference + RawTensor ref_src = library->get(shape, DataType::U8); + RawTensor ref_dst = library->get(shape, DataType::S16); + + // Fill reference + // ref_dst tensor filled with non-negative values + library->fill_tensor_uniform(ref_src, 0); + library->fill_tensor_uniform(ref_dst, 1, static_cast<int16_t>(0), std::numeric_limits<int16_t>::max()); + + // Compute reference + ReferenceCPP::accumulate_squared(ref_src, ref_dst, shift); + + return ref_dst; +} + +RawTensor Reference::compute_reference_accumulate_weighted(const TensorShape &shape, float alpha) +{ + // Create reference + RawTensor ref_src = library->get(shape, DataType::U8); + RawTensor ref_dst = library->get(shape,
DataType::U8); + + // Fill reference + library->fill_tensor_uniform(ref_src, 0); + library->fill_tensor_uniform(ref_dst, 1); + + // Compute reference + ReferenceCPP::accumulate_weighted(ref_src, ref_dst, alpha); + + return ref_dst; +} + +RawTensor Reference::compute_reference_arithmetic_addition(const TensorShape &shape, DataType dt_in0, DataType dt_in1, DataType dt_out, ConvertPolicy convert_policy) +{ + // Create reference + RawTensor ref_src1 = library->get(shape, dt_in0); + RawTensor ref_src2 = library->get(shape, dt_in1); + RawTensor ref_dst = library->get(shape, dt_out); + + // Fill reference + library->fill_tensor_uniform(ref_src1, 0); + library->fill_tensor_uniform(ref_src2, 1); + + // Compute reference + ReferenceCPP::arithmetic_addition(ref_src1, ref_src2, ref_dst, convert_policy); + + return ref_dst; +} + +RawTensor Reference::compute_reference_arithmetic_subtraction(const TensorShape &shape, DataType dt_in0, DataType dt_in1, DataType dt_out, ConvertPolicy convert_policy) +{ + // Create reference + RawTensor ref_src1 = library->get(shape, dt_in0); + RawTensor ref_src2 = library->get(shape, dt_in1); + RawTensor ref_dst = library->get(shape, dt_out); + + // Fill reference + library->fill_tensor_uniform(ref_src1, 0); + library->fill_tensor_uniform(ref_src2, 1); + + // Compute reference + ReferenceCPP::arithmetic_subtraction(ref_src1, ref_src2, ref_dst, convert_policy); + + return ref_dst; +} + +RawTensor Reference::compute_reference_bitwise_and(const TensorShape &shape) +{ + // Create reference + RawTensor ref_src1 = library->get(shape, DataType::U8); + RawTensor ref_src2 = library->get(shape, DataType::U8); + RawTensor ref_dst = library->get(shape, DataType::U8); + + // Fill reference + library->fill_tensor_uniform(ref_src1, 0); + library->fill_tensor_uniform(ref_src2, 1); + + // Compute reference + ReferenceCPP::bitwise_and(ref_src1, ref_src2, ref_dst); + + return ref_dst; +} + +RawTensor Reference::compute_reference_bitwise_or(const TensorShape &shape) +{ + // Create reference + RawTensor ref_src1 = library->get(shape, DataType::U8); + RawTensor ref_src2 = library->get(shape, DataType::U8); + RawTensor ref_dst = library->get(shape, DataType::U8); + + // Fill reference + library->fill_tensor_uniform(ref_src1, 0); + library->fill_tensor_uniform(ref_src2, 1); + + // Compute reference + ReferenceCPP::bitwise_or(ref_src1, ref_src2, ref_dst); + + return ref_dst; +} + +RawTensor Reference::compute_reference_bitwise_xor(const TensorShape &shape) +{ + // Create reference + RawTensor ref_src1 = library->get(shape, DataType::U8); + RawTensor ref_src2 = library->get(shape, DataType::U8); + RawTensor ref_dst = library->get(shape, DataType::U8); + + // Fill reference + library->fill_tensor_uniform(ref_src1, 0); + library->fill_tensor_uniform(ref_src2, 1); + + // Compute reference + ReferenceCPP::bitwise_xor(ref_src1, ref_src2, ref_dst); + + return ref_dst; +} + +RawTensor Reference::compute_reference_bitwise_not(const TensorShape &shape) +{ + // Create reference + RawTensor ref_src = library->get(shape, DataType::U8); + RawTensor ref_dst = library->get(shape, DataType::U8); + + // Fill reference + library->fill_tensor_uniform(ref_src, 0); + + // Compute reference + ReferenceCPP::bitwise_not(ref_src, ref_dst); + + return ref_dst; +} + +RawTensor Reference::compute_reference_box3x3(const TensorShape &shape) +{ + // Create reference + RawTensor ref_src = library->get(shape, DataType::U8); + RawTensor ref_dst = library->get(shape, DataType::U8); + + // Fill reference + 
library->fill_tensor_uniform(ref_src, 0); + + // Compute reference + ReferenceCPP::box3x3(ref_src, ref_dst); + + return ref_dst; +} + +RawTensor Reference::compute_reference_depth_convert(const TensorShape &shape, DataType dt_in, DataType dt_out, ConvertPolicy policy, uint32_t shift, uint32_t fixed_point_position) +{ + RawTensor ref_src = library->get(shape, dt_in, 1, fixed_point_position); + RawTensor ref_dst = library->get(shape, dt_out, 1, fixed_point_position); + + // Fill reference + library->fill_tensor_uniform(ref_src, 0); + + // Compute reference + ReferenceCPP::depth_convert(ref_src, ref_dst, policy, shift); + + return ref_dst; +} + +RawTensor Reference::compute_reference_gemm(const TensorShape &src_shape1, const TensorShape &src_shape2, const TensorShape &src_shape3, + const TensorShape &dst_shape, float alpha, float beta, DataType dt, int fixed_point_position) +{ + RawTensor src1 = library->get(src_shape1, dt, 1, fixed_point_position); + RawTensor src2 = library->get(src_shape2, dt, 1, fixed_point_position); + RawTensor src3 = library->get(src_shape3, dt, 1, fixed_point_position); + RawTensor dst = library->get(dst_shape, dt, 1, fixed_point_position); + + // Fill reference + if(dt == DataType::F32) + { + std::uniform_real_distribution<> distribution(-1.0f, 1.0f); + library->fill(src1, distribution, 0); + library->fill(src2, distribution, 1); + library->fill(src3, distribution, 2); + } + else + { + library->fill_tensor_uniform(src1, 0); + library->fill_tensor_uniform(src2, 1); + library->fill_tensor_uniform(src3, 2); + } + + // Compute reference + ReferenceCPP::gemm(src1, src2, src3, dst, alpha, beta); + + return dst; +} + +RawTensor Reference::compute_reference_pixel_wise_multiplication(const TensorShape &shape, DataType dt_in0, DataType dt_in1, DataType dt_out, float scale, ConvertPolicy convert_policy, + RoundingPolicy rounding_policy) +{ + // Create reference + RawTensor ref_src1 = library->get(shape, dt_in0); + RawTensor ref_src2 = library->get(shape, dt_in1); + RawTensor ref_dst = library->get(shape, dt_out); + + // Fill reference + library->fill_tensor_uniform(ref_src1, 0); + library->fill_tensor_uniform(ref_src2, 1); + + // Compute reference + ReferenceCPP::pixel_wise_multiplication(ref_src1, ref_src2, ref_dst, scale, convert_policy, rounding_policy); + + return ref_dst; +} + +RawTensor Reference::compute_reference_fixed_point_pixel_wise_multiplication(const TensorShape &shape, DataType dt_in0, DataType dt_in1, DataType dt_out, float scale, int fixed_point_position, + ConvertPolicy convert_policy, RoundingPolicy rounding_policy) +{ + // Create reference + RawTensor ref_src1 = library->get(shape, dt_in0, 1, fixed_point_position); + RawTensor ref_src2 = library->get(shape, dt_in1, 1, fixed_point_position); + RawTensor ref_dst = library->get(shape, dt_out, 1, fixed_point_position); + + // Fill reference + library->fill_tensor_uniform(ref_src1, 0); + library->fill_tensor_uniform(ref_src2, 1); + + // Compute reference + ReferenceCPP::fixed_point_pixel_wise_multiplication(ref_src1, ref_src2, ref_dst, scale, convert_policy, rounding_policy); + + return ref_dst; +} + +RawTensor Reference::compute_reference_threshold(const TensorShape &shape, uint8_t threshold, uint8_t false_value, uint8_t true_value, ThresholdType type, uint8_t upper) +{ + // Create reference + RawTensor ref_src1 = library->get(shape, DataType::U8); + RawTensor ref_dst = library->get(shape, DataType::U8); + + // Fill reference + library->fill_tensor_uniform(ref_src1, 0); + + // Compute reference + 
ReferenceCPP::threshold(ref_src1, ref_dst, threshold, false_value, true_value, type, upper); + + return ref_dst; +} + +RawTensor Reference::compute_reference_activation_layer(const TensorShape &shape, DataType dt, ActivationLayerInfo act_info, int fixed_point_position) +{ + // Create reference + RawTensor ref_src = library->get(shape, dt, 1, fixed_point_position); + RawTensor ref_dst = library->get(shape, dt, 1, fixed_point_position); + + // Fill reference + if(dt == DataType::F32) + { + float min_bound = 0; + float max_bound = 0; + std::tie(min_bound, max_bound) = get_activation_layer_test_bounds<float>(act_info.activation()); + std::uniform_real_distribution<> distribution(min_bound, max_bound); + library->fill(ref_src, distribution, 0); + } + else + { + int min_bound = 0; + int max_bound = 0; + std::tie(min_bound, max_bound) = get_activation_layer_test_bounds<int8_t>(act_info.activation(), fixed_point_position); + std::uniform_int_distribution<> distribution(min_bound, max_bound); + library->fill(ref_src, distribution, 0); + } + + // Compute reference + ReferenceCPP::activation_layer(ref_src, ref_dst, act_info); + + return ref_dst; +} + +RawTensor Reference::compute_reference_batch_normalization_layer(const TensorShape &shape0, const TensorShape &shape1, DataType dt, float epsilon, int fixed_point_position) +{ + // Create reference + RawTensor ref_src = library->get(shape0, dt, 1, fixed_point_position); + RawTensor ref_dst = library->get(shape0, dt, 1, fixed_point_position); + RawTensor ref_mean = library->get(shape1, dt, 1, fixed_point_position); + RawTensor ref_var = library->get(shape1, dt, 1, fixed_point_position); + RawTensor ref_beta = library->get(shape1, dt, 1, fixed_point_position); + RawTensor ref_gamma = library->get(shape1, dt, 1, fixed_point_position); + + // Fill tensors with values from -1 to 1.
+ if(dt == DataType::F32) + { + float min_bound = 0.f; + float max_bound = 0.f; + std::tie(min_bound, max_bound) = get_batchnormalization_layer_test_bounds<float>(); + std::uniform_real_distribution<> distribution(min_bound, max_bound); + std::uniform_real_distribution<> distribution_var(0, max_bound); + library->fill(ref_src, distribution, 0); + library->fill(ref_mean, distribution, 1); + library->fill(ref_var, distribution_var, 0); + library->fill(ref_beta, distribution, 3); + library->fill(ref_gamma, distribution, 4); + } + else + { + int min_bound = 0; + int max_bound = 0; + std::tie(min_bound, max_bound) = get_batchnormalization_layer_test_bounds<int8_t>(fixed_point_position); + std::uniform_int_distribution<> distribution(min_bound, max_bound); + std::uniform_int_distribution<> distribution_var(0, max_bound); + library->fill(ref_src, distribution, 0); + library->fill(ref_mean, distribution, 1); + library->fill(ref_var, distribution_var, 0); + library->fill(ref_beta, distribution, 3); + library->fill(ref_gamma, distribution, 4); + } + + // Compute reference + ReferenceCPP::batch_normalization_layer(ref_src, ref_dst, ref_mean, ref_var, ref_beta, ref_gamma, epsilon, fixed_point_position); + + return ref_dst; +} + +RawTensor Reference::compute_reference_convolution_layer(const TensorShape &input_shape, const TensorShape &weights_shape, const TensorShape &bias_shape, const TensorShape &output_shape, DataType dt, + const PadStrideInfo &conv_info, int fixed_point_position) +{ + // Create reference + RawTensor ref_src = library->get(input_shape, dt, 1, fixed_point_position); + RawTensor ref_weights = library->get(weights_shape, dt, 1, fixed_point_position); + RawTensor ref_bias = library->get(bias_shape, dt, 1, fixed_point_position); + RawTensor ref_dst = library->get(output_shape, dt, 1, fixed_point_position); + + // Fill reference + if(dt == DataType::F32) + { + std::uniform_real_distribution<> distribution(-1.0f, 1.0f); + library->fill(ref_src, distribution, 0); + library->fill(ref_weights, distribution, 1); + library->fill(ref_bias, distribution, 2); + } + else + { + library->fill_tensor_uniform(ref_src, 0); + library->fill_tensor_uniform(ref_weights, 1); + library->fill_tensor_uniform(ref_bias, 2); + } + + // Compute reference + ReferenceCPP::convolution_layer(ref_src, ref_weights, ref_bias, ref_dst, conv_info); + + return ref_dst; +} + +RawTensor Reference::compute_reference_fully_connected_layer(const TensorShape &input_shape, const TensorShape &weights_shape, const TensorShape &bias_shape, const TensorShape &output_shape, + DataType dt, bool transpose_weights, int fixed_point_position) +{ + // Create reference + RawTensor ref_src = library->get(input_shape, dt, 1, fixed_point_position); + RawTensor ref_bias = library->get(bias_shape, dt, 1, fixed_point_position); + RawTensor ref_dst = library->get(output_shape, dt, 1, fixed_point_position); + + // Swap the first and second dimension of weights' shape if transpose_weights is true + TensorShape ws = weights_shape; + if(transpose_weights) + { + const size_t dimx = ws.x(); + ws.set(0, ws.y()); + ws.set(1, dimx); + } + + RawTensor ref_weights = library->get(ws, dt, 1, fixed_point_position); + + // Fill reference + if(dt == DataType::F32) + { + std::uniform_real_distribution<> distribution(-1.0f, 1.0f); + library->fill(ref_src, distribution, 0); + library->fill(ref_weights, distribution, 1); + library->fill(ref_bias, distribution, 2); + } + else + { + library->fill_tensor_uniform(ref_src, 0); + library->fill_tensor_uniform(ref_weights, 1); +
library->fill_tensor_uniform(ref_bias, 2); + } + + // Compute reference + ReferenceCPP::fully_connected_layer(ref_src, ref_weights, ref_bias, ref_dst); + + return ref_dst; +} + +RawTensor Reference::compute_reference_normalization_layer(const TensorShape &shape, DataType dt, NormalizationLayerInfo norm_info, int fixed_point_position) +{ + // Create reference + RawTensor ref_src = library->get(shape, dt, 1, fixed_point_position); + RawTensor ref_dst = library->get(shape, dt, 1, fixed_point_position); + + // Fill reference + if(dt == DataType::QS8) + { + const int8_t one_fixed_point = 1 << fixed_point_position; + const int8_t minus_one_fixed_point = -one_fixed_point; + library->fill_tensor_uniform(ref_src, 0, minus_one_fixed_point, one_fixed_point); + } + else + { + library->fill_tensor_uniform(ref_src, 0); + } + + // Compute reference + ReferenceCPP::normalization_layer(ref_src, ref_dst, norm_info); + + return ref_dst; +} + +RawTensor Reference::compute_reference_pooling_layer(const TensorShape &shape_in, const TensorShape &shape_out, DataType dt, PoolingLayerInfo pool_info, int fixed_point_position) +{ + // Create reference + RawTensor ref_src = library->get(shape_in, dt, 1, fixed_point_position); + RawTensor ref_dst = library->get(shape_out, dt, 1, fixed_point_position); + + // Fill reference + int min = 0; + int max = 0; + switch(dt) + { + case DataType::F32: + min = -1; + max = 1; + break; + case DataType::QS8: + min = -(1 << fixed_point_position); + max = (1 << fixed_point_position); + break; + default: + ARM_COMPUTE_ERROR("DataType not supported."); + } + std::uniform_real_distribution<> distribution(min, max); + library->fill(ref_src, distribution, 0.0); + + // Compute reference + ReferenceCPP::pooling_layer(ref_src, ref_dst, pool_info, fixed_point_position); + + return ref_dst; +} + +RawTensor Reference::compute_reference_softmax_layer(const TensorShape &shape, DataType dt, int fixed_point_position) +{ + // Create reference + RawTensor ref_src = library->get(shape, dt, 1, fixed_point_position); + RawTensor ref_dst = library->get(shape, dt, 1, fixed_point_position); + + // Fill reference + if(arm_compute::is_data_type_float(dt)) + { + std::uniform_real_distribution<> distribution(-10, 10); + library->fill(ref_src, distribution, 0); + } + else + { + int one_fixed = 1 << fixed_point_position; + std::uniform_int_distribution<> distribution(-one_fixed, one_fixed); + library->fill(ref_src, distribution, 0); + } + + // Compute reference + ReferenceCPP::softmax_layer(ref_src, ref_dst); + + return ref_dst; +} + +RawTensor Reference::compute_reference_fixed_point_operation(const TensorShape &shape, DataType dt_in, DataType dt_out, FixedPointOp op, int fixed_point_position) +{ + // Create reference + RawTensor ref_src = library->get(shape, dt_in, 1, fixed_point_position); + RawTensor ref_dst = library->get(shape, dt_out, 1, fixed_point_position); + + // Fill reference + int min = 0; + int max = 0; + switch(op) + { + case(FixedPointOp::INV_SQRT): + min = 32; + max = 127; + break; + case(FixedPointOp::LOG): + min = (1 << (fixed_point_position - 1)); + max = 63; + break; + case(FixedPointOp::EXP): + min = 1; + max = (1 << (fixed_point_position - 1)); + break; + case(FixedPointOp::RECIPROCAL): + min = 15; + max = 100; + break; + default: + ARM_COMPUTE_ERROR("Fixed point operation not supported"); + } + std::uniform_int_distribution<> distribution(min, max); + library->fill(ref_src, distribution, 0); + + // Compute reference + ReferenceCPP::fixed_point_operation(ref_src, ref_dst, op); + + return 
ref_dst; +} + +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/Reference.h b/tests/validation/Reference.h new file mode 100644 index 0000000000..4e5b462f9e --- /dev/null +++ b/tests/validation/Reference.h @@ -0,0 +1,303 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_TEST_REFERENCE_REFERENCE_H__ +#define __ARM_COMPUTE_TEST_REFERENCE_REFERENCE_H__ + +#include "RawTensor.h" +#include "Types.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +/** Interface for reference implementations. */ +class Reference +{ +public: + /** Compute reference integral image. + * + * @param[in] shape Shape of the input and output tensors. + * + * @return Computed raw tensor. + */ + static RawTensor compute_reference_integral_image(const TensorShape &shape); + /** Compute reference absolute difference. + * + * @param[in] shape Shape of the input and output tensors. + * @param[in] dt_in0 Data type of first input tensor. + * @param[in] dt_in1 Data type of second input tensor. + * @param[in] dt_out Data type of the output tensor. + * + * @return Computed raw tensor. + */ + static RawTensor compute_reference_absolute_difference(const TensorShape &shape, DataType dt_in0, DataType dt_in1, DataType dt_out); + /** Compute reference accumulate. + * + * @param[in] shape Shape of the input and output tensors. + * + * @return Computed raw tensor. + */ + static RawTensor compute_reference_accumulate(const TensorShape &shape); + /** Compute reference accumulate squared. + * + * @param[in] shape Shape of the input and output tensors. + * @param[in] shift A uint32_t value within the range of [0, 15] + * + * @return Computed raw tensor. + */ + static RawTensor compute_reference_accumulate_squared(const TensorShape &shape, uint32_t shift); + /** Compute reference accumulate weighted. + * + * @param[in] shape Shape of the input and output tensors. + * @param[in] alpha A float value within the range of [0, 1] + * + * @return Computed raw tensor. + */ + static RawTensor compute_reference_accumulate_weighted(const TensorShape &shape, float alpha); + /** Compute reference arithmetic addition. + * + * @param[in] shape Shape of the input and output tensors. + * @param[in] dt_in0 Data type of first input tensor. + * @param[in] dt_in1 Data type of second input tensor. + * @param[in] dt_out Data type of the output tensor.
+    /** Compute reference arithmetic subtraction.
+     *
+     * @param[in] shape          Shape of the input and output tensors.
+     * @param[in] dt_in0         Data type of first input tensor.
+     * @param[in] dt_in1         Data type of second input tensor.
+     * @param[in] dt_out         Data type of the output tensor.
+     * @param[in] convert_policy Overflow policy of the operation.
+     *
+     * @return Computed raw tensor.
+     */
+    static RawTensor compute_reference_arithmetic_subtraction(const TensorShape &shape, DataType dt_in0, DataType dt_in1, DataType dt_out, ConvertPolicy convert_policy);
+    /** Compute reference bitwise and.
+     *
+     * @param[in] shape Shape of the input and output tensors.
+     *
+     * @return Computed raw tensor.
+     */
+    static RawTensor compute_reference_bitwise_and(const TensorShape &shape);
+    /** Compute reference bitwise or.
+     *
+     * @param[in] shape Shape of the input and output tensors.
+     *
+     * @return Computed raw tensor.
+     */
+    static RawTensor compute_reference_bitwise_or(const TensorShape &shape);
+    /** Compute reference bitwise xor.
+     *
+     * @param[in] shape Shape of the input and output tensors.
+     *
+     * @return Computed raw tensor.
+     */
+    static RawTensor compute_reference_bitwise_xor(const TensorShape &shape);
+    /** Compute reference bitwise not.
+     *
+     * @param[in] shape Shape of the input and output tensors.
+     *
+     * @return Computed raw tensor.
+     */
+    static RawTensor compute_reference_bitwise_not(const TensorShape &shape);
+    /** Compute reference 3-by-3 box filter.
+     *
+     * @param[in] shape Shape of the input and output tensors.
+     *
+     * @return Computed raw tensor.
+     */
+    static RawTensor compute_reference_box3x3(const TensorShape &shape);
+    /** Compute reference depth convert.
+     *
+     * @param[in] shape                Shape of the input and output tensors.
+     * @param[in] dt_in                Data type of input tensor.
+     * @param[in] dt_out               Data type of the output tensor.
+     * @param[in] policy               Overflow policy of the operation.
+     * @param[in] shift                Value for down/up conversions. Must be 0 <= shift < 8.
+     * @param[in] fixed_point_position Fixed point position.
+     *
+     * @return Computed raw tensor.
+     */
+    static RawTensor compute_reference_depth_convert(const TensorShape &shape, DataType dt_in, DataType dt_out, ConvertPolicy policy, uint32_t shift, uint32_t fixed_point_position);
+    /** Compute reference matrix multiply (GEMM) function.
+     *
+     * @param[in] src_shape1           First input tensor shape.
+     * @param[in] src_shape2           Second input tensor shape.
+     * @param[in] src_shape3           Third input tensor shape.
+     * @param[in] dst_shape            Output tensor shape.
+     * @param[in] alpha                Weight of the matrix product.
+     * @param[in] beta                 Weight of the third matrix.
+     * @param[in] dt                   Tensor's data type.
+     * @param[in] fixed_point_position (Optional) Number of bits for the fractional part of the fixed point numbers.
+     *
+     * @return Computed output tensor.
+     */
+    static RawTensor compute_reference_gemm(const TensorShape &src_shape1, const TensorShape &src_shape2, const TensorShape &src_shape3,
+                                            const TensorShape &dst_shape, float alpha, float beta, DataType dt, int fixed_point_position = 0);
+    /** Compute reference pixel-wise multiplication.
+     *
+     * @param[in] shape  Shape of the input and output tensors.
+     * @param[in] dt_in0 Data type of first input tensor.
+     * @param[in] dt_in1 Data type of second input tensor.
+ * @param[in] dt_out Data type of the output tensor. + * @param[in] scale Non-negative scale. + * @param[in] convert_policy Overflow policy of the operation. + * @param[in] rounding_policy Rounding policy of the operation. + * + * @return Computed raw tensor. + */ + static RawTensor compute_reference_pixel_wise_multiplication(const TensorShape &shape, DataType dt_in0, DataType dt_in1, DataType dt_out, float scale, ConvertPolicy convert_policy, + RoundingPolicy rounding_policy); + /** Compute reference pixel-wise multiplication. + * + * @param[in] shape Shape of the input and output tensors. + * @param[in] dt_in0 Data type of first input tensor. + * @param[in] dt_in1 Data type of second input tensor. + * @param[in] dt_out Data type of the output tensor. + * @param[in] scale Scale to apply after multiplication. Must be positive. + * @param[in] fixed_point_position Fixed point position that expresses the number of bits for the fractional part of the number. + * @param[in] convert_policy Overflow policy of the operation. + * @param[in] rounding_policy Rounding policy of the operation. + * + * @return Computed raw tensor. + */ + static RawTensor compute_reference_fixed_point_pixel_wise_multiplication(const TensorShape &shape, DataType dt_in0, DataType dt_in1, DataType dt_out, float scale, int fixed_point_position, + ConvertPolicy convert_policy, RoundingPolicy rounding_policy); + /** Compute reference threshold. + * + * @param[in] shape Shape of the input and output tensors. + * @param[in] threshold Threshold. When the threshold type is RANGE, this is used as the lower threshold. + * @param[in] false_value value to set when the condition is not respected. + * @param[in] true_value value to set when the condition is respected. + * @param[in] type Thresholding type. Either RANGE or BINARY. + * @param[in] upper Upper threshold. Only used when the thresholding type is RANGE. + * + * @return Computed raw tensor. + */ + static RawTensor compute_reference_threshold(const TensorShape &shape, uint8_t threshold, uint8_t false_value, uint8_t true_value, ThresholdType type, uint8_t upper); + /** Compute reference activation layer. + * + * @param[in] shape Shape of the input and output tensors. + * @param[in] dt Data type of the tensors. + * @param[in] act_info Activation layer information. + * @param[in] fixed_point_position (Optional)Number of bits for the fractional part of fixed point numbers. + * + * @return Computed raw tensor. + */ + static RawTensor compute_reference_activation_layer(const TensorShape &shape, DataType dt, ActivationLayerInfo act_info, int fixed_point_position = 0); + /** Compute reference batch normalization layer. + * + * @param[in] shape0 Shape of the input and output tensors. + * @param[in] shape1 Shape of the vector tensors. + * @param[in] dt Data type of all input and output tensors. + * @param[in] epsilon Small value to avoid division with zero. + * @param[in] fixed_point_position Fixed point position. + * + * @return Computed raw tensor. 
+     */
+    static RawTensor compute_reference_batch_normalization_layer(const TensorShape &shape0, const TensorShape &shape1, DataType dt, float epsilon, int fixed_point_position = 0);
+    /** Compute reference convolution layer.
+     *
+     * @param[in] input_shape          Shape for the input tensor.
+     * @param[in] weights_shape        Shape for the weights tensor.
+     * @param[in] bias_shape           Shape for the bias tensor.
+     * @param[in] output_shape         Shape for the output tensor.
+     * @param[in] dt                   Data type to use.
+     * @param[in] conv_info            Pads and strides information for the convolution layer.
+     * @param[in] fixed_point_position Number of bits for the fractional part of the fixed point numbers.
+     *
+     * @return Computed raw tensor.
+     */
+    static RawTensor compute_reference_convolution_layer(const TensorShape &input_shape, const TensorShape &weights_shape, const TensorShape &bias_shape, const TensorShape &output_shape, DataType dt,
+                                                         const PadStrideInfo &conv_info, int fixed_point_position);
+    /** Compute reference for fully connected layer function.
+     *
+     * @param[in] input_shape          Shape for the input tensor.
+     * @param[in] weights_shape        Shape for the weights tensor.
+     * @param[in] bias_shape           Shape for the bias tensor.
+     * @param[in] output_shape         Shape for the output tensor.
+     * @param[in] dt                   Data type to use.
+     * @param[in] transpose_weights    Transpose the weights if true.
+     * @param[in] fixed_point_position Number of bits for the fractional part of the fixed point numbers.
+     *
+     * @return Computed raw tensor.
+     */
+    static RawTensor compute_reference_fully_connected_layer(const TensorShape &input_shape, const TensorShape &weights_shape, const TensorShape &bias_shape, const TensorShape &output_shape, DataType dt,
+                                                             bool transpose_weights, int fixed_point_position);
+    /** Compute reference normalization layer.
+     *
+     * @param[in] shape                Shape of the input and output tensors.
+     * @param[in] dt                   Data type of input and output tensors.
+     * @param[in] norm_info            Normalization Layer information.
+     * @param[in] fixed_point_position (Optional) Fixed point position that expresses the number of bits for the fractional part of the number when the tensor's data type is QS8 or QS16 (default = 0).
+     *
+     * @return Computed raw tensor.
+     */
+    static RawTensor compute_reference_normalization_layer(const TensorShape &shape, DataType dt, NormalizationLayerInfo norm_info, int fixed_point_position = 0);
+    /** Compute reference pooling layer.
+     *
+     * @param[in] shape_in             Shape of the input tensor.
+     * @param[in] shape_out            Shape of the output tensor.
+     * @param[in] dt                   Data type of input and output tensors.
+     * @param[in] pool_info            Pooling Layer information.
+     * @param[in] fixed_point_position (Optional) Number of bits for the fractional part of the fixed point numbers.
+     *
+     * @return Computed raw tensor.
+     */
+    static RawTensor compute_reference_pooling_layer(const TensorShape &shape_in, const TensorShape &shape_out, DataType dt, PoolingLayerInfo pool_info, int fixed_point_position = 0);
+    /** Compute reference softmax layer.
+     *
+     * @param[in] shape                Shape of the input and output tensors.
+     * @param[in] dt                   Data type of input and output tensors.
+     * @param[in] fixed_point_position (Optional) Number of bits for the fractional part of the fixed point numbers.
+     *
+     * @return Computed raw tensor.
+     */
+    static RawTensor compute_reference_softmax_layer(const TensorShape &shape, DataType dt, int fixed_point_position = 0);
+    /** Compute reference fixed point operation.
+     *
+     * @param[in] shape Shape of the input and output tensors.
+ * @param[in] dt_in Data type of the input tensor. + * @param[in] dt_out Data type of the output tensor. + * @param[in] op Fixed point operation to perform. + * @param[in] fixed_point_position Number of bits for the fractional part of the fixed point numbers + * + * @return Computed raw tensor. + */ + static RawTensor compute_reference_fixed_point_operation(const TensorShape &shape, DataType dt_in, DataType dt_out, FixedPointOp op, int fixed_point_position); + +protected: + Reference() = default; + ~Reference() = default; +}; +} // namespace validation +} // namespace test +} // namespace arm_compute +#endif diff --git a/tests/validation/ReferenceCPP.cpp b/tests/validation/ReferenceCPP.cpp new file mode 100644 index 0000000000..ddb84835c3 --- /dev/null +++ b/tests/validation/ReferenceCPP.cpp @@ -0,0 +1,282 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "ReferenceCPP.h" + +#include "TensorFactory.h" +#include "TensorOperations.h" +#include "TensorVisitors.h" +#include "TypePrinter.h" + +#include "arm_compute/core/Coordinates.h" +#include "arm_compute/core/Error.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/TensorShape.h" +#include "arm_compute/runtime/Tensor.h" + +#include "boost_wrapper.h" + +#include +#include +#include + +using namespace arm_compute::test::validation::tensor_visitors; + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +// Absolute difference +void ReferenceCPP::absolute_difference(const RawTensor &src1, const RawTensor &src2, RawTensor &dst) +{ + const TensorVariant s1 = TensorFactory::get_tensor(src1); + const TensorVariant s2 = TensorFactory::get_tensor(src2); + TensorVariant d = TensorFactory::get_tensor(dst); + boost::apply_visitor(absolute_difference_visitor(), s1, s2, d); +} +// Integral image +void ReferenceCPP::integral_image(const RawTensor &src, RawTensor &dst) +{ + ARM_COMPUTE_ERROR_ON(src.data_type() != DataType::U8 || dst.data_type() != DataType::U32); + const Tensor s(src.shape(), src.data_type(), src.fixed_point_position(), reinterpret_cast(src.data())); + Tensor d(dst.shape(), dst.data_type(), dst.fixed_point_position(), reinterpret_cast(dst.data())); + tensor_operations::integral_image(s, d); +} +// Accumulate +void ReferenceCPP::accumulate(const RawTensor &src, RawTensor &dst) +{ + ARM_COMPUTE_ERROR_ON(src.data_type() != DataType::U8 || dst.data_type() != DataType::S16); + const Tensor s(src.shape(), src.data_type(), src.fixed_point_position(), reinterpret_cast(src.data())); + Tensor d(dst.shape(), dst.data_type(), dst.fixed_point_position(), reinterpret_cast(dst.data())); + tensor_operations::accumulate(s, d); +} + +// Accumulate squared +void ReferenceCPP::accumulate_squared(const RawTensor &src, RawTensor &dst, uint32_t shift) +{ + ARM_COMPUTE_ERROR_ON(src.data_type() != DataType::U8 || dst.data_type() != DataType::S16); + const Tensor s(src.shape(), src.data_type(), src.fixed_point_position(), reinterpret_cast(src.data())); + Tensor d(dst.shape(), dst.data_type(), dst.fixed_point_position(), reinterpret_cast(dst.data())); + tensor_operations::accumulate_squared(s, d, shift); +} + +// Accumulate weighted +void ReferenceCPP::accumulate_weighted(const RawTensor &src, RawTensor &dst, float alpha) +{ + ARM_COMPUTE_ERROR_ON(src.data_type() != DataType::U8 || dst.data_type() != DataType::U8); + const Tensor s(src.shape(), src.data_type(), src.fixed_point_position(), reinterpret_cast(src.data())); + Tensor d(dst.shape(), dst.data_type(), dst.fixed_point_position(), reinterpret_cast(dst.data())); + tensor_operations::accumulate_weighted(s, d, alpha); +} + +// Arithmetic addition +void ReferenceCPP::arithmetic_addition(const RawTensor &src1, const RawTensor &src2, RawTensor &dst, ConvertPolicy convert_policy) +{ + const TensorVariant s1 = TensorFactory::get_tensor(src1); + const TensorVariant s2 = TensorFactory::get_tensor(src2); + TensorVariant d = TensorFactory::get_tensor(dst); + boost::apply_visitor(arithmetic_addition_visitor(convert_policy), s1, s2, d); +} + +// Arithmetic subtraction +void ReferenceCPP::arithmetic_subtraction(const RawTensor &src1, const RawTensor &src2, RawTensor &dst, ConvertPolicy convert_policy) +{ + const TensorVariant s1 = TensorFactory::get_tensor(src1); + const TensorVariant s2 = TensorFactory::get_tensor(src2); + TensorVariant d = TensorFactory::get_tensor(dst); + 
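// The visitor carries the convert policy; boost::apply_visitor dispatches on the runtime element types of all three operands at once. +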
boost::apply_visitor(arithmetic_subtraction_visitor(convert_policy), s1, s2, d); +} + +// Bitwise and +void ReferenceCPP::bitwise_and(const RawTensor &src1, const RawTensor &src2, RawTensor &dst) +{ + ARM_COMPUTE_ERROR_ON(src1.data_type() != DataType::U8 || src2.data_type() != DataType::U8 || dst.data_type() != DataType::U8); + const Tensor s1(src1.shape(), src1.data_type(), src1.fixed_point_position(), reinterpret_cast(src1.data())); + const Tensor s2(src2.shape(), src2.data_type(), src2.fixed_point_position(), reinterpret_cast(src2.data())); + Tensor d(dst.shape(), dst.data_type(), dst.fixed_point_position(), reinterpret_cast(dst.data())); + tensor_operations::bitwise_and(s1, s2, d); +} + +// Bitwise or +void ReferenceCPP::bitwise_or(const RawTensor &src1, const RawTensor &src2, RawTensor &dst) +{ + ARM_COMPUTE_ERROR_ON(src1.data_type() != DataType::U8 || src2.data_type() != DataType::U8 || dst.data_type() != DataType::U8); + const Tensor s1(src1.shape(), src1.data_type(), src1.fixed_point_position(), reinterpret_cast(src1.data())); + const Tensor s2(src2.shape(), src2.data_type(), src2.fixed_point_position(), reinterpret_cast(src2.data())); + Tensor d(dst.shape(), dst.data_type(), dst.fixed_point_position(), reinterpret_cast(dst.data())); + tensor_operations::bitwise_or(s1, s2, d); +} + +// Bitwise xor +void ReferenceCPP::bitwise_xor(const RawTensor &src1, const RawTensor &src2, RawTensor &dst) +{ + ARM_COMPUTE_ERROR_ON(src1.data_type() != DataType::U8 || src2.data_type() != DataType::U8 || dst.data_type() != DataType::U8); + const Tensor s1(src1.shape(), src1.data_type(), src1.fixed_point_position(), reinterpret_cast(src1.data())); + const Tensor s2(src2.shape(), src2.data_type(), src2.fixed_point_position(), reinterpret_cast(src2.data())); + Tensor d(dst.shape(), dst.data_type(), dst.fixed_point_position(), reinterpret_cast(dst.data())); + tensor_operations::bitwise_xor(s1, s2, d); +} + +// Bitwise not +void ReferenceCPP::bitwise_not(const RawTensor &src, RawTensor &dst) +{ + ARM_COMPUTE_ERROR_ON(src.data_type() != DataType::U8 || dst.data_type() != DataType::U8); + const Tensor s(src.shape(), src.data_type(), src.fixed_point_position(), reinterpret_cast(src.data())); + Tensor d(dst.shape(), dst.data_type(), dst.fixed_point_position(), reinterpret_cast(dst.data())); + tensor_operations::bitwise_not(s, d); +} + +// 3-by-3 box filter +void ReferenceCPP::box3x3(const RawTensor &src, RawTensor &dst) +{ + ARM_COMPUTE_ERROR_ON(src.data_type() != DataType::U8 || dst.data_type() != DataType::U8); + const Tensor s(src.shape(), src.data_type(), src.fixed_point_position(), reinterpret_cast(src.data())); + Tensor d(dst.shape(), dst.data_type(), dst.fixed_point_position(), reinterpret_cast(dst.data())); + tensor_operations::box3x3(s, d); +} + +// Depth conversion +void ReferenceCPP::depth_convert(const RawTensor &src, RawTensor &dst, ConvertPolicy policy, uint32_t shift) +{ + const TensorVariant s = TensorFactory::get_tensor(src); + TensorVariant d = TensorFactory::get_tensor(dst); + boost::apply_visitor(tensor_visitors::depth_convert_visitor(policy, shift), s, d); +} + +// GEMM +void ReferenceCPP::gemm(const RawTensor &src1, const RawTensor &src2, const RawTensor &src3, + RawTensor &dst, float alpha, float beta) +{ + const TensorVariant s1 = TensorFactory::get_tensor(src1); + const TensorVariant s2 = TensorFactory::get_tensor(src2); + const TensorVariant s3 = TensorFactory::get_tensor(src3); + TensorVariant d = TensorFactory::get_tensor(dst); + + 
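// The three inputs travel inside the visitor; only the output variant is dispatched on. +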
boost::apply_visitor(tensor_visitors::gemm_visitor(s1, s2, s3, alpha, beta), d); +} + +// Pixel-wise multiplication +void ReferenceCPP::pixel_wise_multiplication(const RawTensor &src1, const RawTensor &src2, RawTensor &dst, float scale, ConvertPolicy convert_policy, RoundingPolicy rounding_policy) +{ + const TensorVariant s1 = TensorFactory::get_tensor(src1); + const TensorVariant s2 = TensorFactory::get_tensor(src2); + TensorVariant d = TensorFactory::get_tensor(dst); + boost::apply_visitor(pixel_wise_multiplication_visitor(scale, convert_policy, rounding_policy), s1, s2, d); +} + +// Fixed-point Pixel-wise multiplication +void ReferenceCPP::fixed_point_pixel_wise_multiplication(const RawTensor &src1, const RawTensor &src2, RawTensor &dst, float scale, ConvertPolicy convert_policy, RoundingPolicy rounding_policy) +{ + const TensorVariant s1 = TensorFactory::get_tensor(src1); + const TensorVariant s2 = TensorFactory::get_tensor(src2); + TensorVariant d = TensorFactory::get_tensor(dst); + boost::apply_visitor(tensor_visitors::fixed_point_pixel_wise_multiplication_visitor(s1, s2, scale, convert_policy, rounding_policy), d); +} + +// Threshold +void ReferenceCPP::threshold(const RawTensor &src, RawTensor &dst, uint8_t threshold, uint8_t false_value, uint8_t true_value, ThresholdType type, uint8_t upper) +{ + ARM_COMPUTE_ERROR_ON(src.data_type() != DataType::U8 || dst.data_type() != DataType::U8); + const Tensor s(src.shape(), src.data_type(), src.fixed_point_position(), reinterpret_cast(src.data())); + Tensor d(dst.shape(), dst.data_type(), dst.fixed_point_position(), reinterpret_cast(dst.data())); + threshold_operation(s, d, threshold, false_value, true_value, type, upper); +} + +// Activation layer +void ReferenceCPP::activation_layer(const RawTensor &input, RawTensor &output, ActivationLayerInfo act_info) +{ + const TensorVariant s = TensorFactory::get_tensor(input); + TensorVariant d = TensorFactory::get_tensor(output); + boost::apply_visitor(tensor_visitors::activation_layer_visitor(s, act_info), d); +} + +// Batch Normalization Layer +void ReferenceCPP::batch_normalization_layer(const RawTensor &src, RawTensor &dst, const RawTensor &mean, const RawTensor &var, const RawTensor &beta, const RawTensor &gamma, float epsilon, + int fixed_point_position) +{ + const TensorVariant s = TensorFactory::get_tensor(src); + TensorVariant d = TensorFactory::get_tensor(dst); + const TensorVariant m = TensorFactory::get_tensor(mean); + const TensorVariant v = TensorFactory::get_tensor(var); + const TensorVariant b = TensorFactory::get_tensor(beta); + const TensorVariant g = TensorFactory::get_tensor(gamma); + boost::apply_visitor(tensor_visitors::batch_normalization_layer_visitor(s, m, v, b, g, epsilon, fixed_point_position), d); +} + +// Convolution Layer +void ReferenceCPP::convolution_layer(const RawTensor &src, const RawTensor &weights, const RawTensor &bias, RawTensor &dst, const PadStrideInfo &conv_info) +{ + const TensorVariant s = TensorFactory::get_tensor(src); + const TensorVariant w = TensorFactory::get_tensor(weights); + const TensorVariant b = TensorFactory::get_tensor(bias); + TensorVariant d = TensorFactory::get_tensor(dst); + boost::apply_visitor(tensor_visitors::convolution_layer_visitor(s, w, b, conv_info), d); +} + +// Fully connected layer +void ReferenceCPP::fully_connected_layer(const RawTensor &src, const RawTensor &weights, const RawTensor &bias, RawTensor &dst) +{ + const TensorVariant s = TensorFactory::get_tensor(src); + const TensorVariant w = 
TensorFactory::get_tensor(weights); + const TensorVariant b = TensorFactory::get_tensor(bias); + TensorVariant d = TensorFactory::get_tensor(dst); + boost::apply_visitor(tensor_visitors::fully_connected_layer_visitor(s, w, b), d); +} + +// Normalization Layer +void ReferenceCPP::normalization_layer(const RawTensor &src, RawTensor &dst, NormalizationLayerInfo norm_info) +{ + const TensorVariant s = TensorFactory::get_tensor(src); + TensorVariant d = TensorFactory::get_tensor(dst); + boost::apply_visitor(tensor_visitors::normalization_layer_visitor(s, norm_info), d); +} + +// Pooling Layer +void ReferenceCPP::pooling_layer(const RawTensor &src, RawTensor &dst, PoolingLayerInfo pool_info, int fixed_point_position) +{ + const TensorVariant s = TensorFactory::get_tensor(src); + TensorVariant d = TensorFactory::get_tensor(dst); + boost::apply_visitor(tensor_visitors::pooling_layer_visitor(s, pool_info, fixed_point_position), d); +} + +// Softmax Layer +void ReferenceCPP::softmax_layer(const RawTensor &src, RawTensor &dst) +{ + const TensorVariant s = TensorFactory::get_tensor(src); + TensorVariant d = TensorFactory::get_tensor(dst); + boost::apply_visitor(tensor_visitors::softmax_layer_visitor(s), d); +} + +// Fixed point operation +void ReferenceCPP::fixed_point_operation(const RawTensor &src, RawTensor &dst, FixedPointOp op) +{ + const TensorVariant s = TensorFactory::get_tensor(src); + TensorVariant d = TensorFactory::get_tensor(dst); + boost::apply_visitor(tensor_visitors::fixed_point_operation_visitor(s, op), d); +} + +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/ReferenceCPP.h b/tests/validation/ReferenceCPP.h new file mode 100644 index 0000000000..be5a733896 --- /dev/null +++ b/tests/validation/ReferenceCPP.h @@ -0,0 +1,250 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_TEST_REFERENCE_REFERENCE_CPP_H__ +#define __ARM_COMPUTE_TEST_REFERENCE_REFERENCE_CPP_H__ + +#include "Reference.h" + +#include "RawTensor.h" + +#include + +namespace arm_compute +{ +class Tensor; + +namespace test +{ +namespace validation +{ +/** C++ reference implementation. */ +class ReferenceCPP final : public Reference +{ +public: + /** Function to compute the integral image of a tensor. + * + * @param[in] src Input tensor. + * @param[out] dst Result tensor. 
+ */ + static void integral_image(const RawTensor &src, RawTensor &dst); + /** Function to compute the absolute difference between two tensors. + * + * @param[in] src1 First tensor. + * @param[in] src2 Second tensor. + * @param[out] dst Result tensor. + */ + static void absolute_difference(const RawTensor &src1, const RawTensor &src2, RawTensor &dst); + /** Function to accumulate an input tensor into an output tensor. + * + * @param[in] src Input tensor. + * @param[in, out] dst Result tensor. + */ + static void accumulate(const RawTensor &src, RawTensor &dst); + /** Function to accumulate a squared value from an input tensor to an output tensor. + * + * @param[in] src Input tensor. + * @param[in, out] dst Result tensor. + * @param[in] shift A uint32_t value within the range of [0, 15] + */ + static void accumulate_squared(const RawTensor &src, RawTensor &dst, uint32_t shift); + /** Function to accumulate a weighted value from an input tensor to an output tensor. + * + * @param[in] src Input tensor. + * @param[in, out] dst Result tensor. + * @param[in] alpha A float value within the range of [0, 1] + */ + static void accumulate_weighted(const RawTensor &src, RawTensor &dst, float alpha); + /** Arithmetic addition of @p src1 and @p src2 + * + * @param[in] src1 First tensor. + * @param[in] src2 Second tensor. + * @param[out] dst Result tensor. + * @param[in] convert_policy Overflow policy. + */ + static void arithmetic_addition(const RawTensor &src1, const RawTensor &src2, RawTensor &dst, ConvertPolicy convert_policy); + /** Arithmetic subtraction of @p src2 from @p src1 + * + * @param[in] src1 First tensor. + * @param[in] src2 Second tensor. + * @param[out] dst Result tensor. + * @param[in] convert_policy Overflow policy. + */ + static void arithmetic_subtraction(const RawTensor &src1, const RawTensor &src2, RawTensor &dst, ConvertPolicy convert_policy); + /** Function to compute the bitwise and between two tensors. + * + * @param[in] src1 First tensor. + * @param[in] src2 Second tensor. + * @param[out] dst Result tensor. + */ + static void bitwise_and(const RawTensor &src1, const RawTensor &src2, RawTensor &dst); + /** Function to compute the bitwise or between two tensors. + * + * @param[in] src1 First tensor. + * @param[in] src2 Second tensor. + * @param[out] dst Result tensor. + */ + static void bitwise_or(const RawTensor &src1, const RawTensor &src2, RawTensor &dst); + /** Function to compute the bitwise xor between two tensors. + * + * @param[in] src1 First tensor. + * @param[in] src2 Second tensor. + * @param[out] dst Result tensor. + */ + static void bitwise_xor(const RawTensor &src1, const RawTensor &src2, RawTensor &dst); + /** Function to compute the bitwise not of a tensor. + * + * @param[in] src Input tensor. + * @param[out] dst Result tensor. + */ + static void bitwise_not(const RawTensor &src, RawTensor &dst); + /** Function to compute 3-by-3 box filtered result tensor. + * + * @param[in] src Input tensor. + * @param[out] dst Result tensor. + */ + static void box3x3(const RawTensor &src, RawTensor &dst); + /** Depth conversion from @p src to @p dst + * + * @param[in] src First tensor. + * @param[out] dst Result tensor. + * @param[in] policy Overflow policy. + * @param[in] shift Value for down/up conversions. + */ + static void depth_convert(const RawTensor &src, RawTensor &dst, ConvertPolicy policy, uint32_t shift); + /** Compute GEMM function. 
+     *
+     * @param[in]  src1  First input tensor.
+     * @param[in]  src2  Second input tensor.
+     * @param[in]  src3  Third input tensor.
+     * @param[out] dst   Output tensor.
+     * @param[in]  alpha Weight of the matrix product.
+     * @param[in]  beta  Weight of the third matrix.
+     */
+    static void gemm(const RawTensor &src1, const RawTensor &src2, const RawTensor &src3,
+                     RawTensor &dst, float alpha, float beta);
+    /** Element-wise multiplication of @p src1, @p src2 and @p scale
+     *
+     * @param[in]  src1            First tensor.
+     * @param[in]  src2            Second tensor.
+     * @param[out] dst             Result tensor.
+     * @param[in]  scale           A non-negative float multiplied to each product.
+     * @param[in]  convert_policy  Overflow policy.
+     * @param[in]  rounding_policy Rounding policy.
+     */
+    static void pixel_wise_multiplication(const RawTensor &src1, const RawTensor &src2, RawTensor &dst, float scale, ConvertPolicy convert_policy, RoundingPolicy rounding_policy);
+    /** Fixed-point pixel-wise multiplication of @p src1 by @p src2
+     *
+     * @param[in]  src1            First tensor.
+     * @param[in]  src2            Second tensor.
+     * @param[out] dst             Result tensor.
+     * @param[in]  scale           A non-negative float multiplied to each product.
+     * @param[in]  convert_policy  Overflow policy.
+     * @param[in]  rounding_policy Rounding policy.
+     */
+    static void fixed_point_pixel_wise_multiplication(const RawTensor &src1, const RawTensor &src2, RawTensor &dst, float scale, ConvertPolicy convert_policy, RoundingPolicy rounding_policy);
+    /** Threshold of @p src to @p dst
+     *
+     * @param[in]  src         First tensor.
+     * @param[out] dst         Result tensor.
+     * @param[in]  threshold   Threshold. When the threshold type is RANGE, this is used as the lower threshold.
+     * @param[in]  false_value Value to set when the condition is not respected.
+     * @param[in]  true_value  Value to set when the condition is respected.
+     * @param[in]  type        Thresholding type. Either RANGE or BINARY.
+     * @param[in]  upper       Upper threshold. Only used when the thresholding type is RANGE.
+     */
+    static void threshold(const RawTensor &src, RawTensor &dst, uint8_t threshold, uint8_t false_value, uint8_t true_value, ThresholdType type, uint8_t upper);
+    /** Activation layer of @p src based on the information from @p act_info.
+     *
+     * @param[in]  input    Input tensor.
+     * @param[out] output   Result tensor.
+     * @param[in]  act_info Activation layer information.
+     */
+    static void activation_layer(const RawTensor &input, RawTensor &output, ActivationLayerInfo act_info);
+    /** Batch normalization of @p src based on the given mean, variance, beta and gamma tensors.
+     *
+     * @param[in]  src                  Input tensor.
+     * @param[out] dst                  Result tensor.
+     * @param[in]  mean                 Mean vector tensor.
+     * @param[in]  var                  Variance vector tensor.
+     * @param[in]  beta                 Beta vector tensor.
+     * @param[in]  gamma                Gamma vector tensor.
+     * @param[in]  epsilon              Small value to avoid division with zero.
+     * @param[in]  fixed_point_position Fixed point position.
+     */
+    static void batch_normalization_layer(const RawTensor &src, RawTensor &dst, const RawTensor &mean, const RawTensor &var, const RawTensor &beta, const RawTensor &gamma, float epsilon,
+                                          int fixed_point_position = 0);
+    /** Convolution layer function
+     *
+     * @param[in]  src       Input tensor.
+     * @param[in]  weights   Weights tensor.
+     * @param[in]  bias      Bias tensor.
+     * @param[out] dst       Result tensor.
+     * @param[in]  conv_info Pads and strides information for the convolution layer.
+     */
+    static void convolution_layer(const RawTensor &src, const RawTensor &weights, const RawTensor &bias, RawTensor &dst, const PadStrideInfo &conv_info);
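+    // Backed by tensor_operations::convolution3d (TensorOperations.h): filter taps that
+    // fall outside the input are skipped, i.e. treated as zero padding.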
+    /** Fully connected layer function
+     *
+     * @param[in]  src     Input tensor.
+     * @param[in]  weights Weights tensor.
+     * @param[in]  bias    Bias tensor.
+     * @param[out] dst     Result tensor.
+     */
+    static void fully_connected_layer(const RawTensor &src, const RawTensor &weights, const RawTensor &bias, RawTensor &dst);
+    /** Normalization of @p src based on the information from @p norm_info.
+     *
+     * @param[in]  src       Input tensor.
+     * @param[out] dst       Result tensor.
+     * @param[in]  norm_info Normalization Layer information.
+     */
+    static void normalization_layer(const RawTensor &src, RawTensor &dst, NormalizationLayerInfo norm_info);
+    /** Pooling layer of @p src based on the information from @p pool_info.
+     *
+     * @param[in]  src                  Input tensor.
+     * @param[out] dst                  Result tensor.
+     * @param[in]  pool_info            Pooling Layer information.
+     * @param[in]  fixed_point_position (Optional) Fixed point position.
+     */
+    static void pooling_layer(const RawTensor &src, RawTensor &dst, PoolingLayerInfo pool_info, int fixed_point_position = 0);
+    /** Softmax layer of @p src.
+     *
+     * @param[in]  src Input tensor.
+     * @param[out] dst Result tensor.
+     */
+    static void softmax_layer(const RawTensor &src, RawTensor &dst);
+    /** Fixed point operations of @p src
+     *
+     * @param[in]  src Input tensor.
+     * @param[out] dst Result tensor.
+     * @param[in]  op  Fixed point operation to perform.
+     */
+    static void fixed_point_operation(const RawTensor &src, RawTensor &dst, FixedPointOp op);
+
+private:
+    ReferenceCPP()  = delete;
+    ~ReferenceCPP() = delete;
+};
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif
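A minimal sketch of how these entry points are typically driven from a test (shapes,
seeds and the choice of bitwise_not are illustrative only; `library` and RawTensor
come from the test fixtures above):

    RawTensor src = library->get(TensorShape(16U, 16U), DataType::U8, 1, 0);
    RawTensor dst = library->get(TensorShape(16U, 16U), DataType::U8, 1, 0);
    library->fill_tensor_uniform(src, 0); // seed 0, full U8 value range
    ReferenceCPP::bitwise_not(src, dst);  // dst[i] = ~src[i]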
diff --git a/tests/validation/Tensor.h b/tests/validation/Tensor.h
new file mode 100644
index 0000000000..81066b40ad
--- /dev/null
+++ b/tests/validation/Tensor.h
@@ -0,0 +1,111 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_TEST_TENSOR_H__
+#define __ARM_COMPUTE_TEST_TENSOR_H__
+
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+
+#include <numeric>
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+/** Non-owning wrapper around a typed buffer and its shape/data-type metadata. */
+template <typename T>
+class Tensor
+{
+public:
+    Tensor()
+        : _shape(), _dt(DataType::UNKNOWN), _fixed_point_position(0), _ptr(nullptr), _ptr_const(nullptr) {};
+
+    Tensor(TensorShape shape, DataType dt, int fixed_point_position, T *ptr)
+        : _shape(shape), _dt(dt), _fixed_point_position(fixed_point_position), _ptr(ptr), _ptr_const(nullptr) {};
+
+    Tensor(TensorShape shape, DataType dt, int fixed_point_position, const T *ptr)
+        : _shape(shape), _dt(dt), _fixed_point_position(fixed_point_position), _ptr(nullptr), _ptr_const(ptr) {};
+
+    Tensor(const Tensor &tensor) = delete;
+    Tensor &operator=(const Tensor &) = delete;
+    Tensor(Tensor &&)                 = default;
+    Tensor &operator=(Tensor &&) = default;
+
+    ~Tensor() = default;
+
+    T &operator[](size_t offset)
+    {
+        return _ptr[offset];
+    }
+
+    const T &operator[](size_t offset) const
+    {
+        return _ptr_const[offset];
+    }
+
+    int num_elements() const
+    {
+        return std::accumulate(_shape.cbegin(), _shape.cend(), 1, std::multiplies<int>());
+    }
+
+    TensorShape shape() const
+    {
+        return _shape;
+    }
+
+    DataType data_type() const
+    {
+        return _dt;
+    }
+
+    int fixed_point_position() const
+    {
+        return _fixed_point_position;
+    }
+
+    const T *data() const
+    {
+        return (_ptr != nullptr) ? _ptr : _ptr_const;
+    }
+
+    T *data()
+    {
+        return _ptr;
+    }
+
+    const T *data_const()
+    {
+        return _ptr_const;
+    }
+
+private:
+    TensorShape _shape;
+    DataType    _dt;
+    int         _fixed_point_position;
+    T          *_ptr;
+    const T    *_ptr_const;
+};
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+
+#endif /* __ARM_COMPUTE_TEST_TENSOR_H__ */
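The wrapper above never owns its buffer; a short illustration (buffer and values
are made up):

    float buf[4] = { 1.f, 2.f, 3.f, 4.f };
    Tensor<float> t(TensorShape(2U, 2U), DataType::F32, 0, buf);
    // t.num_elements() == 4 and t[3] == 4.f; destroying t leaves buf untouched.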
diff --git a/tests/validation/TensorFactory.h b/tests/validation/TensorFactory.h
new file mode 100644
index 0000000000..48f9d6702f
--- /dev/null
+++ b/tests/validation/TensorFactory.h
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_TEST_TENSOR_FACTORY_H__
+#define __ARM_COMPUTE_TEST_TENSOR_FACTORY_H__
+
+#include "RawTensor.h"
+#include "Tensor.h"
+#include "arm_compute/core/Error.h"
+
+#include "boost_wrapper.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+using TensorVariant = boost::variant<Tensor<uint8_t>, Tensor<int8_t>,
+      Tensor<uint16_t>, Tensor<int16_t>,
+      Tensor<uint32_t>, Tensor<int32_t>,
+#ifdef ENABLE_FP16
+      Tensor<float16_t>,
+#endif
+      Tensor<float>>;
+
+/** Helper to create a constant type if the passed reference is constant. */
+template <typename R, typename T>
+struct match_const
+{
+    using type = typename std::conditional<std::is_const<typename std::remove_reference<R>::type>::value, const T, T>::type;
+};
+
+class TensorFactory
+{
+public:
+    template <typename R>
+    static TensorVariant get_tensor(R &&raw)
+    {
+        TensorVariant v;
+        DataType      dt                   = raw.data_type();
+        int           fixed_point_position = raw.fixed_point_position();
+        auto          shape                = raw.shape();
+        auto          data                 = raw.data();
+
+        switch(dt)
+        {
+            case DataType::U8:
+                using value_type_u8 = typename match_const<R, uint8_t>::type;
+                v = Tensor<uint8_t>(shape, dt, fixed_point_position, reinterpret_cast<value_type_u8 *>(data));
+                break;
+            case DataType::S8:
+            case DataType::QS8:
+                using value_type_s8 = typename match_const<R, int8_t>::type;
+                v = Tensor<int8_t>(shape, dt, fixed_point_position, reinterpret_cast<value_type_s8 *>(data));
+                break;
+            case DataType::U16:
+                using value_type_u16 = typename match_const<R, uint16_t>::type;
+                v = Tensor<uint16_t>(shape, dt, fixed_point_position, reinterpret_cast<value_type_u16 *>(data));
+                break;
+            case DataType::S16:
+                using value_type_s16 = typename match_const<R, int16_t>::type;
+                v = Tensor<int16_t>(shape, dt, fixed_point_position, reinterpret_cast<value_type_s16 *>(data));
+                break;
+            case DataType::U32:
+                using value_type_u32 = typename match_const<R, uint32_t>::type;
+                v = Tensor<uint32_t>(shape, dt, fixed_point_position, reinterpret_cast<value_type_u32 *>(data));
+                break;
+            case DataType::S32:
+                using value_type_s32 = typename match_const<R, int32_t>::type;
+                v = Tensor<int32_t>(shape, dt, fixed_point_position, reinterpret_cast<value_type_s32 *>(data));
+                break;
+#ifdef ENABLE_FP16
+            case DataType::F16:
+                using value_type_f16 = typename match_const<R, float16_t>::type;
+                v = Tensor<float16_t>(shape, dt, fixed_point_position, reinterpret_cast<value_type_f16 *>(data));
+                break;
+#endif
+            case DataType::F32:
+                using value_type_f32 = typename match_const<R, float>::type;
+                v = Tensor<float>(shape, dt, fixed_point_position, reinterpret_cast<value_type_f32 *>(data));
+                break;
+            default:
+                ARM_COMPUTE_ERROR("NOT SUPPORTED!");
+        }
+        return v;
+    }
+};
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+
+#endif /* __ARM_COMPUTE_TEST_TENSOR_FACTORY_H__ */
diff --git a/tests/validation/TensorOperations.h b/tests/validation/TensorOperations.h
new file mode 100644
index 0000000000..5e27e9d3a0
--- /dev/null
+++ b/tests/validation/TensorOperations.h
@@ -0,0 +1,1370 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_TEST_TENSOR_OPERATIONS_H__ +#define __ARM_COMPUTE_TEST_TENSOR_OPERATIONS_H__ + +#include "FixedPoint.h" +#include "Tensor.h" +#include "Types.h" +#include "Utils.h" + +#include "FixedPoint.h" +#include "Types.h" +#include "arm_compute/core/FixedPoint.h" +#include "arm_compute/core/Types.h" +#include "tests/validation/FixedPoint.h" + +#include +#include +#include + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace tensor_operations +{ +namespace +{ +bool is_valid_pixel(int i, int min, int max) +{ + return (i >= min && i < max); +} + +// 3D convolution for floating point type +template ::value, int>::type * = nullptr> +void convolution3d(const T *in, const T *weights, const T *bias, T *out, int xi, int yi, int width_in, int height_in, int depth_in, int width_weights, int height_weights, int8_t fixed_point_position) +{ + const int half_width_weights = width_weights / 2; + const int half_height_weights = height_weights / 2; + + // Reset accumulator + T acc = static_cast(0); + + // Compute a 2D convolution for each IFM and accumulate the result + for(int ifm = 0; ifm < depth_in; ++ifm) + { + // Compute the offset for the input slice + const int offset_slice_in = xi + yi * width_in + ifm * width_in * height_in; + + // Compute 2D convolution + for(int yk = -half_height_weights; yk <= half_height_weights; ++yk) + { + for(int xk = -half_width_weights; xk <= half_width_weights; ++xk) + { + // Check if the pixel is out-of-bound + if(is_valid_pixel(xi + xk, 0, width_in) && is_valid_pixel(yi + yk, 0, height_in)) + { + const int idx = xk + half_width_weights; + const int idy = yk + half_height_weights; + + const T i_value = in[offset_slice_in + xk + yk * width_in]; + const T w_value = weights[idx + idy * width_weights + ifm * width_weights * height_weights]; + + acc += i_value * w_value; + } + } + } + } + + // Accumulate the bias and store the result + *out = acc + (*bias); +} + +// 3D convolution for fixed point type +template ::value, int>::type * = nullptr> +void convolution3d(const T *in, const T *weights, const T *bias, T *out, int xi, int yi, int width_in, int height_in, int depth_in, int width_weights, int height_weights, + int8_t fixed_point_position) +{ + const int half_width_weights = width_weights / 2; + const int half_height_weights = height_weights / 2; + + using namespace fixed_point_arithmetic; + using promoted_type = typename fixed_point_arithmetic::traits::promote::type; + + // Reset accumulator + fixed_point acc(0, fixed_point_position); + + // Compute a 2D convolution for each IFM and accumulate the result + for(int ifm = 0; ifm < depth_in; ++ifm) + { + // Compute the offset for the input slice + const int offset_slice_in = xi + yi * width_in + ifm * width_in * height_in; + + // Compute 2D convolution + for(int yk = -half_height_weights; yk <= half_height_weights; ++yk) + { + for(int xk = -half_width_weights; xk <= half_width_weights; ++xk) + { + // Check if the pixel is out-of-bound + if(is_valid_pixel(xi + xk, 0, width_in) && is_valid_pixel(yi + yk, 0, height_in)) + { + const int idx = xk + half_width_weights; + const int idy = yk + half_height_weights; + + const fixed_point i_value(in[offset_slice_in + xk + yk * width_in], fixed_point_position, true); + 
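// The trailing 'true' selects raw construction: the stored integer is reinterpreted as an existing fixed-point bit pattern rather than converted from an integer value. +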
const fixed_point w_value(weights[idx + idy * width_weights + ifm * width_weights * height_weights], fixed_point_position, true); + const fixed_point iw = i_value * w_value; + acc = iw + acc; + } + } + } + } + + // Get the bias + const fixed_point b(*bias, fixed_point_position, true); + + // Accumulate the bias and covert back + acc = acc + b; + fixed_point res(acc); + *out = res.raw(); +} + +template +void vector_matrix_multiply(const T *in, const T *weights, const T *bias, T *out, int cols_weights, int rows_weights, uint8_t fixed_point_position) +{ + for(int x = 0; x < cols_weights; ++x) + { + T acc = 0.0f; + for(int y = 0; y < rows_weights; ++y) + { + acc += in[y] * weights[x + y * cols_weights]; + } + out[x] = acc + bias[x]; + } +} + +template <> +void vector_matrix_multiply(const int8_t *in, const int8_t *weights, const int8_t *bias, int8_t *out, int cols_weights, int rows_weights, uint8_t fixed_point_position) +{ + using namespace fixed_point_arithmetic; + using promoted_type = typename fixed_point_arithmetic::traits::promote::type; + + for(int x = 0; x < cols_weights; ++x) + { + // Reset accumulator + fixed_point acc(0, fixed_point_position); + + for(int y = 0; y < rows_weights; ++y) + { + const fixed_point i_value(in[y], fixed_point_position, true); + const fixed_point w_value(weights[x + y * cols_weights], fixed_point_position, true); + const fixed_point iw = i_value * w_value; + acc = iw + acc; + } + + // Get the bias + const fixed_point b(bias[x], fixed_point_position, true); + + // Convert back and accumulate the bias + fixed_point res(acc); + res = res + b; + + // Store the result + out[x] = res.raw(); + } +} + +/** Apply 2D spatial filter on a single element of @p in at coordinates @p coord + * + * - filter sizes have to be odd number + * - Valid region assumed + * - Row major order of filter assumed + * - TO_ZERO rounding policy assumed + * - SATURATE convert policy assumed + * + */ +template +void apply_2d_spatial_filter(Coordinates coord, const Tensor &in, Tensor &out, const TensorShape &filter_shape, const T2 *filter_itr, float scale) +{ + using intermediate_type = typename common_promoted_signed_type::intermediate_type; + intermediate_type val = 0; + int x = coord.x(); + int y = coord.y(); + for(size_t j = y - filter_shape[1] / 2; j <= y + filter_shape[1] / 2; ++j) + { + for(size_t i = x - filter_shape[0] / 2; i <= x + filter_shape[0] / 2; ++i) + { + coord.set(0, i); + coord.set(1, j); + val += static_cast(*filter_itr) * static_cast(in[coord2index(in.shape(), coord)]); + ++filter_itr; + } + } + coord.set(0, x); + coord.set(1, y); + double rounded_val = cpp11::trunc(val * static_cast(scale)); + out[coord2index(in.shape(), coord)] = saturate_cast(rounded_val); +} +} // namespace + +// Integral Image +void integral_image(const Tensor &in, Tensor &out) +{ + // Length of dimensions + const size_t width = in.shape().x(); + const size_t height = in.shape().y(); + const size_t depth = in.shape().z() * in.shape()[3] * in.shape()[4] * in.shape()[5]; + + const size_t image_size = width * height; + + for(size_t z = 0; z < depth; ++z) + { + size_t current_image = z * image_size; + + //First element of each image + out[current_image] = in[current_image]; + + // First row of each image (add only pixel on the left) + for(size_t x = 1; x < width; ++x) + { + out[current_image + x] = static_cast(in[current_image + x]) + out[current_image + x - 1]; + } + + // Subsequent rows + for(size_t y = 1; y < height; ++y) + { + size_t current_row = current_image + (width * y); + + // First element of 
each row (add only pixel up) + out[current_row] = static_cast(in[current_row]) + out[current_row - width]; + + // Following row elements + for(size_t x = 1; x < width; ++x) + { + size_t current_pixel = current_row + x; + + // out = in + up(out) + left(out) - up_left(out) + out[current_pixel] = static_cast(in[current_pixel]) + out[current_pixel - 1] + + out[current_pixel - width] - out[current_pixel - width - 1]; + } + } + } +} + +// Absolute difference +template +void absolute_difference(const Tensor &in1, const Tensor &in2, Tensor &out) +{ + using intermediate_type = typename common_promoted_signed_type::intermediate_type; + + for(int i = 0; i < in1.num_elements(); ++i) + { + intermediate_type val = std::abs(static_cast(in1[i]) - static_cast(in2[i])); + out[i] = saturate_cast(val); + } +} + +// Accumulate +template +void accumulate(const Tensor &in, Tensor &out) +{ + using intermediate_type = typename common_promoted_signed_type::intermediate_type; + + for(int i = 0; i < in.num_elements(); ++i) + { + intermediate_type val = static_cast(out[i]) + static_cast(in[i]); + out[i] = saturate_cast(val); + } +} + +// Accumulate squared +template +void accumulate_squared(const Tensor &in, Tensor &out, uint32_t shift) +{ + if(shift > 15) + { + ARM_COMPUTE_ERROR("Shift in accumulate_squared must be within the range [0, 15]"); + } + using intermediate_type = typename common_promoted_signed_type::intermediate_type; + intermediate_type denom = 1 << shift; + + for(int i = 0; i < in.num_elements(); ++i) + { + intermediate_type val = static_cast(out[i]) + (static_cast(in[i]) * static_cast(in[i]) / denom); + out[i] = saturate_cast(val); + } +} + +// Accumulate weighted +template +void accumulate_weighted(const Tensor &in, Tensor &out, float alpha) +{ + if(alpha < 0.f || alpha > 1.f) + { + ARM_COMPUTE_ERROR("Weight (alpha) specified in accumulate_weighted must be within the range [0, 1]"); + } + using intermediate_type = typename common_promoted_signed_type::intermediate_type; + + for(int i = 0; i < in.num_elements(); ++i) + { + double val = (1. - static_cast(alpha)) * static_cast(out[i]) + static_cast(alpha) * static_cast(in[i]); + out[i] = static_cast(val); + } +} + +// Arithmetic addition +template +void arithmetic_addition(const Tensor &in1, const Tensor &in2, Tensor &out, ConvertPolicy convert_policy) +{ + using intermediate_type = typename common_promoted_signed_type::intermediate_type; + + for(int i = 0; i < in1.num_elements(); ++i) + { + intermediate_type val = static_cast(in1[i]) + static_cast(in2[i]); + out[i] = (convert_policy == ConvertPolicy::SATURATE) ? saturate_cast(val) : static_cast(val); + } +} + +// Arithmetic Subtraction +template +void arithmetic_subtraction(const Tensor &in1, const Tensor &in2, Tensor &out, ConvertPolicy convert_policy) +{ + using intermediate_type = typename common_promoted_signed_type::intermediate_type; + + for(int i = 0; i < in1.num_elements(); ++i) + { + intermediate_type val = static_cast(in1[i]) - static_cast(in2[i]); + out[i] = (convert_policy == ConvertPolicy::SATURATE) ? 
saturate_cast(val) : static_cast(val); + } +} + +// Bitwise and +template ::value>::type> +void bitwise_and(const Tensor &in1, const Tensor &in2, Tensor &out) +{ + for(int i = 0; i < in1.num_elements(); ++i) + { + out[i] = in1[i] & in2[i]; + } +} + +// Bitwise or +template ::value>::type> +void bitwise_or(const Tensor &in1, const Tensor &in2, Tensor &out) +{ + for(int i = 0; i < in1.num_elements(); ++i) + { + out[i] = in1[i] | in2[i]; + } +} + +// Bitwise xor +template ::value>::type> +void bitwise_xor(const Tensor &in1, const Tensor &in2, Tensor &out) +{ + for(int i = 0; i < in1.num_elements(); ++i) + { + out[i] = in1[i] ^ in2[i]; + } +} + +// Bitwise not +template ::value>::type> +void bitwise_not(const Tensor &in, Tensor &out) +{ + for(int i = 0; i < in.num_elements(); ++i) + { + out[i] = ~in[i]; + } +} + +// 3-by-3 box filter +template ::value>::type> +void box3x3(const Tensor &in, Tensor &out) +{ + const std::array filter{ { 1, 1, 1, 1, 1, 1, 1, 1, 1 } }; + float scale = 1.f / static_cast(filter.size()); + const ValidRegion valid_region = shape_to_valid_region_undefined_border(in.shape(), BorderSize(1)); + for(int element_idx = 0; element_idx < in.num_elements(); ++element_idx) + { + const Coordinates id = index2coord(in.shape(), element_idx); + if(is_in_valid_region(valid_region, id)) + { + apply_2d_spatial_filter(id, in, out, TensorShape(3U, 3U), filter.data(), scale); + } + } +} + +// Depth conversion +template +void depth_convert(const Tensor &in, Tensor &out, ConvertPolicy policy, uint32_t shift) +{ + ARM_COMPUTE_ERROR("The conversion is not supported"); +} + +template <> +void depth_convert(const Tensor &in, Tensor &out, ConvertPolicy policy, uint32_t shift) +{ + const int8_t fixed_point_position = static_cast(in.fixed_point_position()); + for(int i = 0; i < in.num_elements(); ++i) + { + out[i] = static_cast(in[i]) * (1.0f / (1 << fixed_point_position)); + } +} + +template <> +void depth_convert(const Tensor &in, Tensor &out, ConvertPolicy policy, uint32_t shift) +{ + const int8_t fixed_point_position = static_cast(in.fixed_point_position()); + for(int i = 0; i < in.num_elements(); ++i) + { + float val = in[i] * (1 << fixed_point_position) + 0.5f; + out[i] = ((policy == ConvertPolicy::SATURATE) ? saturate_cast(val) : static_cast(val)); + } +} + +template <> +void depth_convert(const Tensor &in, Tensor &out, ConvertPolicy policy, uint32_t shift) +{ + for(int i = 0; i < in.num_elements(); ++i) + { + out[i] = static_cast(in[i]) << shift; + } +} + +template <> +void depth_convert(const Tensor &in, Tensor &out, ConvertPolicy policy, uint32_t shift) +{ + for(int i = 0; i < in.num_elements(); ++i) + { + out[i] = static_cast(in[i]) << shift; + } +} + +template <> +void depth_convert(const Tensor &in, Tensor &out, ConvertPolicy policy, uint32_t shift) +{ + for(int i = 0; i < in.num_elements(); ++i) + { + out[i] = static_cast(in[i]) << shift; + } +} + +template <> +void depth_convert(const Tensor &in, Tensor &out, ConvertPolicy policy, uint32_t shift) +{ + for(int i = 0; i < in.num_elements(); ++i) + { + uint16_t val = in[i] >> shift; + out[i] = ((policy == ConvertPolicy::SATURATE) ? 
saturate_cast(val) : static_cast(val)); + } +} + +template <> +void depth_convert(const Tensor &in, Tensor &out, ConvertPolicy policy, uint32_t shift) +{ + for(int i = 0; i < in.num_elements(); ++i) + { + out[i] = static_cast(in[i]) << shift; + } +} + +template <> +void depth_convert(const Tensor &in, Tensor &out, ConvertPolicy policy, uint32_t shift) +{ + for(int i = 0; i < in.num_elements(); ++i) + { + int16_t val = in[i] >> shift; + out[i] = ((policy == ConvertPolicy::SATURATE) ? saturate_cast(val) : static_cast(val)); + } +} +template <> +void depth_convert(const Tensor &in, Tensor &out, ConvertPolicy policy, uint32_t shift) +{ + for(int i = 0; i < in.num_elements(); ++i) + { + out[i] = static_cast(in[i]) << shift; + } +} + +// Matrix multiplication for floating point type +template ::value, int>::type * = nullptr> +void gemm(const Tensor &in1, const Tensor &in2, const Tensor &in3, Tensor &out, float alpha, float beta) +{ + const int M = out.shape().y(); + const int N = out.shape().x(); + const int K = in1.shape().x(); + + for(int r = 0; r < M; ++r) + { + for(int c = 0; c < N; ++c) + { + T acc = 0.0f; + + for(int k = 0; k < K; ++k) + { + const T a0 = in1[r * K + k]; + const T b0 = in2[k * N + c]; + + acc += a0 * b0; + } + + // Finalize the result: A * B * alpha + C * beta + const T c0 = in3[c + r * N]; + out[c + r * N] = alpha * acc + beta * c0; + } + } +} + +// Matrix multiplication for fixed point type +template ::value, int>::type * = nullptr> +void gemm(const Tensor &in1, const Tensor &in2, const Tensor &in3, Tensor &out, float alpha, float beta) +{ + using namespace fixed_point_arithmetic; + + using promoted_type = typename fixed_point_arithmetic::traits::promote::type; + + const int M = out.shape().y(); + const int N = out.shape().x(); + const int K = in1.shape().x(); + const int8_t fixed_point_position = static_cast(in1.fixed_point_position()); + + const fixed_point alpha_q(alpha, fixed_point_position); + const fixed_point beta_q(beta, fixed_point_position); + + for(int r = 0; r < M; ++r) + { + for(int c = 0; c < N; ++c) + { + fixed_point acc_q(0, fixed_point_position); + + for(int k = 0; k < K; ++k) + { + const fixed_point a0_q(in1[r * K + k], fixed_point_position, true); + const fixed_point b0_q(in2[k * N + c], fixed_point_position, true); + const fixed_point axb_q = a0_q * b0_q; + + acc_q = axb_q + acc_q; + } + + // Finalize the result: A * B * alpha + C * beta + const fixed_point c0_q(in3[c + r * N], fixed_point_position, true); + + fixed_point res_q(acc_q); + res_q = alpha_q * res_q; + res_q = (c0_q * beta_q) + res_q; + + // Store the result + out[c + r * N] = res_q.raw(); + } + } +} + +// Pixel-wise multiplication +template +void pixel_wise_multiplication(const Tensor &in1, const Tensor &in2, Tensor &out, float scale, ConvertPolicy convert_policy, RoundingPolicy rounding_policy) +{ + if(scale < 0) + { + ARM_COMPUTE_ERROR("Scale of pixel-wise multiplication must be non-negative"); + } + using intermediate_type = typename common_promoted_signed_type::intermediate_type; + for(int i = 0; i < in1.num_elements(); ++i) + { + double val = static_cast(in1[i]) * static_cast(in2[i]) * static_cast(scale); + if(std::is_floating_point::value) + { + out[i] = val; + } + else + { + double rounded_val = 0; + switch(rounding_policy) + { + case(RoundingPolicy::TO_ZERO): + rounded_val = cpp11::trunc(val); + break; + case(RoundingPolicy::TO_NEAREST_UP): + rounded_val = cpp11::round_half_up(val); + break; + case(RoundingPolicy::TO_NEAREST_EVEN): + rounded_val = cpp11::round_half_even(val); + 
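// Round half to even: e.g. 2.5 rounds to 2.0 while 3.5 rounds to 4.0. +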
break; + default: + ARM_COMPUTE_ERROR("Unsupported rounding policy"); + } + out[i] = (convert_policy == ConvertPolicy::SATURATE) ? saturate_cast(rounded_val) : static_cast(rounded_val); + } + } +} + +// Fixed-point Pixel-wise Multiplication +template ::value>::type> +void fixed_point_pixel_wise_multiplication(const Tensor &in1, const Tensor &in2, Tensor &out, int scale, ConvertPolicy convert_policy, RoundingPolicy rounding_policy) +{ + using namespace fixed_point_arithmetic; + + const int fixed_point_position = in1.fixed_point_position(); + + ARM_COMPUTE_ERROR_ON_MSG(in1.data_type() != in2.data_type() || in1.data_type() != out.data_type(), + "Tensors must all have the same DataType"); + ARM_COMPUTE_ERROR_ON_MSG(fixed_point_position != in2.fixed_point_position() || fixed_point_position != out.fixed_point_position(), + "Fixed-point position must be the same for both inputs and outputs"); + + // Validate fixed_point_position + ARM_COMPUTE_ERROR_ON((in1.data_type() == DataType::QS8) && (fixed_point_position == 0 || fixed_point_position > 7)); + ARM_COMPUTE_ERROR_ON((in1.data_type() == DataType::QS16) && (fixed_point_position == 0 || fixed_point_position > 15)); + + fixed_point fp_scale(scale, fixed_point_position); + const bool is_sat = convert_policy == ConvertPolicy::SATURATE; + const bool do_scaling = scale != 1; + + for(int i = 0; i < in1.num_elements(); ++i) + { + fixed_point val1(in1[i], fixed_point_position, true); + fixed_point val2(in2[i], fixed_point_position, true); + fixed_point res = (is_sat) ? val1 * val2 : mul(val1, val2); + if(do_scaling) + { + res = (is_sat) ? res * fp_scale : mul(res, fp_scale); + } + out[i] = res.raw(); + } +} + +// Threshold +template +void threshold(const Tensor &in, Tensor &out, uint8_t threshold, uint8_t false_value, uint8_t true_value, ThresholdType type, uint8_t upper) +{ + switch(type) + { + case ThresholdType::BINARY: + for(int i = 0; i < in.num_elements(); ++i) + { + out[i] = ((in[i] > threshold) ? 
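/* Summary of the two threshold modes implemented here: BINARY yields
   true_value when in > threshold, otherwise false_value; RANGE yields
   true_value only when threshold <= in <= upper. E.g. with threshold = 100 and
   upper = 200, an input of 150 maps to true_value and 250 to false_value. */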
true_value : false_value); + } + break; + case ThresholdType::RANGE: + for(int i = 0; i < in.num_elements(); ++i) + { + if(in[i] > upper) + { + out[i] = false_value; + } + else if(in[i] < threshold) + { + out[i] = false_value; + } + else + { + out[i] = true_value; + } + } + break; + default: + ARM_COMPUTE_ERROR("Thresholding type not recognised"); + break; + } +} + +// Activation Layer for floating point type +template ::value, int>::type * = nullptr> +void activation_layer(const Tensor &in, Tensor &out, ActivationLayerInfo act_info) +{ + const T a = static_cast(act_info.a()); + const T b = static_cast(act_info.b()); + + for(int i = 0; i < in.num_elements(); ++i) + { + T x = in[i]; + switch(act_info.activation()) + { + case ActivationLayerInfo::ActivationFunction::ABS: + out[i] = std::abs(x); + break; + case ActivationLayerInfo::ActivationFunction::BOUNDED_RELU: + out[i] = std::min(a, std::max(0, x)); + break; + case ActivationLayerInfo::ActivationFunction::LINEAR: + out[i] = a * x + b; + break; + case ActivationLayerInfo::ActivationFunction::LOGISTIC: + out[i] = static_cast(1) / (static_cast(1) + std::exp(-x)); + break; + case ActivationLayerInfo::ActivationFunction::RELU: + out[i] = std::max(0, x); + break; + case ActivationLayerInfo::ActivationFunction::SOFT_RELU: + out[i] = std::log(static_cast(1) + std::exp(x)); + break; + case ActivationLayerInfo::ActivationFunction::SQRT: + out[i] = std::sqrt(x); + break; + case ActivationLayerInfo::ActivationFunction::SQUARE: + out[i] = x * x; + break; + case ActivationLayerInfo::ActivationFunction::TANH: + out[i] = a * std::tanh(b * x); + break; + default: + ARM_COMPUTE_ERROR("Activation function not recognised"); + break; + } + } +} + +// Activation Layer for fixed point type +template ::value, int>::type * = nullptr> +void activation_layer(const Tensor &in, Tensor &out, ActivationLayerInfo act_info) +{ + using namespace fixed_point_arithmetic; + int fixed_point_position = in.fixed_point_position(); + ActivationLayerInfo::ActivationFunction act_func = act_info.activation(); + const fixed_point a(act_info.a(), fixed_point_position); + const fixed_point b(act_info.b(), fixed_point_position); + const fixed_point const_0(0, fixed_point_position); + const fixed_point const_1(1, fixed_point_position); + + for(int i = 0; i < in.num_elements(); ++i) + { + fixed_point x(in[i], fixed_point_position, true); + switch(act_func) + { + case ActivationLayerInfo::ActivationFunction::ABS: + out[i] = abs(x).raw(); + break; + case ActivationLayerInfo::ActivationFunction::BOUNDED_RELU: + out[i] = min(a, max(const_0, x)).raw(); + break; + case ActivationLayerInfo::ActivationFunction::LINEAR: + out[i] = add(b, mul(a, x)).raw(); + break; + case ActivationLayerInfo::ActivationFunction::LOGISTIC: + out[i] = (const_1 / (const_1 + exp(-x))).raw(); + break; + case ActivationLayerInfo::ActivationFunction::RELU: + out[i] = max(const_0, x).raw(); + break; + case ActivationLayerInfo::ActivationFunction::SOFT_RELU: + out[i] = log(const_1 + exp(x)).raw(); + break; + case ActivationLayerInfo::ActivationFunction::SQRT: + out[i] = (const_1 / inv_sqrt(x)).raw(); + break; + case ActivationLayerInfo::ActivationFunction::SQUARE: + out[i] = mul(x, x).raw(); + break; + case ActivationLayerInfo::ActivationFunction::TANH: + out[i] = tanh(x).raw(); + break; + default: + ARM_COMPUTE_ERROR("Activation function not recognised"); + break; + } + } +} + +// Batch Normalization Layer for fixed point type +template ::value, int>::type * = nullptr> +void batch_normalization_layer(const Tensor &in, 
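/* Both overloads of batch_normalization_layer compute, per channel i:
       x_bar = (x - mean[i]) / sqrt(var[i] + epsilon)
       out   = gamma[i] * x_bar + beta[i]
   This overload performs the arithmetic on fixed_point<int8_t> operands; the
   floating point overload below does it directly. */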
Tensor &out, const Tensor &mean, const Tensor &var, const Tensor &beta, const Tensor &gamma, float epsilon, int fixed_point_position) +{ + const int cols = static_cast(in.shape()[0]); + const int rows = static_cast(in.shape()[1]); + const int depth = static_cast(in.shape()[2]); + int upper_dims = in.shape().total_size() / (cols * rows * depth); + + for(int r = 0; r < upper_dims; ++r) + { + for(int i = 0; i < depth; ++i) + { + for(int k = 0; k < rows; ++k) + { + for(int l = 0; l < cols; ++l) + { + const int pos = l + k * cols + i * rows * cols + r * cols * rows * depth; + fixed_point_arithmetic::fixed_point in_qs8(in[pos], fixed_point_position, true); + fixed_point_arithmetic::fixed_point var_qs8(var[i], fixed_point_position, true); + fixed_point_arithmetic::fixed_point mean_qs8(mean[i], fixed_point_position, true); + fixed_point_arithmetic::fixed_point beta_qs8(beta[i], fixed_point_position, true); + fixed_point_arithmetic::fixed_point gamma_qs8(gamma[i], fixed_point_position, true); + fixed_point_arithmetic::fixed_point epsilon_qs8(epsilon, fixed_point_position); + + auto denominator = fixed_point_arithmetic::inv_sqrt(var_qs8 + epsilon_qs8); + auto numerator = in_qs8 - mean_qs8; + auto x_bar = numerator * denominator; + x_bar = beta_qs8 + x_bar * gamma_qs8; + out[pos] = x_bar.raw(); + } + } + } + } +} + +// Batch Normalization Layer for floating point type +template ::value, int>::type * = nullptr> +void batch_normalization_layer(const Tensor &in, Tensor &out, const Tensor &mean, const Tensor &var, const Tensor &beta, const Tensor &gamma, float epsilon, int fixed_point_position) +{ + const int cols = static_cast(in.shape()[0]); + const int rows = static_cast(in.shape()[1]); + const int depth = static_cast(in.shape()[2]); + int upper_dims = in.shape().total_size() / (cols * rows * depth); + + for(int r = 0; r < upper_dims; ++r) + { + for(int i = 0; i < depth; ++i) + { + for(int k = 0; k < rows; ++k) + { + for(int l = 0; l < cols; ++l) + { + const int pos = l + k * cols + i * rows * cols + r * cols * rows * depth; + const float denominator = sqrt(var[i] + epsilon); + const float numerator = in[pos] - mean[i]; + const float x_bar = numerator / denominator; + out[pos] = beta[i] + x_bar * gamma[i]; + } + } + } + } +} + +// Convolution layer +template +void convolution_layer(const Tensor &in, const Tensor &weights, const Tensor &bias, Tensor &out, const PadStrideInfo &conv_info) +{ + const int width_in = in.shape().x(); + const int height_in = in.shape().y(); + const int depth_in = in.shape().z(); + const int width_out = out.shape().x(); + const int height_out = out.shape().y(); + const int depth_out = out.shape().z(); + const int width_weights = weights.shape().x(); + const int height_weights = weights.shape().y(); + const int depth_weights = weights.shape().z(); + const int pad_xi = std::min(static_cast(conv_info.pad().first), width_weights / 2); + const int pad_yi = std::min(static_cast(conv_info.pad().second), height_weights / 2); + const int start_xi = width_weights / 2 - pad_xi; + const int start_yi = height_weights / 2 - pad_yi; + const int end_xi = width_in - start_xi; + const int end_yi = height_in - start_yi; + const int stride_xi = conv_info.stride().first; + const int stride_yi = conv_info.stride().second; + const int num_batches = in.shape().total_size() / (width_in * height_in * depth_in); + + for(int r = 0; r < num_batches; ++r) + { + for(int yi = start_yi; yi < end_yi; yi += stride_yi) + { + for(int xi = start_xi; xi < end_xi; xi += stride_xi) + { + for(int ofm = 0; ofm < 
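/* Output mapping used inside this loop nest: each sampled input centre
   (xi, yi) produces output coordinates xo = (xi - start_xi) / stride_xi and
   yo = (yi - start_yi) / stride_yi; convolution3d then accumulates over a
   width_weights x height_weights x depth_in volume and adds the bias of the
   current output feature map. */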
depth_out; ++ofm) + { + // Compute input and output offsets + const int offset_in = r * width_in * height_in * depth_in; + const int xo = (xi - start_xi) / stride_xi; + const int yo = (yi - start_yi) / stride_yi; + const int offset_out = xo + yo * width_out + ofm * width_out * height_out + r * width_out * height_out * depth_out; + + // Compute 3D convolution + convolution3d(in.data() + offset_in, + weights.data() + ofm * width_weights * height_weights * depth_weights, + bias.data() + ofm, + out.data() + offset_out, + xi, yi, + width_in, height_in, depth_in, + width_weights, height_weights, + static_cast(in.fixed_point_position())); + } + } + } + } +} + +// Fully connected layer +template +void fully_connected_layer(const Tensor &in, const Tensor &weights, const Tensor &bias, Tensor &out) +{ + ARM_COMPUTE_ERROR_ON(weights.shape().x() != out.shape().x()); + ARM_COMPUTE_ERROR_ON(weights.shape().y() != in.shape().x() * in.shape().y() * in.shape().z()); + const int cols_weights = weights.shape().x(); + const int rows_weights = weights.shape().y(); + const int num_batches = in.shape().total_size() / rows_weights; + + for(int k = 0; k < num_batches; ++k) + { + vector_matrix_multiply(in.data() + k * rows_weights, + weights.data(), + bias.data(), + out.data() + k * cols_weights, + cols_weights, + rows_weights, + in.fixed_point_position()); + } +} + +// Normalization Layer for floating point type +template ::value, int>::type * = nullptr> +void normalization_layer(const Tensor &in, Tensor &out, NormalizationLayerInfo norm_info) +{ + const uint32_t norm_size = norm_info.norm_size(); + NormType type = norm_info.type(); + float beta = norm_info.beta(); + uint32_t kappa = norm_info.kappa(); + + const int cols = static_cast(in.shape()[0]); + const int rows = static_cast(in.shape()[1]); + const int depth = static_cast(in.shape()[2]); + int upper_dims = in.shape().total_size() / (cols * rows); + + float coeff = norm_info.scale_coeff(); + int radius_cols = norm_size / 2; + // IN_MAP_1D and CROSS_MAP normalize over a single axis only + int radius_rows = (NormType::IN_MAP_2D == type) ? 
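/* The loops below first accumulate an interim value
       out = kappa + scale_coeff * sum(x^2)
   over the norm_size window (1-D for IN_MAP_1D and CROSS_MAP, 2-D for
   IN_MAP_2D) and then finalize it as in / out^beta, with the common
   beta == 1 and beta == 0.5 cases specialised. */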
norm_size / 2 : 0; + + if(type == NormType::CROSS_MAP) + { + // Remove also depth from upper dimensions since it is the axes we want + // to use for normalization + upper_dims /= depth; + for(int r = 0; r < upper_dims; ++r) + { + for(int i = 0; i < rows; ++i) + { + for(int k = 0; k < cols; ++k) + { + for(int l = 0; l < depth; ++l) + { + float accumulated_scale = 0.f; + for(int j = -radius_cols; j <= radius_cols; ++j) + { + const int z = l + j; + if(z >= 0 && z < depth) + { + const T value = in[k + i * cols + z * rows * cols + r * cols * rows * depth]; + accumulated_scale += value * value; + } + } + out[k + i * cols + l * rows * cols + r * cols * rows * depth] = kappa + accumulated_scale * coeff; + } + } + } + } + } + else + { + for(int r = 0; r < upper_dims; ++r) + { + for(int i = 0; i < rows; ++i) + { + for(int k = 0; k < cols; ++k) + { + float accumulated_scale = 0.f; + for(int j = -radius_rows; j <= radius_rows; ++j) + { + const int y = i + j; + for(int l = -radius_cols; l <= radius_cols; ++l) + { + const int x = k + l; + if((x >= 0 && y >= 0) && (x < cols && y < rows)) + { + const T value = in[x + y * cols + r * cols * rows]; + accumulated_scale += value * value; + } + } + } + out[k + i * cols + r * cols * rows] = kappa + accumulated_scale * coeff; + } + } + } + } + + if(beta == 1.f) + { + for(int i = 0; i < out.num_elements(); ++i) + { + out[i] = in[i] / out[i]; + } + } + else if(beta == 0.5f) + { + for(int i = 0; i < out.num_elements(); ++i) + { + out[i] = in[i] / std::sqrt(out[i]); + } + } + else + { + for(int i = 0; i < out.num_elements(); ++i) + { + out[i] = in[i] * std::exp(std::log(out[i]) * -beta); + } + } +} +// Normalization Layer for fixed-point types +template ::value, int>::type * = nullptr> +void normalization_layer(const Tensor &in, Tensor &out, NormalizationLayerInfo norm_info) +{ + using namespace fixed_point_arithmetic; + + const int fixed_point_position = in.fixed_point_position(); + + const uint32_t norm_size = norm_info.norm_size(); + NormType type = norm_info.type(); + fixed_point beta(norm_info.beta(), fixed_point_position); + fixed_point kappa(norm_info.kappa(), fixed_point_position); + + const int cols = static_cast(in.shape()[0]); + const int rows = static_cast(in.shape()[1]); + const int depth = static_cast(in.shape()[2]); + int upper_dims = in.shape().total_size() / (cols * rows); + + fixed_point coeff(norm_info.scale_coeff(), fixed_point_position); + int radius_cols = norm_size / 2; + // IN_MAP_1D and CROSS_MAP normalize over a single axis only + int radius_rows = (NormType::IN_MAP_2D == type) ? 
norm_size / 2 : 0; + + if(type == NormType::CROSS_MAP) + { + // Remove also depth from upper dimensions since it is the axes we want + // to use for normalization + upper_dims /= depth; + for(int r = 0; r < upper_dims; ++r) + { + for(int i = 0; i < rows; ++i) + { + for(int k = 0; k < cols; ++k) + { + for(int l = 0; l < depth; ++l) + { + fixed_point accumulated_scale(0.f, fixed_point_position); + for(int j = -radius_cols; j <= radius_cols; ++j) + { + const int z = l + j; + if(z >= 0 && z < depth) + { + const T value = in[k + i * cols + z * rows * cols + r * cols * rows * depth]; + const fixed_point fp_value(value, fixed_point_position, true); + accumulated_scale = add(accumulated_scale, mul(fp_value, fp_value)); + } + } + accumulated_scale = add(kappa, mul(accumulated_scale, coeff)); + out[k + i * cols + l * rows * cols + r * cols * rows * depth] = accumulated_scale.raw(); + } + } + } + } + } + else + { + for(int r = 0; r < upper_dims; ++r) + { + for(int i = 0; i < rows; ++i) + { + for(int k = 0; k < cols; ++k) + { + fixed_point accumulated_scale(0.f, fixed_point_position); + for(int j = -radius_rows; j <= radius_rows; ++j) + { + const int y = i + j; + for(int l = -radius_cols; l <= radius_cols; ++l) + { + const int x = k + l; + if((x >= 0 && y >= 0) && (x < cols && y < rows)) + { + const T value = in[x + y * cols + r * cols * rows]; + const fixed_point fp_value(value, fixed_point_position, true); + accumulated_scale = add(accumulated_scale, mul(fp_value, fp_value)); + } + } + } + accumulated_scale = add(kappa, mul(accumulated_scale, coeff)); + out[k + i * cols + r * cols * rows] = accumulated_scale.raw(); + } + } + } + } + + if(norm_info.beta() == 1.f) + { + for(int i = 0; i < out.num_elements(); ++i) + { + fixed_point res = div(fixed_point(in[i], fixed_point_position, true), fixed_point(out[i], fixed_point_position, true)); + out[i] = res.raw(); + } + } + else + { + const fixed_point beta(norm_info.beta(), fixed_point_position); + for(int i = 0; i < out.num_elements(); ++i) + { + fixed_point res = pow(fixed_point(out[i], fixed_point_position, true), beta); + res = div(fixed_point(in[i], fixed_point_position, true), res); + out[i] = res.raw(); + } + } +} + +// Pooling layer +template +void pooling_layer(const Tensor &in, Tensor &out, PoolingLayerInfo pool_info, int fixed_point_position) +{ + const int pool_size = pool_info.pool_size(); + PoolingType type = pool_info.pool_type(); + int pool_stride_x = 0; + int pool_stride_y = 0; + int pad_x = 0; + int pad_y = 0; + std::tie(pool_stride_x, pool_stride_y) = pool_info.pad_stride_info().stride(); + std::tie(pad_x, pad_y) = pool_info.pad_stride_info().pad(); + + const int cols_in = static_cast(in.shape()[0]); + const int rows_in = static_cast(in.shape()[1]); + + const int cols_out = static_cast(out.shape()[0]); + const int rows_out = static_cast(out.shape()[1]); + + int upper_dims = in.shape().total_size() / (cols_in * rows_in); + + int pooled_height = static_cast(ceil(static_cast(rows_in + 2 * pad_x - pool_size) / pool_stride_x)) + 1; + int pooled_width = static_cast(ceil(static_cast(cols_in + 2 * pad_y - pool_size) / pool_stride_y)) + 1; + + if((pooled_height - 1) * pool_stride_x >= rows_in + pad_x) + { + --pooled_height; + } + if((pooled_width - 1) * pool_stride_y >= cols_in + pad_y) + { + --pooled_width; + } + + if(type == PoolingType::MAX) + { + for(int r = 0; r < upper_dims; ++r) + { + for(int i = 0; i < pooled_height; ++i) + { + for(int k = 0; k < pooled_width; ++k) + { + int hstart = i * pool_stride_x - pad_x; + int wstart = k * 
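/* Worked example of the pooled size computed above:
       pooled_dim = ceil((in_dim + 2 * pad - pool_size) / stride) + 1
   so in_dim = 7, pad = 0, pool_size = 3, stride = 2 gives ceil(4 / 2) + 1 = 3;
   a final window that would start at or beyond the padded input is dropped by
   the decrement above. */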
pool_stride_y - pad_y; + int hend = std::min(hstart + pool_size, rows_in); + int wend = std::min(wstart + pool_size, cols_in); + hstart = std::max(hstart, 0); + wstart = std::max(wstart, 0); + + T max_val = std::numeric_limits::lowest(); + for(int y = hstart; y < hend; ++y) + { + for(int x = wstart; x < wend; ++x) + { + T val = in[r * cols_in * rows_in + y * cols_in + x]; + if(val > max_val) + { + max_val = val; + } + } + } + + out[r * rows_out * cols_out + i * pooled_width + k] = max_val; + } + } + } + } + else // Average pooling + { + for(int r = 0; r < upper_dims; ++r) + { + for(int i = 0; i < pooled_height; ++i) + { + for(int k = 0; k < pooled_width; ++k) + { + T avg_val = 0; + + int hstart = i * pool_stride_x - pad_x; + int wstart = k * pool_stride_y - pad_y; + int hend = std::min(hstart + pool_size, cols_in + pad_x); + int wend = std::min(wstart + pool_size, rows_in + pad_y); + int pool = (hend - hstart) * (wend - wstart); + hstart = std::max(hstart, 0); + wstart = std::max(wstart, 0); + hend = std::min(hend, rows_in); + wend = std::min(wend, cols_in); + + if(std::is_floating_point::value) + { + for(int y = hstart; y < hend; ++y) + { + for(int x = wstart; x < wend; ++x) + { + avg_val += in[r * cols_in * rows_in + y * cols_in + x]; + } + } + out[r * rows_out * cols_out + i * pooled_width + k] = avg_val / pool; + } + else + { + static std::array scale_values_q8 = + { { 0x0, 0x0, 0x40, 0x2A, 0x20, 0x19, 0x15, 0x12, 0x10, 0xE } }; + + for(int y = hstart; y < hend; ++y) + { + for(int x = wstart; x < wend; ++x) + { + avg_val = sqadd_qs8(avg_val, in[r * cols_in * rows_in + y * cols_in + x]); + } + } + out[r * rows_out * cols_out + i * pooled_width + k] = sqmul_qs8(avg_val, (scale_values_q8[pool] >> (7 - fixed_point_position)), fixed_point_position); + } + } + } + } + } +} + +// Softmax Layer +template ::value, int>::type * = nullptr> +void softmax_layer(const Tensor &in, Tensor &out) +{ + const int cols = static_cast(in.shape()[0]); + const int upper_dims = in.shape().total_size() / cols; + for(int r = 0; r < upper_dims; ++r) + { + // Find max + T max = std::numeric_limits::lowest(); + for(int c = 0; c < cols; ++c) + { + const T x = in[r * cols + c]; + if(x > max) + { + max = x; + } + } + + // Regularize + T sum = 0; + for(int c = 0; c < cols; ++c) + { + const T res = exp(in[r * cols + c] - max); + out[r * cols + c] = res; + sum += res; + } + + // Normalize + const T norm_val = 1 / sum; + for(int c = 0; c < cols; ++c) + { + out[r * cols + c] *= norm_val; + } + } +} +template ::value, int>::type * = nullptr> +void softmax_layer(const Tensor &in, Tensor &out) +{ + using namespace fixed_point_arithmetic; + using promoted_T = typename test::traits::promote::type; + + const int fixed_point_position = in.fixed_point_position(); + const int cols = static_cast(in.shape()[0]); + const int upper_dims = in.shape().total_size() / cols; + + for(int r = 0; r < upper_dims; ++r) + { + // Find max + fixed_point max(std::numeric_limits::lowest(), fixed_point_position, true); + for(int c = 0; c < cols; ++c) + { + const fixed_point x(in[r * cols + c], fixed_point_position, true); + if(x > max) + { + max = x; + } + } + + // Regularize + fixed_point sum(0, fixed_point_position); + for(int c = 0; c < cols; ++c) + { + const fixed_point x(in[r * cols + c], fixed_point_position, true); + fixed_point res = exp(x - max); + out[r * cols + c] = res.raw(); + sum = add(sum, static_cast>(res)); + } + + // Normalize + fixed_point sat_sum(sum); + for(int c = 0; c < cols; ++c) + { + const fixed_point x(out[r * cols + c], 
fixed_point_position, true); + out[r * cols + c] = div(x, sat_sum).raw(); + } + } +} + +// Fixed point operations +template +void fixed_point_operation(const Tensor &in, Tensor &out, FixedPointOp op) +{ + int p = in.fixed_point_position(); + switch(op) + { + case FixedPointOp::EXP: + for(int i = 0; i < in.num_elements(); ++i) + { + out[i] = fixed_point_arithmetic::exp(fixed_point_arithmetic::fixed_point(in[i], p, true)).raw(); + } + break; + case FixedPointOp::LOG: + for(int i = 0; i < in.num_elements(); ++i) + { + out[i] = fixed_point_arithmetic::log(fixed_point_arithmetic::fixed_point(in[i], p, true)).raw(); + } + break; + case FixedPointOp::INV_SQRT: + for(int i = 0; i < in.num_elements(); ++i) + { + out[i] = fixed_point_arithmetic::inv_sqrt(fixed_point_arithmetic::fixed_point(in[i], p, true)).raw(); + } + break; + case FixedPointOp::RECIPROCAL: + for(int i = 0; i < in.num_elements(); ++i) + { + out[i] = fixed_point_arithmetic::div(fixed_point_arithmetic::fixed_point(1, p), fixed_point_arithmetic::fixed_point(in[i], p, true)).raw(); + } + break; + default: + ARM_COMPUTE_ERROR("Fixed point operation not supported"); + break; + } +} + +// Tensor print +template +void print(const Tensor &in, std::ostream &out) +{ + out << "\n"; + for(int i = 0; i < in.num_elements(); ++i) + { + out << in[i] << " "; + } + out << "\n"; +} +} // namespace tensor_operations +} // namespace validation +} // namespace test +} // namespace arm_compute + +#endif /* __ARM_COMPUTE_TEST_TENSOR_OPERATIONS_H__ */ diff --git a/tests/validation/TensorVisitors.h b/tests/validation/TensorVisitors.h new file mode 100644 index 0000000000..a274140734 --- /dev/null +++ b/tests/validation/TensorVisitors.h @@ -0,0 +1,386 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_TEST_TENSOR_VISITORS_H__ +#define __ARM_COMPUTE_TEST_TENSOR_VISITORS_H__ + +#include "Tensor.h" +#include "TensorOperations.h" +#include "arm_compute/core/Error.h" + +#include "boost_wrapper.h" + +#include + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace tensor_visitors +{ +// Absolute Difference visitor +struct absolute_difference_visitor : public boost::static_visitor<> +{ +public: + template + void operator()(const Tensor &in1, const Tensor &in2, Tensor &out) const + { + tensor_operations::absolute_difference(in1, in2, out); + } +}; +// Arithmetic Addition visitor +struct arithmetic_addition_visitor : public boost::static_visitor<> +{ +public: + explicit arithmetic_addition_visitor(ConvertPolicy convert_policy) + : _policy(convert_policy) + { + } + + template + void operator()(const Tensor &in1, const Tensor &in2, Tensor &out) const + { + tensor_operations::arithmetic_addition(in1, in2, out, _policy); + } + +private: + ConvertPolicy _policy; +}; +// Arithmetic Subtraction visitor +struct arithmetic_subtraction_visitor : public boost::static_visitor<> +{ +public: + explicit arithmetic_subtraction_visitor(ConvertPolicy convert_policy) + : _policy(convert_policy) + { + } + + template + void operator()(const Tensor &in1, const Tensor &in2, Tensor &out) const + { + tensor_operations::arithmetic_subtraction(in1, in2, out, _policy); + } + +private: + ConvertPolicy _policy; +}; +// Depth Convert visitor +struct depth_convert_visitor : public boost::static_visitor<> +{ +public: + explicit depth_convert_visitor(ConvertPolicy policy, uint32_t shift) + : _policy(policy), _shift(shift) + { + } + + template + void operator()(const Tensor &in, Tensor &out) const + { + tensor_operations::depth_convert(in, out, _policy, _shift); + } + +private: + ConvertPolicy _policy; + uint32_t _shift; +}; +// GEMM visitor +struct gemm_visitor : public boost::static_visitor<> +{ +public: + explicit gemm_visitor(const TensorVariant &in1, const TensorVariant &in2, const TensorVariant &in3, float alpha, float beta) + : _in1(in1), _in2(in2), _in3(in3), _alpha(alpha), _beta(beta) + { + } + + template + void operator()(Tensor &out) const + { + const Tensor &in1 = boost::get>(_in1); + const Tensor &in2 = boost::get>(_in2); + const Tensor &in3 = boost::get>(_in3); + tensor_operations::gemm(in1, in2, in3, out, _alpha, _beta); + } + +private: + const TensorVariant &_in1, &_in2, &_in3; + float _alpha; + float _beta; +}; +// Pixel-wise Multiplication visitor +struct pixel_wise_multiplication_visitor : public boost::static_visitor<> +{ +public: + explicit pixel_wise_multiplication_visitor(float scale, ConvertPolicy convert_policy, RoundingPolicy rounding_policy) + : _scale(scale), _convert_policy(convert_policy), _rounding_policy(rounding_policy) + { + } + + template + void operator()(const Tensor &in1, const Tensor &in2, Tensor &out) const + { + tensor_operations::pixel_wise_multiplication(in1, in2, out, _scale, _convert_policy, _rounding_policy); + } + +private: + float _scale; + ConvertPolicy _convert_policy; + RoundingPolicy _rounding_policy; +}; +// Fixed Point Pixel-wise Multiplication visitor +struct fixed_point_pixel_wise_multiplication_visitor : public boost::static_visitor<> +{ +public: + explicit fixed_point_pixel_wise_multiplication_visitor(const TensorVariant &in1, const TensorVariant &in2, float scale, ConvertPolicy convert_policy, RoundingPolicy rounding_policy) + : _in1(in1), _in2(in2), _scale(scale), _convert_policy(convert_policy), 
_rounding_policy(rounding_policy) + { + } + + template ::value>::type> + void operator()(Tensor &out) const + { + const Tensor &in1 = boost::get>(_in1); + const Tensor &in2 = boost::get>(_in2); + tensor_operations::fixed_point_pixel_wise_multiplication(in1, in2, out, _scale, _convert_policy, _rounding_policy); + } + template < typename T, typename std::enable_if < !std::is_integral::value, int >::type = 0 > + void operator()(Tensor &out) const + { + ARM_COMPUTE_ERROR("NOT SUPPORTED!"); + } + +private: + const TensorVariant &_in1; + const TensorVariant &_in2; + float _scale; + ConvertPolicy _convert_policy; + RoundingPolicy _rounding_policy; +}; +// Threshold operation +void threshold_operation(const Tensor &in, Tensor &out, uint8_t threshold, uint8_t false_value, uint8_t true_value, ThresholdType type, uint8_t upper) +{ + tensor_operations::threshold(in, out, threshold, false_value, true_value, type, upper); +} +// Activation layer visitor +struct activation_layer_visitor : public boost::static_visitor<> +{ +public: + explicit activation_layer_visitor(const TensorVariant &in, ActivationLayerInfo act_info) + : _in(in), _act_info(act_info) + { + } + + template + void operator()(Tensor &out) const + { + const auto &in = boost::get>(_in); + tensor_operations::activation_layer(in, out, _act_info); + } + +private: + const TensorVariant &_in; + const ActivationLayerInfo _act_info; +}; +// Batch Normalization Layer visitor +struct batch_normalization_layer_visitor : public boost::static_visitor<> +{ +public: + explicit batch_normalization_layer_visitor(const TensorVariant &in, const TensorVariant &mean, const TensorVariant &var, const TensorVariant &beta, const TensorVariant &gamma, float epsilon, + int fixed_point_position = 0) + : _in(in), _mean(mean), _var(var), _beta(beta), _gamma(gamma), _epsilon(epsilon), _fixed_point_position(fixed_point_position) + { + } + + template + void operator()(Tensor &out) const + { + const Tensor &in = boost::get>(_in); + const Tensor &mean = boost::get>(_mean); + const Tensor &var = boost::get>(_var); + const Tensor &beta = boost::get>(_beta); + const Tensor &gamma = boost::get>(_gamma); + tensor_operations::batch_normalization_layer(in, out, mean, var, beta, gamma, _epsilon, _fixed_point_position); + } + +private: + const TensorVariant &_in, &_mean, &_var, &_beta, &_gamma; + float _epsilon; + int _fixed_point_position; +}; +// Convolution Layer visitor +struct convolution_layer_visitor : public boost::static_visitor<> +{ +public: + explicit convolution_layer_visitor(const TensorVariant &in, const TensorVariant &weights, const TensorVariant &bias, PadStrideInfo conv_info) + : _in(in), _weights(weights), _bias(bias), _conv_info(conv_info) + { + } + + template + void operator()(Tensor &out) const + { + const Tensor &in = boost::get>(_in); + const Tensor &weights = boost::get>(_weights); + const Tensor &bias = boost::get>(_bias); + tensor_operations::convolution_layer(in, weights, bias, out, _conv_info); + } + +private: + const TensorVariant &_in; + const TensorVariant &_weights; + const TensorVariant &_bias; + PadStrideInfo _conv_info; +}; + +struct fully_connected_layer_visitor : public boost::static_visitor<> +{ +public: + explicit fully_connected_layer_visitor(const TensorVariant &in, const TensorVariant &weights, const TensorVariant &bias) + : _in(in), _weights(weights), _bias(bias) + { + } + template + void operator()(Tensor &out) const + { + const Tensor &in = boost::get>(_in); + const Tensor &weights = boost::get>(_weights); + const Tensor &bias = 
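/* Usage sketch (an assumption, not shown in this patch): these visitors are
   applied to TensorVariant objects through boost::apply_visitor, e.g.
       boost::apply_visitor(softmax_layer_visitor(input_variant), output_variant);
   which dispatches to the operator() instantiation matching the element type
   stored in the variant. */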
boost::get>(_bias); + tensor_operations::fully_connected_layer(in, weights, bias, out); + } + +private: + const TensorVariant &_in; + const TensorVariant &_weights; + const TensorVariant &_bias; +}; + +// Normalization Layer visitor +struct normalization_layer_visitor : public boost::static_visitor<> +{ +public: + explicit normalization_layer_visitor(const TensorVariant &in, NormalizationLayerInfo norm_info) + : _in(in), _norm_info(norm_info) + { + } + + template + void operator()(Tensor &out) const + { + const Tensor &in = boost::get>(_in); + tensor_operations::normalization_layer(in, out, _norm_info); + } + +private: + const TensorVariant &_in; + NormalizationLayerInfo _norm_info; +}; +// Pooling layer +struct pooling_layer_visitor : public boost::static_visitor<> +{ +public: + explicit pooling_layer_visitor(const TensorVariant &in, PoolingLayerInfo pool_info, int fixed_point_position = 0) + : _in(in), _pool_info(pool_info), _fixed_point_position(fixed_point_position) + { + } + + template + void operator()(Tensor &out) const + { + const Tensor &in = boost::get>(_in); + tensor_operations::pooling_layer(in, out, _pool_info, _fixed_point_position); + } + +private: + const TensorVariant &_in; + PoolingLayerInfo _pool_info; + int _fixed_point_position; +}; +// Softmax Layer visitor +struct softmax_layer_visitor : public boost::static_visitor<> +{ +public: + explicit softmax_layer_visitor(const TensorVariant &in) + : _in(in) + { + } + + template + void operator()(Tensor &out) const + { + const auto &in = boost::get>(_in); + tensor_operations::softmax_layer(in, out); + } + +private: + const TensorVariant &_in; +}; +// Fixed Point operations visitor +struct fixed_point_operation_visitor : public boost::static_visitor<> +{ +public: + explicit fixed_point_operation_visitor(const TensorVariant &in, FixedPointOp op) + : _in(in), _op(op) + { + } + + template ::value, int>::type = 0> + void operator()(Tensor &out) const + { + const Tensor &in = boost::get>(_in); + tensor_operations::fixed_point_operation(in, out, _op); + } + template < typename T, typename std::enable_if < !std::is_integral::value, int >::type = 0 > + void operator()(Tensor &out) const + { + ARM_COMPUTE_ERROR("NOT SUPPORTED!"); + } + +private: + const TensorVariant &_in; + FixedPointOp _op; +}; +// Print Tensor visitor +struct print_visitor : public boost::static_visitor<> +{ +public: + explicit print_visitor(std::ostream &out) + : _out(out) + { + } + + template + void operator()(const Tensor &in) const + { + tensor_operations::print(in, _out); + } + +private: + std::ostream &_out; +}; +} // namespace tensor_visitors +} // namespace validation +} // namespace test +} // namespace arm_compute + +#endif /* __ARM_COMPUTE_TEST_TENSOR_VISITORS_H__ */ diff --git a/tests/validation/UNIT/CMakeLists.txt b/tests/validation/UNIT/CMakeLists.txt new file mode 100644 index 0000000000..a0603f150c --- /dev/null +++ b/tests/validation/UNIT/CMakeLists.txt @@ -0,0 +1,37 @@ +# Copyright (c) 2017 ARM Limited. 
+#
+# SPDX-License-Identifier: MIT
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to
+# deal in the Software without restriction, including without limitation the
+# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+# sell copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+cmake_minimum_required (VERSION 3.1)
+
+set(arm_compute_test_validation_UNIT_SOURCE_FILES
+    ${CMAKE_CURRENT_SOURCE_DIR}/TensorInfo.cpp
+    ${CMAKE_CURRENT_SOURCE_DIR}/Utils.cpp
+)
+
+add_library(arm_compute_test_validation_UNIT OBJECT
+    ${arm_compute_test_validation_UNIT_SOURCE_FILES}
+)
+
+set(arm_compute_test_validation_TARGET_OBJECTS
+    ${arm_compute_test_validation_TARGET_OBJECTS}
+    $<TARGET_OBJECTS:arm_compute_test_validation_UNIT>
+    PARENT_SCOPE
+)
diff --git a/tests/validation/UNIT/FixedPoint.cpp b/tests/validation/UNIT/FixedPoint.cpp new file mode 100644 index 0000000000..63d4150318 --- /dev/null +++ b/tests/validation/UNIT/FixedPoint.cpp @@ -0,0 +1,163 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */ +#include "validation/FixedPoint.h" + +#include "TypePrinter.h" +#include "Utils.h" +#include "validation/Validation.h" +#include "validation/ValidationUserConfiguration.h" + +#include "boost_wrapper.h" + +#include +#include + +using namespace arm_compute; +using namespace arm_compute::test; +using namespace arm_compute::test::validation; + +namespace +{ +std::string func_names[] = +{ + "add", "sub", "mul", "exp", "log", "inv_sqrt" +}; +} // namespace + +#ifndef DOXYGEN_SKIP_THIS +BOOST_AUTO_TEST_SUITE(UNIT) +BOOST_AUTO_TEST_SUITE(FixedPoint) + +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(FixedPointQS8Inputs, boost::unit_test::data::make(func_names) * boost::unit_test::data::xrange(1, 7), func_name, frac_bits) +{ + const std::string base_file_name = user_config.path.get() + "/dumps/" + func_name + "_Q8." + cpp11::to_string(frac_bits); + std::ifstream inputs_file{ base_file_name + ".in", std::ios::binary | std::ios::in }; + + BOOST_TEST_INFO(base_file_name + ".in"); + BOOST_TEST_REQUIRE(inputs_file.good()); + + float float_val = 0.f; + + // Read first value + inputs_file.read(reinterpret_cast(&float_val), sizeof(float_val)); + + while(inputs_file.good()) + { + // Convert to fixed point + fixed_point_arithmetic::fixed_point in_val(float_val, frac_bits); + + // Check that the value didn't change + BOOST_TEST(static_cast(in_val) == float_val); + + // Read next value + inputs_file.read(reinterpret_cast(&float_val), sizeof(float_val)); + } +} + +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly")) +//FIXME: Figure out how to handle expected failures properly +// The last input argument specifies the expected number of failures for a +// given combination of (function name, number of fractional bits) as defined +// by the first two arguments. +BOOST_DATA_TEST_CASE(FixedPointQS8Outputs, (boost::unit_test::data::make(func_names) * boost::unit_test::data::xrange(1, 7)) ^ (boost::unit_test::data::make({ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 8, 13, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11, 32, 67 })), + func_name, frac_bits, expected_failures) +{ + const std::string base_file_name = user_config.path.get() + "/dumps/" + func_name + "_Q8." 
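/* QS8 background for these tests: with frac_bits fractional bits the
   representable step is 2^-frac_bits and the range is
   [-2^(7 - frac_bits), 2^(7 - frac_bits) - 2^-frac_bits]; e.g. frac_bits = 3
   gives steps of 0.125 over [-16, 15.875]. The round-trip check in
   FixedPointQS8Inputs above can therefore only pass for inputs already on
   that grid. */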
+ cpp11::to_string(frac_bits); + std::ifstream inputs_file{ base_file_name + ".in", std::ios::binary | std::ios::in }; + std::ifstream reference_file{ base_file_name + ".out", std::ios::binary | std::ios::in }; + + BOOST_TEST_INFO(base_file_name + ".in"); + BOOST_TEST_REQUIRE(inputs_file.good()); + BOOST_TEST_INFO(base_file_name + ".out"); + BOOST_TEST_REQUIRE(reference_file.good()); + + const float step_size = std::pow(2.f, -frac_bits); + + float float_val = 0.f; + float ref_val = 0.f; + int64_t num_mismatches = 0; + + // Read first values + inputs_file.read(reinterpret_cast(&float_val), sizeof(float_val)); + reference_file.read(reinterpret_cast(&ref_val), sizeof(ref_val)); + + while(inputs_file.good() && reference_file.good()) + { + fixed_point_arithmetic::fixed_point in_val(float_val, frac_bits); + fixed_point_arithmetic::fixed_point out_val(0.f, frac_bits); + + float tolerance = 0.f; + + if(func_name == "add") + { + out_val = in_val + in_val; + } + else if(func_name == "sub") + { + out_val = in_val - in_val; //NOLINT + } + else if(func_name == "mul") + { + tolerance = 1.f * step_size; + out_val = in_val * in_val; + } + else if(func_name == "exp") + { + tolerance = 2.f * step_size; + out_val = fixed_point_arithmetic::exp(in_val); + } + else if(func_name == "log") + { + tolerance = 4.f * step_size; + out_val = fixed_point_arithmetic::log(in_val); + } + else if(func_name == "inv_sqrt") + { + tolerance = 5.f * step_size; + out_val = fixed_point_arithmetic::inv_sqrt(in_val); + } + + BOOST_TEST_INFO("input = " << in_val); + BOOST_TEST_INFO("output = " << out_val); + BOOST_TEST_INFO("reference = " << ref_val); + BOOST_TEST_INFO("tolerance = " << tolerance); + BOOST_TEST_WARN((std::abs(static_cast(out_val) - ref_val) <= tolerance)); + + if(std::abs(static_cast(out_val) - ref_val) > tolerance) + { + ++num_mismatches; + } + + // Read next values + inputs_file.read(reinterpret_cast(&float_val), sizeof(float_val)); + reference_file.read(reinterpret_cast(&ref_val), sizeof(ref_val)); + } + + BOOST_TEST(num_mismatches == expected_failures); +} + +BOOST_AUTO_TEST_SUITE_END() +BOOST_AUTO_TEST_SUITE_END() +#endif diff --git a/tests/validation/UNIT/TensorInfo.cpp b/tests/validation/UNIT/TensorInfo.cpp new file mode 100644 index 0000000000..11ed9f6dcc --- /dev/null +++ b/tests/validation/UNIT/TensorInfo.cpp @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "TypePrinter.h" +#include "validation/Validation.h" + +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Types.h" + +#include "boost_wrapper.h" + +using namespace arm_compute; +using namespace arm_compute::test; +using namespace arm_compute::test::validation; + +#ifndef DOXYGEN_SKIP_THIS +BOOST_AUTO_TEST_SUITE(UNIT) +BOOST_AUTO_TEST_SUITE(TensorInfoValidation) + +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(AutoPadding, + boost::unit_test::data::make({ TensorShape{}, + TensorShape{ 10U }, + TensorShape{ 10U, 10U }, + TensorShape{ 10U, 10U, 10U }, + TensorShape{ 10U, 10U, 10U, 10U }, + TensorShape{ 10U, 10U, 10U, 10U, 10U }, + TensorShape{ 10U, 10U, 10U, 10U, 10U, 10U } + }) + ^ boost::unit_test::data::make({ PaddingSize{ 0, 0, 0, 0 }, + PaddingSize{ 0, 36, 0, 4 }, + PaddingSize{ 4, 36, 4, 4 }, + PaddingSize{ 4, 36, 4, 4 }, + PaddingSize{ 4, 36, 4, 4 }, + PaddingSize{ 4, 36, 4, 4 }, + PaddingSize{ 4, 36, 4, 4 } + }) + ^ boost::unit_test::data::make({ Strides{}, + Strides{ 1U }, + Strides{ 1U, 50U }, + Strides{ 1U, 50U, 900U }, + Strides{ 1U, 50U, 900U, 9000U }, + Strides{ 1U, 50U, 900U, 9000U, 90000U }, + Strides{ 1U, 50U, 900U, 9000U, 90000U, 900000U } + }) + ^ boost::unit_test::data::make( +{ + 0, + 4, + 204, + 204, + 204, + 204, + 204, +}), +shape, auto_padding, strides, offset) +{ + TensorInfo info{ shape, Format::U8 }; + + BOOST_TEST(!info.has_padding()); + + info.auto_padding(); + + validate(info.padding(), auto_padding); + BOOST_TEST(compare_dimensions(info.strides_in_bytes(), strides)); + BOOST_TEST(info.offset_first_element_in_bytes() == offset); +} + +BOOST_AUTO_TEST_SUITE_END() +BOOST_AUTO_TEST_SUITE_END() +#endif diff --git a/tests/validation/UNIT/TensorShape.cpp b/tests/validation/UNIT/TensorShape.cpp new file mode 100644 index 0000000000..2d78cd549a --- /dev/null +++ b/tests/validation/UNIT/TensorShape.cpp @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "TypePrinter.h" +#include "validation/Validation.h" + +#include "arm_compute/core/TensorShape.h" + +#include "boost_wrapper.h" + +using namespace arm_compute; +using namespace arm_compute::test; +using namespace arm_compute::test::validation; + +#ifndef DOXYGEN_SKIP_THIS +BOOST_AUTO_TEST_SUITE(UNIT) +BOOST_AUTO_TEST_SUITE(TensorShapeValidation) + +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(Construction, + boost::unit_test::data::make({ TensorShape{}, + TensorShape{ 1U }, + TensorShape{ 2U }, + TensorShape{ 2U, 3U }, + TensorShape{ 2U, 3U, 5U }, + TensorShape{ 2U, 3U, 5U, 7U }, + TensorShape{ 2U, 3U, 5U, 7U, 11U }, + TensorShape{ 2U, 3U, 5U, 7U, 11U, 13U } + }) + ^ boost::unit_test::data::make({ 0, 0, 1, 2, 3, 4, 5, 6 }) ^ boost::unit_test::data::make({ 0, 1, 2, 6, 30, 210, 2310, 30030 }), + shape, num_dimensions, total_size) +{ + BOOST_TEST(shape.num_dimensions() == num_dimensions); + BOOST_TEST(shape.total_size() == total_size); +} + +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(SetEmpty, boost::unit_test::data::make({ 0, 1, 2, 3, 4, 5 }), dimension) +{ + TensorShape shape; + + shape.set(dimension, 10); + + BOOST_TEST(shape.num_dimensions() == dimension + 1); + BOOST_TEST(shape.total_size() == 10); +} + +BOOST_AUTO_TEST_SUITE_END() +BOOST_AUTO_TEST_SUITE_END() +#endif diff --git a/tests/validation/UNIT/Utils.cpp b/tests/validation/UNIT/Utils.cpp new file mode 100644 index 0000000000..7a09be52b5 --- /dev/null +++ b/tests/validation/UNIT/Utils.cpp @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "Utils.h" + +#include "TypePrinter.h" +#include "validation/Validation.h" + +#include "boost_wrapper.h" + +#include + +using namespace arm_compute; +using namespace arm_compute::test; +using namespace arm_compute::test::cpp11; +using namespace arm_compute::test::validation; + +#ifndef DOXYGEN_SKIP_THIS +BOOST_AUTO_TEST_SUITE(UNIT) +BOOST_AUTO_TEST_SUITE(Utils) + +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(RoundHalfUp, boost::unit_test::data::make({ 1.f, 1.2f, 1.5f, 2.5f, 2.9f, -3.f, -3.5f, -3.8f, -4.3f, -4.5f }) ^ boost::unit_test::data::make({ 1.f, 1.f, 2.f, 3.f, 3.f, -3.f, -3.f, -4.f, -4.f, -4.f }), + value, result) +{ + BOOST_TEST(cpp11::round_half_up(value) == result); +} + +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(RoundHalfEven, boost::unit_test::data::make({ 1.f, 1.2f, 1.5f, 2.5f, 2.9f, -3.f, -3.5f, -3.8f, -4.3f, -4.5f }) ^ boost::unit_test::data::make({ 1.f, 1.f, 2.f, 2.f, 3.f, -3.f, -4.f, -4.f, -4.f, -4.f }), + value, result) +{ + BOOST_TEST(cpp11::round_half_even(value) == result); +} + +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(Index2Coord, boost::unit_test::data::make({ TensorShape{ 1U }, TensorShape{ 2U }, TensorShape{ 2U, 3U } }) ^ boost::unit_test::data::make({ 0, 1, 2 }) ^ + boost::unit_test::data::make({ Coordinates{ 0 }, Coordinates{ 1 }, Coordinates{ 0, 1 } }), shape, index, ref_coordinate) +{ + Coordinates coordinate = index2coord(shape, index); + + BOOST_TEST(compare_dimensions(coordinate, ref_coordinate)); +} + +//FIXME: Negative tests only work in debug mode +#if 0 +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(Index2CoordFail, boost::unit_test::data::make({ TensorShape{}, TensorShape{ 2U }, TensorShape{ 2U } }) ^ boost::unit_test::data::make({ 0, -1, 2 }), shape, index) +{ + BOOST_CHECK_THROW(index2coord(shape, index), std::runtime_error); +} +#endif + +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(Coord2Index, boost::unit_test::data::make({ TensorShape{ 1U }, TensorShape{ 2U }, TensorShape{ 2U, 3U } }) ^ boost::unit_test::data::make({ Coordinates{ 0 }, Coordinates{ 1 }, Coordinates{ 0, 1 } }) + ^ boost::unit_test::data::make({ 0, 1, 2 }), + shape, coordinate, ref_index) +{ + int index = coord2index(shape, coordinate); + + BOOST_TEST(index == ref_index); +} + +//FIXME: Negative tests only work in debug mode +#if 0 +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit") * boost::unit_test::label("nightly")) +BOOST_DATA_TEST_CASE(Coord2IndexFail, boost::unit_test::data::make({ TensorShape{}, TensorShape{ 2U } }) ^ boost::unit_test::data::make({ Coordinates{ 0 }, Coordinates{} }), shape, coordinate) +{ + BOOST_CHECK_THROW(coord2index(shape, coordinate), std::runtime_error); +} +#endif + +BOOST_AUTO_TEST_SUITE_END() +BOOST_AUTO_TEST_SUITE_END() +#endif diff --git a/tests/validation/Validation.cpp b/tests/validation/Validation.cpp new file mode 100644 index 0000000000..335d2644d3 --- /dev/null +++ b/tests/validation/Validation.cpp @@ -0,0 +1,359 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "Validation.h"
+
+#include "IAccessor.h"
+#include "RawTensor.h"
+#include "TypePrinter.h"
+#include "Utils.h"
+
+#include "arm_compute/core/Coordinates.h"
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/FixedPoint.h"
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/runtime/Tensor.h"
+
+#include <algorithm>
+#include <cmath>
+#include <cstdint>
+#include <iomanip>
+#include <vector>
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+/** Read the value at @p ptr, interpreting the memory according to @p data_type, and convert it to double.
+ *
+ * @param[in] ptr       Pointer to the value.
+ * @param[in] data_type Data type of the value.
+ *
+ * @return The value at @p ptr converted to double.
+ */
+double get_double_data(const void *ptr, DataType data_type)
+{
+    switch(data_type)
+    {
+        case DataType::U8:
+            return *reinterpret_cast<const uint8_t *>(ptr);
+        case DataType::S8:
+            return *reinterpret_cast<const int8_t *>(ptr);
+        case DataType::QS8:
+            return *reinterpret_cast<const qint8_t *>(ptr);
+        case DataType::U16:
+            return *reinterpret_cast<const uint16_t *>(ptr);
+        case DataType::S16:
+            return *reinterpret_cast<const int16_t *>(ptr);
+        case DataType::U32:
+            return *reinterpret_cast<const uint32_t *>(ptr);
+        case DataType::S32:
+            return *reinterpret_cast<const int32_t *>(ptr);
+        case DataType::U64:
+            return *reinterpret_cast<const uint64_t *>(ptr);
+        case DataType::S64:
+            return *reinterpret_cast<const int64_t *>(ptr);
+#if ENABLE_FP16
+        case DataType::F16:
+            return *reinterpret_cast<const float16_t *>(ptr);
+#endif
+        case DataType::F32:
+            return *reinterpret_cast<const float *>(ptr);
+        case DataType::F64:
+            return *reinterpret_cast<const double *>(ptr);
+        case DataType::SIZET:
+            return *reinterpret_cast<const size_t *>(ptr);
+        default:
+            ARM_COMPUTE_ERROR("NOT SUPPORTED!");
+    }
+}
+
+void check_border_element(const IAccessor &tensor, const Coordinates &id,
+                          const BorderMode &border_mode, const void *border_value,
+                          int64_t &num_elements, int64_t &num_mismatches)
+{
+    const size_t channel_size = element_size_from_data_type(tensor.data_type());
+    const auto   ptr          = static_cast<const uint8_t *>(tensor(id));
+
+    if(border_mode == BorderMode::REPLICATE)
+    {
+        Coordinates border_id{ id };
+        border_id.set(1, 0);
+        border_value = tensor(border_id);
+    }
+
+    // Iterate over all channels within one element
+    for(int channel = 0; channel < tensor.num_channels(); ++channel)
+    {
+        const size_t channel_offset = channel * channel_size;
+        const double target         = get_double_data(ptr + channel_offset, tensor.data_type());
+        const double ref            = get_double_data(static_cast<const uint8_t *>(border_value) + channel_offset, tensor.data_type());
+        const double difference     = target - ref;
+
+        BOOST_TEST_INFO("id = " << id);
+        BOOST_TEST_INFO("channel = " << channel);
+        BOOST_TEST_INFO("reference = " << std::setprecision(5) << ref);
+        BOOST_TEST_INFO("target = " << std::setprecision(5) << target);
+        BOOST_TEST_WARN(difference == 0);
+
+        if(difference != 0.f)
+        {
+            ++num_mismatches;
+        }
+
+        ++num_elements;
+    }
+}
+
+void check_single_element(const Coordinates &id, const IAccessor &tensor, const RawTensor &reference, float tolerance_value,
+                          uint64_t wrap_range, int min_channels, size_t channel_size, int64_t &num_mismatches, int64_t &num_elements)
+{
+    const auto ptr     = static_cast<const uint8_t *>(tensor(id));
+    const auto ref_ptr = static_cast<const uint8_t *>(reference(id));
+
+    // Iterate over all channels within one element
+    for(int channel = 0; channel < min_channels; ++channel)
+    {
+        const size_t channel_offset = channel * channel_size;
+        const double target         = get_double_data(ptr + channel_offset, reference.data_type());
+        const double ref            = get_double_data(ref_ptr + channel_offset, reference.data_type());
+        const double difference     = target - ref;
+
+        BOOST_TEST_INFO("id = " << id);
+        BOOST_TEST_INFO("channel = " << channel);
+        BOOST_TEST_INFO("reference = " << std::setprecision(5) << ref);
+        BOOST_TEST_INFO("target = " << std::setprecision(5) << target);
+        BOOST_TEST_WARN(difference == 0);
+
+        if(std::abs(difference) > tolerance_value)
+        {
+            // Count a mismatch unless wrapping is tolerated (wrap_range != 0)
+            // and the wrapped difference stays within tolerance_value
+            if(wrap_range == 0 || (wrap_range - std::abs(difference)) > tolerance_value)
+            {
+                ++num_mismatches;
+            }
+        }
+        ++num_elements;
+    }
+}
+} // namespace
+
+void validate(const arm_compute::ValidRegion &region, const arm_compute::ValidRegion &reference)
+{
+    BOOST_TEST(region.anchor.num_dimensions() == reference.anchor.num_dimensions());
+
BOOST_TEST(region.shape.num_dimensions() == reference.shape.num_dimensions()); + + for(unsigned int d = 0; d < region.anchor.num_dimensions(); ++d) + { + BOOST_TEST(region.anchor[d] == reference.anchor[d]); + } + + for(unsigned int d = 0; d < region.shape.num_dimensions(); ++d) + { + BOOST_TEST(region.shape[d] == reference.shape[d]); + } +} + +void validate(const arm_compute::PaddingSize &padding, const arm_compute::PaddingSize &reference) +{ + BOOST_TEST(padding.top == reference.top); + BOOST_TEST(padding.right == reference.right); + BOOST_TEST(padding.bottom == reference.bottom); + BOOST_TEST(padding.left == reference.left); +} + +void validate(const IAccessor &tensor, const RawTensor &reference, float tolerance_value, float tolerance_number, uint64_t wrap_range) +{ + // Validate with valid region covering the entire shape + validate(tensor, reference, shape_to_valid_region(tensor.shape()), tolerance_value, tolerance_number, wrap_range); +} + +void validate(const IAccessor &tensor, const RawTensor &reference, const ValidRegion &valid_region, float tolerance_value, float tolerance_number, uint64_t wrap_range) +{ + int64_t num_mismatches = 0; + int64_t num_elements = 0; + + BOOST_TEST(tensor.element_size() == reference.element_size()); + BOOST_TEST(tensor.format() == reference.format()); + BOOST_TEST(tensor.data_type() == reference.data_type()); + BOOST_TEST(tensor.num_channels() == reference.num_channels()); + BOOST_TEST(compare_dimensions(tensor.shape(), reference.shape())); + + const int min_elements = std::min(tensor.num_elements(), reference.num_elements()); + const int min_channels = std::min(tensor.num_channels(), reference.num_channels()); + const size_t channel_size = element_size_from_data_type(reference.data_type()); + + // Iterate over all elements within valid region, e.g. U8, S16, RGB888, ... + for(int element_idx = 0; element_idx < min_elements; ++element_idx) + { + const Coordinates id = index2coord(reference.shape(), element_idx); + if(is_in_valid_region(valid_region, id)) + { + check_single_element(id, tensor, reference, tolerance_value, wrap_range, min_channels, channel_size, num_mismatches, num_elements); + } + } + + const int64_t absolute_tolerance_number = tolerance_number * num_elements; + const float percent_mismatches = static_cast(num_mismatches) / num_elements * 100.f; + + BOOST_TEST(num_mismatches <= absolute_tolerance_number, + num_mismatches << " values (" << std::setprecision(2) << percent_mismatches + << "%) mismatched (maximum tolerated " << std::setprecision(2) << tolerance_number << "%)"); +} + +void validate(const IAccessor &tensor, const void *reference_value) +{ + BOOST_TEST_REQUIRE((reference_value != nullptr)); + + int64_t num_mismatches = 0; + int64_t num_elements = 0; + const size_t channel_size = element_size_from_data_type(tensor.data_type()); + + // Iterate over all elements, e.g. U8, S16, RGB888, ... 
+ for(int element_idx = 0; element_idx < tensor.num_elements(); ++element_idx) + { + const Coordinates id = index2coord(tensor.shape(), element_idx); + + const auto ptr = static_cast(tensor(id)); + + // Iterate over all channels within one element + for(int channel = 0; channel < tensor.num_channels(); ++channel) + { + const size_t channel_offset = channel * channel_size; + const double target = get_double_data(ptr + channel_offset, tensor.data_type()); + const double ref = get_double_data(reference_value, tensor.data_type()); + const double difference = target - ref; + + BOOST_TEST_INFO("id = " << id); + BOOST_TEST_INFO("channel = " << channel); + BOOST_TEST_INFO("reference = " << std::setprecision(5) << ref); + BOOST_TEST_INFO("target = " << std::setprecision(5) << target); + BOOST_TEST_WARN(difference == 0); + + if(difference != 0.f) + { + ++num_mismatches; + } + + ++num_elements; + } + } + + const float percent_mismatches = static_cast(num_mismatches) / num_elements * 100.f; + + BOOST_TEST(num_mismatches == 0, + num_mismatches << " values (" << std::setprecision(2) << percent_mismatches << "%) mismatched"); +} + +void validate(const IAccessor &tensor, BorderSize border_size, const BorderMode &border_mode, const void *border_value) +{ + if(border_mode == BorderMode::UNDEFINED) + { + return; + } + else if(border_mode == BorderMode::CONSTANT) + { + BOOST_TEST((border_value != nullptr)); + } + + int64_t num_mismatches = 0; + int64_t num_elements = 0; + const int slice_size = tensor.shape()[0] * tensor.shape()[1]; + + for(int element_idx = 0; element_idx < tensor.num_elements(); element_idx += slice_size) + { + Coordinates id = index2coord(tensor.shape(), element_idx); + + // Top border + for(int y = -border_size.top; y < 0; ++y) + { + id.set(1, y); + + for(int x = -border_size.left; x < static_cast(tensor.shape()[0]) + static_cast(border_size.right); ++x) + { + id.set(0, x); + + check_border_element(tensor, id, border_mode, border_value, num_elements, num_mismatches); + } + } + + // Bottom border + for(int y = tensor.shape()[1]; y < static_cast(tensor.shape()[1]) + static_cast(border_size.bottom); ++y) + { + id.set(1, y); + + for(int x = -border_size.left; x < static_cast(tensor.shape()[0]) + static_cast(border_size.right); ++x) + { + id.set(0, x); + + check_border_element(tensor, id, border_mode, border_value, num_elements, num_mismatches); + } + } + + // Left/right border + for(int y = 0; y < static_cast(tensor.shape()[1]); ++y) + { + id.set(1, y); + + // Left border + for(int x = -border_size.left; x < 0; ++x) + { + id.set(0, x); + + check_border_element(tensor, id, border_mode, border_value, num_elements, num_mismatches); + } + + // Right border + for(int x = tensor.shape()[0]; x < static_cast(tensor.shape()[0]) + static_cast(border_size.right); ++x) + { + id.set(0, x); + + check_border_element(tensor, id, border_mode, border_value, num_elements, num_mismatches); + } + } + } + + const float percent_mismatches = static_cast(num_mismatches) / num_elements * 100.f; + + BOOST_TEST(num_mismatches == 0, + num_mismatches << " values (" << std::setprecision(2) << percent_mismatches << "%) mismatched"); +} + +void validate(std::vector classified_labels, std::vector expected_labels) +{ + BOOST_TEST(expected_labels.size() != 0); + BOOST_TEST(classified_labels.size() == expected_labels.size()); + + for(unsigned int i = 0; i < expected_labels.size(); ++i) + { + BOOST_TEST(classified_labels[i] == expected_labels[i]); + } +} +} // namespace validation +} // namespace test +} // namespace arm_compute 
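The wrap-around tolerance implemented in check_single_element() above is subtle enough to deserve a standalone illustration. The following sketch is not part of the patch; within_tolerance and the sample values are hypothetical, but the predicate mirrors the check above: a difference only counts as a mismatch if it exceeds tolerance_value and cannot be explained by one side wrapping around wrap_range.

    // Standalone sketch of the wrap-around check used by check_single_element().
    #include <cmath>
    #include <cstdint>
    #include <iostream>

    // Returns true if target is close enough to ref, either directly or after
    // accounting for a wrap-around of length wrap_range (0 disables wrapping).
    bool within_tolerance(double target, double ref, double tolerance_value, uint64_t wrap_range)
    {
        const double difference = std::abs(target - ref);
        if(difference <= tolerance_value)
        {
            return true;
        }
        return wrap_range != 0 && (wrap_range - difference) <= tolerance_value;
    }

    int main()
    {
        // U8 example: the reference rounds up to 256 and wraps to 0 while the
        // target saturates at 255; the wrapped difference is 256 - 255 = 1.
        std::cout << within_tolerance(255.0, 0.0, 1.0, 256) << "\n"; // 1 (tolerated)
        // Genuine mismatch: wrapping cannot explain a difference of 200.
        std::cout << within_tolerance(200.0, 0.0, 1.0, 256) << "\n"; // 0 (mismatch)
        return 0;
    }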
diff --git a/tests/validation/Validation.h b/tests/validation/Validation.h
new file mode 100644
index 0000000000..865d05b1f6
--- /dev/null
+++ b/tests/validation/Validation.h
@@ -0,0 +1,127 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_TEST_REFERENCE_VALIDATION_H__
+#define __ARM_COMPUTE_TEST_REFERENCE_VALIDATION_H__
+
+#include "arm_compute/core/Types.h"
+
+#include "boost_wrapper.h"
+
+#include <vector>
+
+namespace arm_compute
+{
+class Tensor;
+
+namespace test
+{
+class RawTensor;
+class IAccessor;
+
+namespace validation
+{
+template <typename T>
+boost::test_tools::predicate_result compare_dimensions(const Dimensions<T> &dimensions1, const Dimensions<T> &dimensions2)
+{
+    if(dimensions1.num_dimensions() != dimensions2.num_dimensions())
+    {
+        boost::test_tools::predicate_result result(false);
+        result.message() << "Different dimensionality [" << dimensions1.num_dimensions() << "!=" << dimensions2.num_dimensions() << "]";
+        return result;
+    }
+
+    for(unsigned int i = 0; i < dimensions1.num_dimensions(); ++i)
+    {
+        if(dimensions1[i] != dimensions2[i])
+        {
+            boost::test_tools::predicate_result result(false);
+            result.message() << "Mismatch in dimension " << i << " [" << dimensions1[i] << "!=" << dimensions2[i] << "]";
+            return result;
+        }
+    }
+
+    return true;
+}
+
+/** Validate valid regions.
+ *
+ * - Dimensionality has to be the same.
+ * - Anchors have to match.
+ * - Shapes have to match.
+ */
+void validate(const arm_compute::ValidRegion &region, const arm_compute::ValidRegion &reference);
+
+/** Validate padding.
+ *
+ * Padding on all sides has to be the same.
+ */
+void validate(const arm_compute::PaddingSize &padding, const arm_compute::PaddingSize &reference);
+
+/** Validate tensors.
+ *
+ * - Dimensionality has to be the same.
+ * - All values have to match.
+ *
+ * @note wrap_range allows a test to pass in cases where the reference tensor rounds up to the wrapping point and
+ * wraps around to zero while the test tensor stays at the wrapping point. This may mask genuinely erroneous cases
+ * (where the difference between reference tensor and test tensor is a multiple of wrap_range), but such errors
+ * would be detected by other test cases.
+ */
+void validate(const IAccessor &tensor, const RawTensor &reference, float tolerance_value = 0.f, float tolerance_number = 0.f, uint64_t wrap_range = 0);
+
+/** Validate tensors with valid region.
+ *
+ * - Dimensionality has to be the same.
+ * - All values have to match.
+ *
+ * @note wrap_range allows a test to pass in cases where the reference tensor rounds up to the wrapping point and
+ * wraps around to zero while the test tensor stays at the wrapping point. This may mask genuinely erroneous cases
+ * (where the difference between reference tensor and test tensor is a multiple of wrap_range), but such errors
+ * would be detected by other test cases.
+ */
+void validate(const IAccessor &tensor, const RawTensor &reference, const ValidRegion &valid_region, float tolerance_value = 0.f, float tolerance_number = 0.f, uint64_t wrap_range = 0);
+
+/** Validate tensors against constant value.
+ *
+ * - All values have to match.
+ */
+void validate(const IAccessor &tensor, const void *reference_value);
+
+/** Validate border against a constant value.
+ *
+ * - All border values have to match the specified value if mode is CONSTANT.
+ * - All border values have to be replicated if mode is REPLICATE.
+ * - Nothing is validated for mode UNDEFINED.
+ */
+void validate(const IAccessor &tensor, BorderSize border_size, const BorderMode &border_mode, const void *border_value);
+
+/** Validate classified labels against expected ones.
+ *
+ * - All values have to match.
+ */
+void validate(std::vector<unsigned int> classified_labels, std::vector<unsigned int> expected_labels);
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif
diff --git a/tests/validation/ValidationProgramOptions.cpp b/tests/validation/ValidationProgramOptions.cpp
new file mode 100644
index 0000000000..adb8c5ab6c
--- /dev/null
+++ b/tests/validation/ValidationProgramOptions.cpp
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "ValidationProgramOptions.h"
+
+#include <thread>
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Weffc++"
+#pragma GCC diagnostic ignored "-Wnon-virtual-dtor"
+#pragma GCC diagnostic ignored "-Wctor-dtor-privacy"
+#include "boost/program_options.hpp"
+#pragma GCC diagnostic pop
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+ValidationProgramOptions::ValidationProgramOptions()
+{
+    boost::program_options::options_description options("Validation options");
+    options.add_options()("runs", boost::program_options::value<unsigned int>()->default_value(1), "Repetitions per test");
+    options.add_options()("threads", boost::program_options::value<unsigned int>()->default_value(std::thread::hardware_concurrency()), "Number of parallel CPU threads");
+    add_options(options);
+}
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/ValidationProgramOptions.h b/tests/validation/ValidationProgramOptions.h
new file mode 100644
index 0000000000..bf30db960d
--- /dev/null
+++ b/tests/validation/ValidationProgramOptions.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_TEST_VALIDATION_PROGRAM_OPTIONS_H__
+#define __ARM_COMPUTE_TEST_VALIDATION_PROGRAM_OPTIONS_H__
+
+#include "ProgramOptions.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+/** Subclass of @ref ProgramOptions that adds validation specific options. */
+class ValidationProgramOptions : public ProgramOptions
+{
+public:
+    /** Defines additional options. */
+    ValidationProgramOptions();
+};
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif
diff --git a/tests/validation/ValidationUserConfiguration.h b/tests/validation/ValidationUserConfiguration.h
new file mode 100644
index 0000000000..28b58e8375
--- /dev/null
+++ b/tests/validation/ValidationUserConfiguration.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_TEST_REFERENCE_VALIDATION_USER_CONFIGURATION_H__ +#define __ARM_COMPUTE_TEST_REFERENCE_VALIDATION_USER_CONFIGURATION_H__ + +#include "UserConfiguration.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +// Validation requires no specific configuration +using ValidationUserConfiguration = UserConfiguration; +} // namespace validation + +extern validation::ValidationUserConfiguration user_config; +} // namespace test +} // namespace arm_compute +#endif diff --git a/tests/validation/main.cpp b/tests/validation/main.cpp new file mode 100644 index 0000000000..844ee36200 --- /dev/null +++ b/tests/validation/main.cpp @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */
+#define BOOST_TEST_ALTERNATIVE_INIT_API
+
+#include "Globals.h"
+#include "TensorLibrary.h"
+#include "Utils.h"
+#include "ValidationProgramOptions.h"
+#include "ValidationUserConfiguration.h"
+
+#include "arm_compute/runtime/Scheduler.h"
+
+#include "boost_wrapper.h"
+
+#include <iostream>
+#include <memory>
+#include <random>
+
+using namespace arm_compute::test;
+using namespace arm_compute::test::validation;
+
+namespace arm_compute
+{
+namespace test
+{
+ValidationUserConfiguration    user_config;
+std::unique_ptr<TensorLibrary> library;
+} // namespace test
+} // namespace arm_compute
+
+struct GlobalFixture
+{
+    GlobalFixture()
+    {
+        if(user_config.seed.is_set())
+        {
+            library = cpp14::make_unique<TensorLibrary>(user_config.path.get(), user_config.seed);
+        }
+        else
+        {
+            library = cpp14::make_unique<TensorLibrary>(user_config.path.get());
+        }
+
+        BOOST_TEST_MESSAGE("Seed: " << library->seed());
+    }
+};
+
+BOOST_GLOBAL_FIXTURE(GlobalFixture);
+
+bool init_unit_test()
+{
+    boost::unit_test::framework::master_test_suite().p_name.value = "Compute Library Validation Tests";
+
+    ValidationProgramOptions options;
+
+    int   &argc = boost::unit_test::framework::master_test_suite().argc;
+    char **argv = boost::unit_test::framework::master_test_suite().argv;
+
+    try
+    {
+        options.parse_commandline(argc, argv);
+
+        if(options.wants_help())
+        {
+            std::cout << "Usage: " << argv[0] << " [options] PATH\n";
+            std::cout << options.get_help() << "\n";
+            return false;
+        }
+
+        user_config = ValidationUserConfiguration(options);
+    }
+    catch(const boost::program_options::required_option &err)
+    {
+        std::cerr << "Error: " << err.what() << "\n";
+        std::cout << "\nUsage: " << argv[0] << " [options] PATH\n";
+        std::cout << options.get_help() << "\n";
+        return false;
+    }
+
+    std::cout << "Using " << user_config.threads << " CPU " << (user_config.threads == 1 ? "thread" : "threads") << "\n";
+    arm_compute::Scheduler::get().set_num_threads(user_config.threads);
+    return true;
+}
diff --git a/tests/validation/system_tests/CL/AlexNet.cpp b/tests/validation/system_tests/CL/AlexNet.cpp
new file mode 100644
index 0000000000..f7a88207c4
--- /dev/null
+++ b/tests/validation/system_tests/CL/AlexNet.cpp
@@ -0,0 +1,111 @@
+#ifdef INTERNAL_ONLY //FIXME Delete this file before the release
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "CL/CLAccessor.h"
+#include "CL/Helper.h"
+#include "validation/Validation.h"
+
+#include "arm_compute/runtime/CL/CLSubTensor.h"
+#include "arm_compute/runtime/CL/functions/CLActivationLayer.h"
+#include "arm_compute/runtime/CL/functions/CLConvolutionLayer.h"
+#include "arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h"
+#include "arm_compute/runtime/CL/functions/CLNormalizationLayer.h"
+#include "arm_compute/runtime/CL/functions/CLPoolingLayer.h"
+#include "arm_compute/runtime/CL/functions/CLSoftmaxLayer.h"
+
+#include "model_objects/AlexNet.h"
+
+#include <string>
+
+using namespace arm_compute;
+using namespace arm_compute::test;
+using namespace arm_compute::test::cl;
+using namespace arm_compute::test::validation;
+
+namespace
+{
+using CLAlexNetModel = model_objects::AlexNet<ICLTensor,
+                                              CLTensor,
+                                              CLSubTensor,
+                                              CLAccessor,
+                                              CLActivationLayer,
+                                              CLConvolutionLayer,
+                                              CLFullyConnectedLayer,
+                                              CLNormalizationLayer,
+                                              CLPoolingLayer,
+                                              CLSoftmaxLayer>;
+
+std::vector<unsigned int> compute_alexnet(unsigned int batches, std::string input_file)
+{
+    std::vector<std::string> weight_files = { "cnn_data/alexnet_model/conv1_w.dat",
+                                              "cnn_data/alexnet_model/conv2_w.dat",
+                                              "cnn_data/alexnet_model/conv3_w.dat",
+                                              "cnn_data/alexnet_model/conv4_w.dat",
+                                              "cnn_data/alexnet_model/conv5_w.dat",
+                                              "cnn_data/alexnet_model/fc6_w.dat",
+                                              "cnn_data/alexnet_model/fc7_w.dat",
+                                              "cnn_data/alexnet_model/fc8_w.dat"
+                                            };
+
+    std::vector<std::string> bias_files = { "cnn_data/alexnet_model/conv1_b.dat",
+                                            "cnn_data/alexnet_model/conv2_b.dat",
+                                            "cnn_data/alexnet_model/conv3_b.dat",
+                                            "cnn_data/alexnet_model/conv4_b.dat",
+                                            "cnn_data/alexnet_model/conv5_b.dat",
+                                            "cnn_data/alexnet_model/fc6_b.dat",
+                                            "cnn_data/alexnet_model/fc7_b.dat",
+                                            "cnn_data/alexnet_model/fc8_b.dat"
+                                          };
+    CLAlexNetModel network{};
+    network.init_weights(batches);
+    network.build();
+    network.allocate();
+    network.fill(weight_files, bias_files);
+    network.feed(std::move(input_file));
+    network.run();
+
+    return network.get_classifications();
+}
+} // namespace
+
+#ifndef DOXYGEN_SKIP_THIS
+BOOST_AUTO_TEST_SUITE(SYSTEM_TESTS)
+BOOST_AUTO_TEST_SUITE(CL)
+
+BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit"))
+BOOST_AUTO_TEST_CASE(AlexNet)
+{
+    // Compute AlexNet
+    std::vector<unsigned int> classified_labels = compute_alexnet(1, "cnn_data/imagenet_data/shark.dat");
+
+    // Expected labels
+    std::vector<unsigned int> expected_labels = { 2 };
+
+    // Validate labels
+    validate(classified_labels, expected_labels);
+}
+
+BOOST_AUTO_TEST_SUITE_END()
+BOOST_AUTO_TEST_SUITE_END()
+#endif
+#endif /* INTERNAL_ONLY */
diff --git a/tests/validation/system_tests/CL/LeNet5.cpp b/tests/validation/system_tests/CL/LeNet5.cpp
new file mode 100644
index 0000000000..8b83cfa3a6
--- /dev/null
+++ b/tests/validation/system_tests/CL/LeNet5.cpp
@@ -0,0 +1,94 @@
+#ifdef INTERNAL_ONLY //FIXME Delete this file before the release
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "CL/CLAccessor.h"
+#include "CL/Helper.h"
+#include "validation/Validation.h"
+
+#include "arm_compute/runtime/CL/functions/CLActivationLayer.h"
+#include "arm_compute/runtime/CL/functions/CLConvolutionLayer.h"
+#include "arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h"
+#include "arm_compute/runtime/CL/functions/CLPoolingLayer.h"
+#include "arm_compute/runtime/CL/functions/CLSoftmaxLayer.h"
+
+#include "model_objects/LeNet5.h"
+
+using namespace arm_compute;
+using namespace arm_compute::test;
+using namespace arm_compute::test::cl;
+using namespace arm_compute::test::validation;
+
+namespace
+{
+using CLLeNet5Model = model_objects::LeNet5<CLTensor,
+                                            CLAccessor,
+                                            CLActivationLayer,
+                                            CLConvolutionLayer,
+                                            CLFullyConnectedLayer,
+                                            CLPoolingLayer,
+                                            CLSoftmaxLayer>;
+
+std::vector<unsigned int> compute_lenet5(unsigned int batches, std::string input_file)
+{
+    std::vector<std::string> weight_files = { "cnn_data/lenet_model/conv1_w.dat",
+                                              "cnn_data/lenet_model/conv2_w.dat",
+                                              "cnn_data/lenet_model/ip1_w.dat",
+                                              "cnn_data/lenet_model/ip2_w.dat"
+                                            };
+
+    std::vector<std::string> bias_files = { "cnn_data/lenet_model/conv1_b.dat",
+                                            "cnn_data/lenet_model/conv2_b.dat",
+                                            "cnn_data/lenet_model/ip1_b.dat",
+                                            "cnn_data/lenet_model/ip2_b.dat"
+                                          };
+    CLLeNet5Model network{};
+    network.build(batches);
+    network.fill(weight_files, bias_files);
+    network.feed(std::move(input_file));
+    network.run();
+
+    return network.get_classifications();
+}
+} // namespace
+
+#ifndef DOXYGEN_SKIP_THIS
+BOOST_AUTO_TEST_SUITE(SYSTEM_TESTS)
+BOOST_AUTO_TEST_SUITE(CL)
+
+BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit"))
+BOOST_AUTO_TEST_CASE(LeNet5)
+{
+    // Compute LeNet5
+    std::vector<unsigned int> classified_labels = compute_lenet5(10, "cnn_data/mnist_data/input100.dat");
+
+    // Expected labels
+    std::vector<unsigned int> expected_labels = { 7, 2, 1, 0, 4, 1, 4, 9, 5, 9 };
+
+    // Validate labels
+    validate(classified_labels, expected_labels);
+}
+
+BOOST_AUTO_TEST_SUITE_END()
+BOOST_AUTO_TEST_SUITE_END()
+#endif
+#endif /* INTERNAL_ONLY */
diff --git a/tests/validation/system_tests/NEON/AlexNet.cpp b/tests/validation/system_tests/NEON/AlexNet.cpp
new file mode 100644
index 0000000000..e56110d8de
--- /dev/null
+++ b/tests/validation/system_tests/NEON/AlexNet.cpp
@@ -0,0 +1,112 @@
+#ifdef INTERNAL_ONLY //FIXME Delete this file before the release
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "NEON/Helper.h"
+#include "NEON/NEAccessor.h"
+#include "validation/Validation.h"
+
+#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
+#include "arm_compute/runtime/NEON/functions/NEConvolutionLayer.h"
+#include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h"
+#include "arm_compute/runtime/NEON/functions/NENormalizationLayer.h"
+#include "arm_compute/runtime/NEON/functions/NEPoolingLayer.h"
+#include "arm_compute/runtime/NEON/functions/NESoftmaxLayer.h"
+#include "arm_compute/runtime/SubTensor.h"
+
+#include "model_objects/AlexNet.h"
+
+#include <string>
+
+using namespace arm_compute;
+using namespace arm_compute::test;
+using namespace arm_compute::test::neon;
+using namespace arm_compute::test::validation;
+
+namespace
+{
+using NEAlexNetModel = model_objects::AlexNet<ITensor,
+                                              Tensor,
+                                              SubTensor,
+                                              NEAccessor,
+                                              NEActivationLayer,
+                                              NEConvolutionLayer,
+                                              NEFullyConnectedLayer,
+                                              NENormalizationLayer,
+                                              NEPoolingLayer,
+                                              NESoftmaxLayer>;
+
+std::vector<unsigned int> compute_alexnet(unsigned int batches, std::string input_file)
+{
+    std::vector<std::string> weight_files = { "cnn_data/alexnet_model/conv1_w.dat",
+                                              "cnn_data/alexnet_model/conv2_w.dat",
+                                              "cnn_data/alexnet_model/conv3_w.dat",
+                                              "cnn_data/alexnet_model/conv4_w.dat",
+                                              "cnn_data/alexnet_model/conv5_w.dat",
+                                              "cnn_data/alexnet_model/fc6_w.dat",
+                                              "cnn_data/alexnet_model/fc7_w.dat",
+                                              "cnn_data/alexnet_model/fc8_w.dat"
+                                            };
+
+    std::vector<std::string> bias_files = { "cnn_data/alexnet_model/conv1_b.dat",
+                                            "cnn_data/alexnet_model/conv2_b.dat",
+                                            "cnn_data/alexnet_model/conv3_b.dat",
+                                            "cnn_data/alexnet_model/conv4_b.dat",
+                                            "cnn_data/alexnet_model/conv5_b.dat",
+                                            "cnn_data/alexnet_model/fc6_b.dat",
+                                            "cnn_data/alexnet_model/fc7_b.dat",
+                                            "cnn_data/alexnet_model/fc8_b.dat"
+                                          };
+    NEAlexNetModel network{};
+
+    network.init_weights(batches);
+    network.build();
+    network.allocate();
+    network.fill(weight_files, bias_files);
+    network.feed(std::move(input_file));
+    network.run();
+
+    return network.get_classifications();
+}
+} // namespace
+
+#ifndef DOXYGEN_SKIP_THIS
+BOOST_AUTO_TEST_SUITE(SYSTEM_TESTS)
+BOOST_AUTO_TEST_SUITE(NEON)
+
+BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit"))
+BOOST_AUTO_TEST_CASE(AlexNet)
+{
+    // Compute AlexNet
+    std::vector<unsigned int> classified_labels = compute_alexnet(1, "cnn_data/imagenet_data/shark.dat");
+
+    // Expected labels
+    std::vector<unsigned int> expected_labels = { 2 };
+
+    // Validate labels
+    validate(classified_labels, expected_labels);
+}
+
+BOOST_AUTO_TEST_SUITE_END()
+BOOST_AUTO_TEST_SUITE_END()
+#endif
+#endif /* INTERNAL_ONLY */
diff --git a/tests/validation/system_tests/NEON/LeNet5.cpp b/tests/validation/system_tests/NEON/LeNet5.cpp
new file mode 100644
index 0000000000..a82b84a997
--- /dev/null
+++ b/tests/validation/system_tests/NEON/LeNet5.cpp
@@ -0,0 +1,94 @@
+#ifdef INTERNAL_ONLY //FIXME Delete this file before the release
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "NEON/Helper.h"
+#include "NEON/NEAccessor.h"
+#include "validation/Validation.h"
+
+#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
+#include "arm_compute/runtime/NEON/functions/NEConvolutionLayer.h"
+#include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h"
+#include "arm_compute/runtime/NEON/functions/NEPoolingLayer.h"
+#include "arm_compute/runtime/NEON/functions/NESoftmaxLayer.h"
+
+#include "model_objects/LeNet5.h"
+
+using namespace arm_compute;
+using namespace arm_compute::test;
+using namespace arm_compute::test::neon;
+using namespace arm_compute::test::validation;
+
+namespace
+{
+using NELeNet5Model = model_objects::LeNet5<Tensor,
+                                            NEAccessor,
+                                            NEActivationLayer,
+                                            NEConvolutionLayer,
+                                            NEFullyConnectedLayer,
+                                            NEPoolingLayer,
+                                            NESoftmaxLayer>;
+
+std::vector<unsigned int> compute_lenet5(unsigned int batches, std::string input_file)
+{
+    std::vector<std::string> weight_files = { "cnn_data/lenet_model/conv1_w.dat",
+                                              "cnn_data/lenet_model/conv2_w.dat",
+                                              "cnn_data/lenet_model/ip1_w.dat",
+                                              "cnn_data/lenet_model/ip2_w.dat"
+                                            };
+
+    std::vector<std::string> bias_files = { "cnn_data/lenet_model/conv1_b.dat",
+                                            "cnn_data/lenet_model/conv2_b.dat",
+                                            "cnn_data/lenet_model/ip1_b.dat",
+                                            "cnn_data/lenet_model/ip2_b.dat"
+                                          };
+    NELeNet5Model network{};
+    network.build(batches);
+    network.fill(weight_files, bias_files);
+    network.feed(std::move(input_file));
+    network.run();
+
+    return network.get_classifications();
+}
+} // namespace
+
+#ifndef DOXYGEN_SKIP_THIS
+BOOST_AUTO_TEST_SUITE(SYSTEM_TESTS)
+BOOST_AUTO_TEST_SUITE(NEON)
+
+BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit"))
+BOOST_AUTO_TEST_CASE(LeNet5)
+{
+    // Compute LeNet5
+    std::vector<unsigned int> classified_labels = compute_lenet5(10, "cnn_data/mnist_data/input100.dat");
+
+    // Expected labels
+    std::vector<unsigned int> expected_labels = { 7, 2, 1, 0, 4, 1, 4, 9, 5, 9 };
+
+    // Validate labels
+    validate(classified_labels, expected_labels);
+}
+
+BOOST_AUTO_TEST_SUITE_END()
+BOOST_AUTO_TEST_SUITE_END()
+#endif
+#endif /* INTERNAL_ONLY */
--
cgit v1.2.1
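The compare_dimensions() helper added in Validation.h relies on Boost.Test's custom-predicate pattern: returning a false predicate_result with an attached message makes BOOST_TEST print a meaningful diagnostic instead of a bare failure. A self-contained sketch of the same pattern (hypothetical example; compare_sizes is not part of the patch, only Boost.Test is assumed):

    #define BOOST_TEST_MODULE PredicateSketch
    #include <boost/test/included/unit_test.hpp>

    #include <cstddef>

    // Minimal custom predicate in the style of compare_dimensions(): on
    // failure, return a false predicate_result carrying a diagnostic message.
    boost::test_tools::predicate_result compare_sizes(size_t size1, size_t size2)
    {
        if(size1 != size2)
        {
            boost::test_tools::predicate_result result(false);
            result.message() << "Different size [" << size1 << "!=" << size2 << "]";
            return result;
        }

        return true;
    }

    BOOST_AUTO_TEST_CASE(SizesMatch)
    {
        BOOST_TEST(compare_sizes(4u, 4u));
    }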