From 7068f9900d136312318ff430aef588b14e0c87ad Mon Sep 17 00:00:00 2001
From: Anthony Barbier <anthony.barbier@arm.com>
Date: Thu, 26 Oct 2017 15:23:08 +0100
Subject: COMPMID-631: Merge branches/gles_compute branch

Last commit:
commit b25c5f68042b0c81bf611d59a1bb8535e1c42497
Author: Xinghang Zhou <xinghang.zhou@arm.com>
Date:   Wed Oct 25 18:48:10 2017 +0800

    Synced validation's tolerances of GCSoftmax from cl side

Change-Id: Ibe72054205c1c8721845d679a31af7ed0a7c5cf6
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/93283
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
---
 arm_compute/core/CL/CLHelpers.h                    |   2 +-
 .../CL/kernels/CLBatchNormalizationLayerKernel.h   |   8 +-
 arm_compute/core/Error.h                           |  13 +-
 arm_compute/core/GLES_COMPUTE/GCHelpers.h          |  64 +++++
 arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h    | 306 +++++++++++++++++++++
 arm_compute/core/GLES_COMPUTE/GCKernels.h          |  48 ++++
 arm_compute/core/GLES_COMPUTE/IGCKernel.h          | 179 ++++++++++++
 arm_compute/core/GLES_COMPUTE/IGCSimple2DKernel.h  |  41 +++
 arm_compute/core/GLES_COMPUTE/IGCSimple3DKernel.h  |  43 +++
 arm_compute/core/GLES_COMPUTE/IGCSimpleKernel.h    |  66 +++++
 arm_compute/core/GLES_COMPUTE/IGCTensor.h          |  99 +++++++
 arm_compute/core/GLES_COMPUTE/OpenGLES.h           | 165 +++++++++++
 .../kernels/GCAbsoluteDifferenceKernel.h           |  71 +++++
 .../GLES_COMPUTE/kernels/GCActivationLayerKernel.h |  68 +++++
 .../kernels/GCBatchNormalizationLayerKernel.h      |  77 ++++++
 .../core/GLES_COMPUTE/kernels/GCCol2ImKernel.h     |  92 +++++++
 .../kernels/GCDepthConcatenateKernel.h             |  76 +++++
 .../kernels/GCDirectConvolutionLayerKernel.h       |  87 ++++++
 .../core/GLES_COMPUTE/kernels/GCDropoutKernel.h    |  79 ++++++
 .../core/GLES_COMPUTE/kernels/GCFillBorderKernel.h |  77 ++++++
 .../kernels/GCGEMMInterleave4x4Kernel.h            |  80 ++++++
 .../kernels/GCGEMMMatrixAccumulateBiasesKernel.h   |  63 +++++
 .../kernels/GCGEMMMatrixAdditionKernel.h           |  70 +++++
 .../kernels/GCGEMMMatrixMultiplyKernel.h           |  79 ++++++
 .../kernels/GCGEMMTranspose1xWKernel.h             |  67 +++++
 .../core/GLES_COMPUTE/kernels/GCIm2ColKernel.h     | 109 ++++++++
 .../kernels/GCNormalizationLayerKernel.h           |  72 +++++
 .../kernels/GCPixelWiseMultiplicationKernel.h      |  70 +++++
 .../GLES_COMPUTE/kernels/GCPoolingLayerKernel.h    |  70 +++++
 .../GLES_COMPUTE/kernels/GCSoftmaxLayerKernel.h    | 112 ++++++++
 .../core/GLES_COMPUTE/kernels/GCTransposeKernel.h  |  52 ++++
 arm_compute/core/Log.h                             |  54 ++--
 .../NEON/kernels/NEBatchNormalizationLayerKernel.h |   4 +-
 arm_compute/core/Utils.h                           |  23 +-
 .../CL/functions/CLBatchNormalizationLayer.h       |   6 +-
 .../runtime/CL/functions/CLNormalizationLayer.h    |   4 +-
 arm_compute/runtime/GLES_COMPUTE/GCFunctions.h     |  45 +++
 arm_compute/runtime/GLES_COMPUTE/GCScheduler.h     |  73 +++++
 arm_compute/runtime/GLES_COMPUTE/GCTensor.h        | 100 +++++++
 .../runtime/GLES_COMPUTE/GCTensorAllocator.h       | 128 +++++++++
 .../runtime/GLES_COMPUTE/IGCSimpleFunction.h       |  50 ++++
 .../GLES_COMPUTE/functions/GCAbsoluteDifference.h  |  52 ++++
 .../GLES_COMPUTE/functions/GCActivationLayer.h     |  53 ++++
 .../functions/GCBatchNormalizationLayer.h          |  67 +++++
 .../GLES_COMPUTE/functions/GCDepthConcatenate.h    |  67 +++++
 .../functions/GCDirectConvolutionLayer.h           |  59 ++++
 .../GLES_COMPUTE/functions/GCDropoutLayer.h        |  63 +++++
 .../runtime/GLES_COMPUTE/functions/GCFillBorder.h  |  52 ++++
 .../GLES_COMPUTE/functions/GCFullyConnectedLayer.h |  96 +++++++
 .../runtime/GLES_COMPUTE/functions/GCGEMM.h        |  85 ++++++
 .../GLES_COMPUTE/functions/GCGEMMInterleave4x4.h   |  50 ++++
 .../GLES_COMPUTE/functions/GCGEMMTranspose1xW.h    |  47 ++++
 .../GLES_COMPUTE/functions/GCNormalizationLayer.h  |  71 +++++
 .../functions/GCPixelWiseMultiplication.h          |  48 ++++
 .../GLES_COMPUTE/functions/GCPoolingLayer.h        |  53 ++++
 .../GLES_COMPUTE/functions/GCSoftmaxLayer.h        |  69 +++++
 .../runtime/GLES_COMPUTE/functions/GCTranspose.h   |  50 ++++
 .../NEON/functions/NEBatchNormalizationLayer.h     |   4 +-
 .../runtime/NEON/functions/NENormalizationLayer.h  |   4 +-
 59 files changed, 3945 insertions(+), 37 deletions(-)
 create mode 100644 arm_compute/core/GLES_COMPUTE/GCHelpers.h
 create mode 100644 arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h
 create mode 100644 arm_compute/core/GLES_COMPUTE/GCKernels.h
 create mode 100644 arm_compute/core/GLES_COMPUTE/IGCKernel.h
 create mode 100644 arm_compute/core/GLES_COMPUTE/IGCSimple2DKernel.h
 create mode 100644 arm_compute/core/GLES_COMPUTE/IGCSimple3DKernel.h
 create mode 100644 arm_compute/core/GLES_COMPUTE/IGCSimpleKernel.h
 create mode 100644 arm_compute/core/GLES_COMPUTE/IGCTensor.h
 create mode 100644 arm_compute/core/GLES_COMPUTE/OpenGLES.h
 create mode 100644 arm_compute/core/GLES_COMPUTE/kernels/GCAbsoluteDifferenceKernel.h
 create mode 100644 arm_compute/core/GLES_COMPUTE/kernels/GCActivationLayerKernel.h
 create mode 100644 arm_compute/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.h
 create mode 100644 arm_compute/core/GLES_COMPUTE/kernels/GCCol2ImKernel.h
 create mode 100644 arm_compute/core/GLES_COMPUTE/kernels/GCDepthConcatenateKernel.h
 create mode 100644 arm_compute/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.h
 create mode 100644 arm_compute/core/GLES_COMPUTE/kernels/GCDropoutKernel.h
 create mode 100644 arm_compute/core/GLES_COMPUTE/kernels/GCFillBorderKernel.h
 create mode 100644 arm_compute/core/GLES_COMPUTE/kernels/GCGEMMInterleave4x4Kernel.h
 create mode 100644 arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixAccumulateBiasesKernel.h
 create mode 100644 arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixAdditionKernel.h
 create mode 100644 arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixMultiplyKernel.h
 create mode 100644 arm_compute/core/GLES_COMPUTE/kernels/GCGEMMTranspose1xWKernel.h
 create mode 100644 arm_compute/core/GLES_COMPUTE/kernels/GCIm2ColKernel.h
 create mode 100644 arm_compute/core/GLES_COMPUTE/kernels/GCNormalizationLayerKernel.h
 create mode 100644 arm_compute/core/GLES_COMPUTE/kernels/GCPixelWiseMultiplicationKernel.h
 create mode 100644 arm_compute/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.h
 create mode 100644 arm_compute/core/GLES_COMPUTE/kernels/GCSoftmaxLayerKernel.h
 create mode 100644 arm_compute/core/GLES_COMPUTE/kernels/GCTransposeKernel.h
 create mode 100644 arm_compute/runtime/GLES_COMPUTE/GCFunctions.h
 create mode 100644 arm_compute/runtime/GLES_COMPUTE/GCScheduler.h
 create mode 100644 arm_compute/runtime/GLES_COMPUTE/GCTensor.h
 create mode 100644 arm_compute/runtime/GLES_COMPUTE/GCTensorAllocator.h
 create mode 100644 arm_compute/runtime/GLES_COMPUTE/IGCSimpleFunction.h
 create mode 100644 arm_compute/runtime/GLES_COMPUTE/functions/GCAbsoluteDifference.h
 create mode 100644 arm_compute/runtime/GLES_COMPUTE/functions/GCActivationLayer.h
 create mode 100644 arm_compute/runtime/GLES_COMPUTE/functions/GCBatchNormalizationLayer.h
 create mode 100644 arm_compute/runtime/GLES_COMPUTE/functions/GCDepthConcatenate.h
 create mode 100644 arm_compute/runtime/GLES_COMPUTE/functions/GCDirectConvolutionLayer.h
 create mode 100644 arm_compute/runtime/GLES_COMPUTE/functions/GCDropoutLayer.h
 create mode 100644 arm_compute/runtime/GLES_COMPUTE/functions/GCFillBorder.h
 create mode 100644 arm_compute/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.h
 create mode 100644 arm_compute/runtime/GLES_COMPUTE/functions/GCGEMM.h
 create mode 100644 arm_compute/runtime/GLES_COMPUTE/functions/GCGEMMInterleave4x4.h
 create mode 100644 arm_compute/runtime/GLES_COMPUTE/functions/GCGEMMTranspose1xW.h
 create mode 100644 arm_compute/runtime/GLES_COMPUTE/functions/GCNormalizationLayer.h
 create mode 100644 arm_compute/runtime/GLES_COMPUTE/functions/GCPixelWiseMultiplication.h
 create mode 100644 arm_compute/runtime/GLES_COMPUTE/functions/GCPoolingLayer.h
 create mode 100644 arm_compute/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.h
 create mode 100644 arm_compute/runtime/GLES_COMPUTE/functions/GCTranspose.h

(limited to 'arm_compute')

diff --git a/arm_compute/core/CL/CLHelpers.h b/arm_compute/core/CL/CLHelpers.h
index b93bae8d82..365ecb06c4 100644
--- a/arm_compute/core/CL/CLHelpers.h
+++ b/arm_compute/core/CL/CLHelpers.h
@@ -43,7 +43,7 @@ struct enable_bitwise_ops<arm_compute::GPUTarget>
 };
 
 /** Max vector width of an OpenCL vector */
-static constexpr const unsigned int max_cl_vector_width = 16;
+static constexpr unsigned int max_cl_vector_width = 16;
 
 /** Translates a tensor data type to the appropriate OpenCL type.
  *
diff --git a/arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h b/arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h
index a24432145a..a5559bf8aa 100644
--- a/arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h
@@ -55,24 +55,24 @@ public:
      * @param[in, out] input   Source tensor. In case of @p output tensor = nullptr, this tensor will store the result.
      *                         3 lower dimensions represent a single input with dimensions [width, height, FM].
      *                         The rest are optional and used for representing batches. Data types supported: QS8/QS16/F16/F32.
+     * @param[out]     output  Destination tensor. Output will have the same number of dimensions as input. Data type supported: same as @p input
      * @param[in]      mean    Mean values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
      * @param[in]      var     Variance values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
-     * @param[in]      gamma   Gamma values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
      * @param[in]      beta    Beta values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
+     * @param[in]      gamma   Gamma values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
      * @param[in]      epsilon Small value to avoid division with zero.
-     * @param[out]     output  Destination tensor. Output will have the same number of dimensions as input. Data type supported: same as @p input
      */
     void configure(ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *var, const ICLTensor *beta, const ICLTensor *gamma, float epsilon);
     /** Static function to check if given info will lead to a valid configuration of @ref CLBatchNormalizationLayerKernel
      *
      * @param[in] input   Source tensor info. In case of @p output tensor info = nullptr, this tensor will store the result.
      *                    3 lower dimensions represent a single input with dimensions [width, height, FM].
-     * @param[in] output  Destination tensor info. Output will have the same number of dimensions as input. Data type supported: same as @p input
      *                    The rest are optional and used for representing batches. Data types supported: QS8/QS16/F16/F32.
+     * @param[in] output  Destination tensor info. Output will have the same number of dimensions as input. Data type supported: same as @p input
      * @param[in] mean    Mean values tensor info. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
      * @param[in] var     Variance values tensor info. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
-     * @param[in] gamma   Gamma values tensor info. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
      * @param[in] beta    Beta values tensor info. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
+     * @param[in] gamma   Gamma values tensor info. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
      * @param[in] epsilon Small value to avoid division with zero.
      *
      * @return an error status
diff --git a/arm_compute/core/Error.h b/arm_compute/core/Error.h
index fa3f9c0615..277db9d64d 100644
--- a/arm_compute/core/Error.h
+++ b/arm_compute/core/Error.h
@@ -24,6 +24,7 @@
 #ifndef __ARM_COMPUTE_ERROR_H__
 #define __ARM_COMPUTE_ERROR_H__
 
+#include <stdarg.h>
 #include <stdexcept>
 #include <string>
 
@@ -106,6 +107,16 @@ private:
     std::string _description;
 };
 
+/** Creates an error containing the error message from variable argument list
+ *
+ * @param[in] error_code Error code
+ * @param[in] function   Function in which the error occurred.
+ * @param[in] file       Name of the file where the error occurred.
+ * @param[in] line       Line on which the error occurred.
+ * @param[in] msg        Message to display before aborting.
+ * @param[in] args       Variable argument list of the message.
+ */
+Error create_error_va_list(ErrorCode error_code, const char *function, const char *file, const int line, const char *msg, va_list args);
 /** Creates an error containing the error message
  *
  * @param[in] error_code Error code
@@ -241,7 +252,7 @@ Error create_error(ErrorCode error_code, const char *function, const char *file,
  * @param[in] error Error value to check.
  */
 #define ARM_COMPUTE_ERROR_THROW_ON(error) \
-    error.throw_if_error();
+    error.throw_if_error()
 
 /** If the condition is true, the given message is printed and an exception is thrown
  *
diff --git a/arm_compute/core/GLES_COMPUTE/GCHelpers.h b/arm_compute/core/GLES_COMPUTE/GCHelpers.h
new file mode 100644
index 0000000000..475554f2be
--- /dev/null
+++ b/arm_compute/core/GLES_COMPUTE/GCHelpers.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_GCHELPERS_H__
+#define __ARM_COMPUTE_GCHELPERS_H__
+
+#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h"
+#include "arm_compute/core/Helpers.h"
+#include "support/ToolchainSupport.h"
+
+#include <string>
+
+namespace arm_compute
+{
+/** Helper function to create and return a unique_ptr pointing to a GLES kernel object.
+ *  It also calls the kernel's configure() method.
+ *
+ * @param[in] args All the arguments that need to be passed to the kernel's configure() method.
+ *
+ * @return A unique pointer pointing to a GLES kernel object
+ */
+template <typename Kernel, typename... T>
+std::unique_ptr<Kernel> create_configure_kernel(T &&... args)
+{
+    std::unique_ptr<Kernel> k = arm_compute::support::cpp14::make_unique<Kernel>();
+    k->configure(std::forward<T>(args)...);
+    return k;
+}
+
+/** Helper function to create and return a unique_ptr pointing to a GLES kernel object
+ *
+ * @return A unique pointer pointing to a GLES kernel object
+ */
+template <typename Kernel>
+std::unique_ptr<Kernel> create_kernel()
+{
+    std::unique_ptr<Kernel> k = arm_compute::support::cpp14::make_unique<Kernel>();
+    return k;
+}
+
+/** Max vector width of a GLES vector */
+static constexpr unsigned int max_gc_vector_width = 16;
+}
+#endif /* __ARM_COMPUTE_GCHELPERS_H__ */
diff --git a/arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h b/arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h
new file mode 100644
index 0000000000..e601b529ed
--- /dev/null
+++ b/arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h
@@ -0,0 +1,306 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_GCKERNELLIBRARY_H__
+#define __ARM_COMPUTE_GCKERNELLIBRARY_H__
+
+#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h"
+#include "arm_compute/core/Utils.h"
+
+#include <map>
+#include <set>
+#include <string>
+#include <utility>
+#include <vector>
+
+namespace arm_compute
+{
+/** GCProgram class */
+class GCProgram
+{
+public:
+    /** Default constructor. */
+    GCProgram();
+    /** Construct program from source file.
+     *
+     * @param[in] name   Program name.
+     * @param[in] source Program source.
+     */
+    GCProgram(std::string name, std::string source);
+    /** Default Copy Constructor. */
+    GCProgram(const GCProgram &) = default;
+    /** Default Move Constructor. */
+    GCProgram(GCProgram &&) = default;
+    /** Default copy assignment operator. */
+    GCProgram &operator=(const GCProgram &) = default;
+    /** Default move assignment operator. */
+    GCProgram &operator=(GCProgram &&) = default;
+    /** Returns program name.
+     *
+     * @return Program's name.
+     */
+    std::string name() const
+    {
+        return _name;
+    }
+    /** Link program.
+     *
+     * @param[in] shader Shader used to link program.
+     *
+     * @return Linked program id.
+     */
+    GLuint link_program(GLuint shader);
+    /** Compile shader.
+     *
+     * @param[in] build_options Shader build options.
+     *
+     * @return GLES shader object.
+     */
+    GLuint compile_shader(const std::string &build_options);
+
+private:
+    std::string _name;   /**< Program name. */
+    std::string _source; /**< Source code for the program. */
+};
+
+/** GCKernel class */
+class GCKernel
+{
+public:
+    /** Default Constructor. */
+    GCKernel();
+    /** Default Copy Constructor. */
+    GCKernel(const GCKernel &) = default;
+    /** Default Move Constructor. */
+    GCKernel(GCKernel &&) = default;
+    /** Default copy assignment operator. */
+    GCKernel &operator=(const GCKernel &) = default;
+    /** Default move assignment operator. */
+    GCKernel &operator=(GCKernel &&) = default;
+    /** Constructor.
+     *
+     * @param[in] name    Kernel name.
+     * @param[in] program Built program.
+     */
+    GCKernel(std::string name, GLuint program);
+    /** Returns kernel name.
+     *
+     * @return Kernel's name.
+     */
+    std::string name() const
+    {
+        return _name;
+    }
+    /** Get program id.
+     *
+     * @return program id.
+     */
+    GLuint get_program() const
+    {
+        return _program;
+    }
+    /** Use current program.
+     *
+     * @note This function has no return value.
+     */
+    void use();
+    /** Unuse current program.
+     *
+     * @note This function has no return value.
+     */
+    void unuse();
+    /** Set value at uniform idx.
+     *
+     * @param[in] idx   Index in vector.
+     * @param[in] value Set value.
+     */
+    template <class T>
+    void set_params(unsigned int idx, T value)
+    {
+        if(idx >= _params.size())
+        {
+            _params.resize(idx + 1, 0);
+        }
+
+        unsigned int *p = reinterpret_cast<unsigned int *>(&value);
+        _params[idx]    = *p;
+    }
+    /** Clear params.
+     *
+     */
+    void clear_params()
+    {
+        _params.clear();
+    }
+    /** Set shader params binding point.
+     *
+     * @param[in] binding Shader params binding point.
+     */
+    void set_shader_params_binding_point(unsigned int binding)
+    {
+        _shader_params_binding_point = binding;
+    }
+    /** Update shader params.
+     *
+     */
+    void update_shader_params();
+    /** Clean up program and ubo.
+     *
+     */
+    void cleanup();
+
+private:
+    std::string                  _name;                                 /**< Kernel name */
+    GLuint                       _program;                              /**< Linked program id */
+    std::vector<unsigned int>    _params;                               /**< Store all the values of the shader parameters */
+    GLuint                       _shader_params;                        /**< Uniform buffer object name for shader parameters */
+    GLuint                       _shader_params_binding_point;          /**< The binding point of the uniform block for shader parameters */
+    GLuint                       _shader_params_index;                  /**< The index of the uniform block */
+    GLint                        _shader_params_size;                   /**< The uniform block data size in the shader */
+    static constexpr const char *_shader_params_name = "shader_params"; /**< The uniform block name in the shader */
+};
+
+/** GCKernelLibrary class */
+class GCKernelLibrary
+{
+    using StringSet = std::set<std::string>;
+
+private:
+    /** Default Constructor. */
+    GCKernelLibrary();
+
+public:
+    /** Prevent instances of this class from being copied. */
+    GCKernelLibrary(const GCKernelLibrary &) = delete;
+    /** Prevent instances of this class from being copied. */
+    const GCKernelLibrary &operator=(const GCKernelLibrary &) = delete;
+    /** Default Destructor. */
+    ~GCKernelLibrary();
+
+    static GCKernelLibrary &get();
+    /** Initialises the kernel library.
+     *
+     * @param[in] shader_path (Optional) Path of the directory from which shader sources are loaded.
+     * @param[in] dpy         (Optional) EGLDisplay set by external application.
+     * @param[in] ctx         (Optional) EGLContext set by external application.
+     */
+    void init(std::string shader_path = "./", EGLDisplay dpy = EGL_NO_DISPLAY, EGLContext ctx = EGL_NO_CONTEXT)
+    {
+        //TODO: deal with old display and context.
+        _shader_path = std::move(shader_path);
+
+        _display = dpy;
+        _context = ctx;
+
+        if(_display == EGL_NO_DISPLAY || _context == EGL_NO_CONTEXT)
+        {
+            setup_context();
+
+            _own_context = true;
+        }
+
+        eglMakeCurrent(_display, EGL_NO_SURFACE, EGL_NO_SURFACE, _context);
+        setup_dummy_fbo();
+    }
+
+    /** Sets the path that the shaders reside in.
+     *
+     * @param[in] shader_path Path of the shader.
+     */
+    void set_shader_path(const std::string &shader_path)
+    {
+        _shader_path = shader_path;
+    };
+    /** Sets display and context to create kernel.
+     *
+     * @param[in] dpy EGLDisplay set by external application.
+     * @param[in] ctx EGLContext set by external application.
+     */
+    void set_context(EGLDisplay dpy, EGLContext ctx)
+    {
+        //TODO: deal with old display and context.
+        _display = dpy;
+        _context = ctx;
+
+        eglMakeCurrent(dpy, EGL_NO_SURFACE, EGL_NO_SURFACE, ctx);
+        setup_dummy_fbo();
+    };
+    /** Creates a kernel from the kernel library.
+     *
+     * @param[in] shader_name       Shader name.
+     * @param[in] build_options_set Shader build options as a set.
+     *
+     * @return The created kernel.
+     */
+    GCKernel create_kernel(const std::string &shader_name, const StringSet &build_options_set = {}) const;
+    /** Serializes and saves programs to a binary.
+     *
+     */
+    void save_binary();
+    /** Load serialized binary with all the programs.
+     *
+     */
+    void load_binary();
+    /** Set up a dummy FBO to work around an issue on Galaxy S8.
+     *
+     */
+    void setup_dummy_fbo();
+
+private:
+    /** Preprocess GLES shader
+     *
+     * @param[in] shader_source Source code of the shader to preprocess.
+     *
+     * @return Preprocessed GLES shader, returned as a string.
+     */
+    const std::string preprocess_shader(const std::string &shader_source) const;
+    /** Load program and its dependencies.
+     *
+     * @param[in] program_name Name of the program to load.
+     */
+    const GCProgram &load_program(const std::string &program_name) const;
+    /** Concatenates contents of a set into a single string.
+     *
+     * @param[in] s Input set to concatenate.
+     *
+     * @return Concatenated string.
+     */
+    std::string stringify_set(const StringSet &s) const;
+    /** Set up EGL context.
+     */
+    void setup_context();
+
+    EGLDisplay  _display;                                                /**< Underlying EGL Display. */
+    EGLContext  _context;                                                /**< Underlying EGL Context. */
+    GLuint      _frame_buffer;                                           /**< Dummy fbo */
+    GLuint      _tex_rt;                                                 /**< Dummy texture for render target */
+    bool        _own_context;                                            /**< Self created context or not. */
+    std::string _shader_path;                                            /**< Path to the shaders folder. */
+    mutable std::map<std::string, const GCProgram>  _programs_map;       /**< Map with all already loaded program data. */
+    mutable std::map<std::string, const GCKernel>   _built_programs_map; /**< Map with all already built program data. */
+    static const std::map<std::string, std::string> _shader_program_map; /**< Map that associates kernel names with programs. */
+    static const std::map<std::string, std::string> _program_source_map; /**< Contains sources for all programs.
+                                                                              Used for compile-time shader inclusion. */
+};
+}
+#endif /* __ARM_COMPUTE_GCKERNELLIBRARY_H__ */
diff --git a/arm_compute/core/GLES_COMPUTE/GCKernels.h b/arm_compute/core/GLES_COMPUTE/GCKernels.h
new file mode 100644
index 0000000000..57d11d5f18
--- /dev/null
+++ b/arm_compute/core/GLES_COMPUTE/GCKernels.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_GCKERNELS_H__
+#define __ARM_COMPUTE_GCKERNELS_H__
+
+/* Header regrouping all the GLES compute kernels */
+#include "arm_compute/core/GLES_COMPUTE/kernels/GCAbsoluteDifferenceKernel.h"
+#include "arm_compute/core/GLES_COMPUTE/kernels/GCActivationLayerKernel.h"
+#include "arm_compute/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.h"
+#include "arm_compute/core/GLES_COMPUTE/kernels/GCCol2ImKernel.h"
+#include "arm_compute/core/GLES_COMPUTE/kernels/GCDepthConcatenateKernel.h"
+#include "arm_compute/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.h"
+#include "arm_compute/core/GLES_COMPUTE/kernels/GCDropoutKernel.h"
+#include "arm_compute/core/GLES_COMPUTE/kernels/GCFillBorderKernel.h"
+#include "arm_compute/core/GLES_COMPUTE/kernels/GCGEMMInterleave4x4Kernel.h"
+#include "arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixAccumulateBiasesKernel.h"
+#include "arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixAdditionKernel.h"
+#include "arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixMultiplyKernel.h"
+#include "arm_compute/core/GLES_COMPUTE/kernels/GCGEMMTranspose1xWKernel.h"
+#include "arm_compute/core/GLES_COMPUTE/kernels/GCIm2ColKernel.h"
+#include "arm_compute/core/GLES_COMPUTE/kernels/GCNormalizationLayerKernel.h"
+#include "arm_compute/core/GLES_COMPUTE/kernels/GCPixelWiseMultiplicationKernel.h"
+#include "arm_compute/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.h"
+#include "arm_compute/core/GLES_COMPUTE/kernels/GCSoftmaxLayerKernel.h"
+#include "arm_compute/core/GLES_COMPUTE/kernels/GCTransposeKernel.h"
+
+#endif /* __ARM_COMPUTE_GCKERNELS_H__ */
diff --git a/arm_compute/core/GLES_COMPUTE/IGCKernel.h b/arm_compute/core/GLES_COMPUTE/IGCKernel.h
new file mode 100644
index 0000000000..0d3bfb30fd
--- /dev/null
+++ b/arm_compute/core/GLES_COMPUTE/IGCKernel.h
@@ -0,0 +1,179 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_IGCKERNEL_H__
+#define __ARM_COMPUTE_IGCKERNEL_H__
+
+#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h"
+#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h"
+
+#include "arm_compute/core/IKernel.h"
+
+namespace arm_compute
+{
+class IGCTensor;
+class Window;
+
+/** Common interface for all the GLES kernels: wraps a GCKernel and declares helpers to add tensor arguments to it. */
+class IGCKernel : public IKernel
+{
+public:
+    /** Constructor */
+    IGCKernel();
+    /** Returns a reference to the GLES kernel of this object.
+     *
+     * @return A reference to the GLES kernel of this object.
+     */
+    GCKernel &kernel();
+    /** Additional parameters for a tensor's GLES SSBO buffer: its binding point and its data type shift. */
+    class BufferParam
+    {
+    public:
+        /** Tensor's binding point in this kernel. */
+        unsigned int binding_point = 0;
+        /** The base 2 logarithm of the SSBO buffer data type size (number of bits to shift by for offset calculation) */
+        unsigned int buffer_data_type_shift = 0;
+
+        /** Constructor
+         *
+         * @param[in] binding Tensor's binding point.
+         * @param[in] shift   Number of bits to shift by for offset calculation.
+         */
+        BufferParam(const unsigned int binding, const unsigned int shift)
+            : binding_point(binding), buffer_data_type_shift(shift)
+        {
+        }
+    };
+
+    /** Add the passed 1D tensor's parameters to the object's kernel's arguments starting from the index idx.
+     *
+     * @param[in,out] idx           Index at which to start adding the tensor's arguments. Input and output tensors have separate indices: multiple indices start from 1, a single index has to be set to 0. Taken by non-const reference, so the call may update it.
+     * @param[in]     tensor        Tensor to set as an argument of the object's kernel.
+     * @param[in]     binding_point Tensor's binding point in this kernel.
+     * @param[in]     window        Window the kernel will be executed on.
+     */
+    void add_1D_tensor_argument(unsigned int &idx, const IGCTensor *tensor, const unsigned int binding_point, const Window &window);
+
+    /** Add the passed 1D tensor's parameters to the object's kernel's arguments starting from the index idx.
+     *
+     * @param[in,out] idx    Index at which to start adding the tensor's arguments. Input and output tensors have separate indices: multiple indices start from 1, a single index has to be set to 0. Taken by non-const reference, so the call may update it.
+     * @param[in]     tensor Tensor to set as an argument of the object's kernel.
+     * @param[in]     param  Additional parameter for GLES SSBO buffer.
+     * @param[in]     window Window the kernel will be executed on.
+     */
+    void add_1D_tensor_argument(unsigned int &idx, const IGCTensor *tensor, const BufferParam &param, const Window &window);
+
+    /** Add the passed 2D tensor's parameters to the object's kernel's arguments starting from the index idx.
+     *
+     * @param[in,out] idx           Index at which to start adding the tensor's arguments. Input and output tensors have separate indices: multiple indices start from 1, a single index has to be set to 0. Taken by non-const reference, so the call may update it.
+     * @param[in]     tensor        Tensor to set as an argument of the object's kernel.
+     * @param[in]     binding_point Tensor's binding point in this kernel.
+     * @param[in]     window        Window the kernel will be executed on.
+     */
+    void add_2D_tensor_argument(unsigned int &idx, const IGCTensor *tensor, const unsigned int binding_point, const Window &window);
+
+    /** Add the passed 2D tensor's parameters to the object's kernel's arguments starting from the index idx.
+     *
+     * @param[in,out] idx    Index at which to start adding the tensor's arguments. Input and output tensors have separate indices: multiple indices start from 1, a single index has to be set to 0. Taken by non-const reference, so the call may update it.
+     * @param[in]     tensor Tensor to set as an argument of the object's kernel.
+     * @param[in]     param  Additional parameter for GLES SSBO buffer.
+     * @param[in]     window Window the kernel will be executed on.
+     */
+    void add_2D_tensor_argument(unsigned int &idx, const IGCTensor *tensor, const BufferParam &param, const Window &window);
+
+    /** Add the passed 3D tensor's parameters to the object's kernel's arguments starting from the index idx.
+     *
+     * @param[in,out] idx           Index at which to start adding the tensor's arguments. Input and output tensors have separate indices: multiple indices start from 1, a single index has to be set to 0. Taken by non-const reference, so the call may update it.
+     * @param[in]     tensor        Tensor to set as an argument of the object's kernel.
+     * @param[in]     binding_point Tensor's binding point in this kernel.
+     * @param[in]     window        Window the kernel will be executed on.
+     */
+    void add_3D_tensor_argument(unsigned int &idx, const IGCTensor *tensor, const unsigned int binding_point, const Window &window);
+
+    /** Add the passed 3D tensor's parameters to the object's kernel's arguments starting from the index idx.
+     *
+     * @param[in,out] idx    Index at which to start adding the tensor's arguments. Input and output tensors have separate indices: multiple indices start from 1, a single index has to be set to 0. Taken by non-const reference, so the call may update it.
+     * @param[in]     tensor Tensor to set as an argument of the object's kernel.
+     * @param[in]     param  Additional parameter for GLES SSBO buffer.
+     * @param[in]     window Window the kernel will be executed on.
+     */
+    void add_3D_tensor_argument(unsigned int &idx, const IGCTensor *tensor, const BufferParam &param, const Window &window);
+
+    /** Returns the number of arguments enqueued per 1D tensor object.
+     *
+     * @return The number of arguments enqueued per 1D tensor object.
+     */
+    unsigned int num_arguments_per_1D_tensor() const;
+    /** Returns the number of arguments enqueued per 2D tensor object.
+     *
+     * @return The number of arguments enqueued per 2D tensor object.
+     */
+    unsigned int num_arguments_per_2D_tensor() const;
+    /** Returns the number of arguments enqueued per 3D tensor object.
+     *
+     * @return The number of arguments enqueued per 3D tensor object.
+     */
+    unsigned int num_arguments_per_3D_tensor() const;
+    /** Enqueue the OpenGL ES shader to process the given window. Pure virtual: every concrete kernel must implement it.
+     *
+     * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()).
+     */
+    virtual void run(const Window &window) = 0;
+
+private:
+    /** Add the passed tensor's parameters to the object's kernel's arguments starting from the index idx. Templated on the number of tensor dimensions (dimension_size).
+     *
+     * @param[in,out] idx    Index at which to start adding the tensor's arguments. Input and output tensors have separate indices: multiple indices start from 1, a single index has to be set to 0. Taken by non-const reference, so the call may update it.
+     * @param[in]     tensor Tensor to set as an argument of the object's kernel.
+     * @param[in]     param  Additional parameter for GLES SSBO buffer.
+     * @param[in]     window Window the kernel will be executed on.
+     */
+    template <unsigned int dimension_size>
+    void add_tensor_argument(unsigned int &idx, const IGCTensor *tensor, const BufferParam &param, const Window &window);
+
+    /** Returns the number of arguments enqueued per tensor object. Templated on the number of tensor dimensions (dimension_size).
+     *
+     * @return The number of arguments enqueued per tensor object.
+     */
+    template <unsigned int dimension_size>
+    unsigned int           num_arguments_per_tensor() const;
+
+protected:
+    GCKernel _kernel; /**< GLES kernel to run */
+};
+
+/** Add the kernel to the command queue with the given window.
+ *
+ * @note Depending on the size of the window, this might translate into several jobs being enqueued.
+ *
+ * @note If kernel.kernel() is empty then the function will return without adding anything to the queue.
+ *
+ * @param[in] kernel Kernel to enqueue (passed by non-const reference).
+ * @param[in] window Window the kernel has to process.
+ * @param[in] lws    Local workgroup size requested, by default (1, 1, 1)
+ *
+ * @note If any dimension of the lws is greater than the global workgroup size then no lws will be passed.
+ */
+void enqueue(IGCKernel &kernel, const Window &window, const gles::NDRange &lws = gles::NDRange(1U, 1U, 1U));
+}
+#endif /*__ARM_COMPUTE_IGCKERNEL_H__ */
diff --git a/arm_compute/core/GLES_COMPUTE/IGCSimple2DKernel.h b/arm_compute/core/GLES_COMPUTE/IGCSimple2DKernel.h
new file mode 100644
index 0000000000..413e86a2b7
--- /dev/null
+++ b/arm_compute/core/GLES_COMPUTE/IGCSimple2DKernel.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_IGCSIMPLE2DKERNEL_H__
+#define __ARM_COMPUTE_IGCSIMPLE2DKERNEL_H__
+
+#include "arm_compute/core/GLES_COMPUTE/IGCSimpleKernel.h"
+
+namespace arm_compute
+{
+class IGCTensor;
+
+/** Interface for simple OpenGL ES kernels having 1 tensor input and 1 tensor output. This interface can be used when the work-item processes a 2D tile. */
+class IGCSimple2DKernel : public IGCSimpleKernel
+{
+public:
+    // Inherited methods overridden (run() is declared pure virtual in IGCKernel):
+    void run(const Window &window) override;
+};
+}
+#endif /*__ARM_COMPUTE_IGCSIMPLE2DKERNEL_H__ */
diff --git a/arm_compute/core/GLES_COMPUTE/IGCSimple3DKernel.h b/arm_compute/core/GLES_COMPUTE/IGCSimple3DKernel.h
new file mode 100644
index 0000000000..622e53c38b
--- /dev/null
+++ b/arm_compute/core/GLES_COMPUTE/IGCSimple3DKernel.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_IGCSIMPLE3DKERNEL_H__
+#define __ARM_COMPUTE_IGCSIMPLE3DKERNEL_H__
+
+#include "arm_compute/core/GLES_COMPUTE/IGCSimple2DKernel.h"
+
+namespace arm_compute
+{
+class IGCTensor;
+
+/** Interface for simple GLES kernels having 1 tensor input and 1 tensor output.
+ *  Both input tensor and output tensor must have at least 3 dimensions.
+ */
+class IGCSimple3DKernel : public IGCSimple2DKernel
+{
+public:
+    // Inherited methods overridden (replaces IGCSimple2DKernel::run):
+    void run(const Window &window) override;
+};
+}
+#endif /*__ARM_COMPUTE_IGCSIMPLE3DKERNEL_H__ */
diff --git a/arm_compute/core/GLES_COMPUTE/IGCSimpleKernel.h b/arm_compute/core/GLES_COMPUTE/IGCSimpleKernel.h
new file mode 100644
index 0000000000..a23c4e774e
--- /dev/null
+++ b/arm_compute/core/GLES_COMPUTE/IGCSimpleKernel.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_IGCSIMPLEKERNEL_H__
+#define __ARM_COMPUTE_IGCSIMPLEKERNEL_H__
+
+#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h"
+#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h"
+#include "arm_compute/core/Helpers.h"
+
+namespace arm_compute
+{
+/** Interface for simple OpenGL ES kernels having 1 tensor input and 1 tensor output. It does not override run(), so it remains abstract. */
+class IGCSimpleKernel : public IGCKernel
+{
+public:
+    /** Constructor. */
+    IGCSimpleKernel();
+    /** Prevent instances of this class from being copy constructed (As this class contains pointers). */
+    IGCSimpleKernel(const IGCSimpleKernel &) = delete;
+    /** Prevent instances of this class from being copy assigned (As this class contains pointers). */
+    IGCSimpleKernel &operator=(const IGCSimpleKernel &) = delete;
+    /** Allow instances of this class to be move constructed. */
+    IGCSimpleKernel(IGCSimpleKernel &&) = default;
+    /** Allow instances of this class to be move assigned. */
+    IGCSimpleKernel &operator=(IGCSimpleKernel &&) = default;
+    /** Default destructor (defaulted, so the tensor pointers held by this class are not released here) */
+    ~IGCSimpleKernel() = default;
+
+    /** Configure the kernel
+     *
+     * @param[in]  input                             Source tensor.
+     * @param[out] output                            Destination tensor.
+     * @param[in]  num_elems_processed_per_iteration Number of processed elements per iteration.
+     * @param[in]  border_undefined                  (Optional) True if the border mode is undefined. False if it's replicate or constant. Defaults to false.
+     * @param[in]  border_size                       (Optional) Size of the border. Defaults to a default-constructed BorderSize.
+     */
+    void configure(const IGCTensor *input, IGCTensor *output, unsigned int num_elems_processed_per_iteration, bool border_undefined = false, const BorderSize &border_size = BorderSize());
+
+protected:
+    const IGCTensor *_input; /**< Input tensor pointer; presumably stored by configure() - confirm in the implementation file */
+    IGCTensor       *_output; /**< Output tensor pointer; presumably stored by configure() - confirm in the implementation file */
+};
+}
+
+#endif /*__ARM_COMPUTE_IGCSIMPLEKERNEL_H__ */
diff --git a/arm_compute/core/GLES_COMPUTE/IGCTensor.h b/arm_compute/core/GLES_COMPUTE/IGCTensor.h
new file mode 100644
index 0000000000..ab4e57e0ce
--- /dev/null
+++ b/arm_compute/core/GLES_COMPUTE/IGCTensor.h
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_IGCTENSOR_H__
+#define __ARM_COMPUTE_IGCTENSOR_H__
+
+#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h"
+#include "arm_compute/core/ITensor.h"
+
+#include <cstdint>
+
+namespace arm_compute
+{
+/** Interface for GLES Compute tensor. Child classes provide the SSBO id (gc_buffer) and the map/unmap implementation (do_map/do_unmap). */
+class IGCTensor : public ITensor
+{
+public:
+    /** Default constructor. */
+    IGCTensor();
+
+    /** Prevent instances of this class from being copied (As this class contains pointers). */
+    IGCTensor(const IGCTensor &) = delete;
+
+    /** Prevent instances of this class from being copy assigned (As this class contains pointers). */
+    IGCTensor &operator=(const IGCTensor &) = delete;
+
+    /** Allow instances of this class to be move constructed */
+    IGCTensor(IGCTensor &&) = default;
+
+    /** Allow instances of this class to be move assigned */
+    IGCTensor &operator=(IGCTensor &&) = default;
+
+    /** Virtual destructor */
+    virtual ~IGCTensor() = default;
+
+    /** Map on an allocated buffer.
+     *
+     * @param[in] blocking (Optional) If true, then the mapping will be ready to use by the time
+     *                     this method returns, else it is the caller's responsibility
+     *                     to flush the queue and wait for the mapping operation to have completed before accessing the mapped memory. Defaults to true.
+     */
+    void map(bool blocking = true);
+    /** Unmap an allocated and mapped buffer.
+     */
+    void unmap();
+    /** Clear the contents of the tensor synchronously.
+     */
+    void clear();
+
+    // Inherited methods overridden:
+    uint8_t *buffer() const override;
+    /** Interface to be implemented by the child class to return the tensor's gles compute buffer id.
+     *
+     * @return An SSBO buffer id.
+     */
+    virtual GLuint gc_buffer() const = 0;
+
+protected:
+    /** Method to be implemented by the child class to map the SSBO.
+     *
+     * @param[in] blocking If true, then the mapping will be ready to use by the time
+     *                     this method returns, else it is the caller's responsibility
+     *                     to flush the queue and wait for the mapping operation to have completed before using the returned mapping pointer.
+     *
+     * @return The mapping pointer.
+     */
+    virtual uint8_t *do_map(bool blocking) = 0;
+    /** Method to be implemented by the child class to unmap the SSBO.
+     *
+     * @note This method simply enqueues the unmap operation, it is the caller's responsibility to flush the queue and make sure the unmap is finished before
+     *       the memory is accessed by the device.
+     */
+    virtual void do_unmap() = 0;
+
+private:
+    uint8_t *_mapping; /**< Mapping pointer; presumably set from do_map() and returned by buffer() - confirm in the implementation file */
+};
+
+using IGCImage = IGCTensor; /**< IGCImage is a plain alias of IGCTensor (no distinct image interface) */
+}
+#endif /*__ARM_COMPUTE_IGCTENSOR_H__ */
diff --git a/arm_compute/core/GLES_COMPUTE/OpenGLES.h b/arm_compute/core/GLES_COMPUTE/OpenGLES.h
new file mode 100644
index 0000000000..e123982945
--- /dev/null
+++ b/arm_compute/core/GLES_COMPUTE/OpenGLES.h
@@ -0,0 +1,165 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_OPENGLES_H__
+#define __ARM_COMPUTE_OPENGLES_H__
+
+#include "arm_compute/core/Log.h"
+
+#include <EGL/egl.h>
+#include <EGL/eglext.h>
+#include <EGL/eglplatform.h>
+#include <GLES3/gl31.h>
+#include <GLES3/gl3ext.h>
+#include <cstddef>
+
+#ifdef ARM_COMPUTE_DEBUG_ENABLED /* Debug builds: evaluate x, then query glGetError() and pass any code other than GL_NO_ERROR to the log macro */
+#define ARM_COMPUTE_GL_CHECK(x)                                                                      \
+    x;                                                                                               \
+    {                                                                                                \
+        GLenum error = glGetError();                                                                 \
+        if(error != GL_NO_ERROR)                                                                     \
+            ARM_COMPUTE_LOG_INFO_MSG_WITH_FORMAT_CORE("glGetError() = %i (0x%.8x)\n", error, error); \
+    }
+#else /* ARM_COMPUTE_DEBUG_ENABLED not defined: no error query, the macro expands to x alone */
+#define ARM_COMPUTE_GL_CHECK(x) x
+#endif /* ARM_COMPUTE_DEBUG_ENABLED - NOTE: the debug form expands to "x; { ... }", i.e. a statement followed by a block, so brace it when used as the body of an unbraced if/for */
+
+namespace arm_compute
+{
+namespace gles
+{
+/** Class interface for specifying NDRange values: up to three size_t sizes plus the number of dimensions in use. */
+class NDRange
+{
+private:
+    size_t _sizes[3]; /**< Size of each of the three dimensions */
+    size_t _dimensions; /**< Number of dimensions in use, as set by the constructor (0 to 3) */
+
+public:
+    /** Default constructor - resulting range has zero dimensions and all three sizes set to 0. */
+    NDRange()
+        : _dimensions(0)
+    {
+        _sizes[0] = 0;
+        _sizes[1] = 0;
+        _sizes[2] = 0;
+    }
+
+    /** Constructs one-dimensional range; the two remaining sizes are set to 1. Not explicit, so a size_t converts implicitly to an NDRange.
+     *
+     * @param[in] size0 Size of the first dimension.
+     */
+    NDRange(size_t size0)
+        : _dimensions(1)
+    {
+        _sizes[0] = size0;
+        _sizes[1] = 1;
+        _sizes[2] = 1;
+    }
+
+    /** Constructs two-dimensional range; the remaining size is set to 1.
+     *
+     * @param[in] size0 Size of the first dimension.
+     * @param[in] size1 Size of the second dimension.
+     */
+    NDRange(size_t size0, size_t size1)
+        : _dimensions(2)
+    {
+        _sizes[0] = size0;
+        _sizes[1] = size1;
+        _sizes[2] = 1;
+    }
+
+    /** Constructs three-dimensional range.
+     *
+     * @param[in] size0 Size of the first dimension.
+     * @param[in] size1 Size of the second dimension.
+     * @param[in] size2 Size of the third dimension.
+     */
+    NDRange(size_t size0, size_t size1, size_t size2)
+        : _dimensions(3)
+    {
+        _sizes[0] = size0;
+        _sizes[1] = size1;
+        _sizes[2] = size2;
+    }
+
+    /** Conversion operator to const size_t * (implicit).
+     *
+     * @returns A pointer to the size of the first dimension.
+     */
+    operator const size_t *() const
+    {
+        return _sizes;
+    }
+
+    /** Queries the number of dimensions in the range.
+     *
+     * @returns The number of dimensions.
+     */
+    size_t dimensions() const
+    {
+        return _dimensions;
+    }
+
+    /** Returns the size in bytes of the used part of the sizes array, i.e. dimensions() * sizeof(size_t)
+     *
+     * @returns The size in bytes of the sizes in use (0 for a default-constructed range).
+     */
+    size_t size() const
+    {
+        return _dimensions * sizeof(size_t);
+    }
+
+    /** Returns the array holding the size of each dimension (writable).
+     *
+     * @returns The sizes array
+     */
+    size_t *get()
+    {
+        return _sizes;
+    }
+
+    /** Returns the array holding the size of each dimension (read-only).
+     *
+     * @returns The sizes array
+     */
+    const size_t *get() const
+    {
+        return _sizes;
+    }
+};
+
+static const NDRange NullRange; /**< Default-constructed range: zero dimensions, all sizes 0. Declared static in a header, so each translation unit including it gets its own copy. */
+static const NDRange Range_128_1 = NDRange(128, 1); /**< Two-dimensional range with sizes 128 and 1 (third size is 1). Declared static in a header, so each translation unit including it gets its own copy. */
+} // namespace gles
+
+/** Check if the OpenGL ES 3.1 API is available at runtime.
+ *
+ * @return true if the OpenGL ES 3.1 API is available, false otherwise.
+ */
+bool opengles31_is_available();
+} // namespace arm_compute
+
+#endif /* __ARM_COMPUTE_OPENGLES_H__ */
diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCAbsoluteDifferenceKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCAbsoluteDifferenceKernel.h
new file mode 100644
index 0000000000..71f7b37700
--- /dev/null
+++ b/arm_compute/core/GLES_COMPUTE/kernels/GCAbsoluteDifferenceKernel.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_GCABSOLUTEDIFFERENCEKERNEL_H__
+#define __ARM_COMPUTE_GCABSOLUTEDIFFERENCEKERNEL_H__
+
+#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h"
+
+namespace arm_compute
+{
+class IGCTensor;
+
+/** Interface for the absolute difference kernel.
+ *
+ * Absolute difference is computed by:
+ * @f[ output(x,y) = | input1(x,y) - input2(x,y) | @f]
+ */
+class GCAbsoluteDifferenceKernel : public IGCKernel
+{
+public:
+    /** Default constructor. */
+    GCAbsoluteDifferenceKernel();
+    /** Prevent instances of this class from being copy constructed (As this class contains pointers). */
+    GCAbsoluteDifferenceKernel(const GCAbsoluteDifferenceKernel &) = delete;
+    /** Prevent instances of this class from being copy assigned (As this class contains pointers). */
+    GCAbsoluteDifferenceKernel &operator=(const GCAbsoluteDifferenceKernel &) = delete;
+    /** Allow instances of this class to be move constructed. */
+    GCAbsoluteDifferenceKernel(GCAbsoluteDifferenceKernel &&) = default;
+    /** Allow instances of this class to be move assigned. */
+    GCAbsoluteDifferenceKernel &operator=(GCAbsoluteDifferenceKernel &&) = default;
+    /** Default destructor (defaulted, so the tensor pointers held by this class are not released here) */
+    ~GCAbsoluteDifferenceKernel() = default;
+
+    /** Set the input and output tensors.
+     *
+     * @param[in]  input1 Source tensor. Data types supported: U8
+     * @param[in]  input2 Source tensor. Data types supported: U8
+     * @param[out] output Destination tensor. Data types supported: U8
+     */
+    void configure(const IGCTensor *input1, const IGCTensor *input2, IGCTensor *output);
+
+    // Inherited methods overridden (run() is declared pure virtual in IGCKernel):
+    void run(const Window &window) override;
+
+private:
+    const IGCTensor *_input1; /**< Source tensor 1. */
+    const IGCTensor *_input2; /**< Source tensor 2. */
+    IGCTensor       *_output; /**< Destination tensor. */
+};
+}
+#endif /* __ARM_COMPUTE_GCABSOLUTEDIFFERENCEKERNEL_H__ */
diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCActivationLayerKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCActivationLayerKernel.h
new file mode 100644
index 0000000000..fc1d52f455
--- /dev/null
+++ b/arm_compute/core/GLES_COMPUTE/kernels/GCActivationLayerKernel.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_GCACTIVATIONLAYERKERNEL_H__
+#define __ARM_COMPUTE_GCACTIVATIONLAYERKERNEL_H__
+
+#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h"
+
+namespace arm_compute
+{
+class IGCTensor;
+
+/** Interface for the activation layer kernel. */
+class GCActivationLayerKernel : public IGCKernel
+{
+public:
+    /** Default constructor */
+    GCActivationLayerKernel();
+    /** Prevent instances of this class from being copy constructed (As this class contains pointers) */
+    GCActivationLayerKernel(const GCActivationLayerKernel &) = delete;
+    /** Prevent instances of this class from being copy assigned (As this class contains pointers) */
+    GCActivationLayerKernel &operator=(const GCActivationLayerKernel &) = delete;
+    /** Allow instances of this class to be move constructed */
+    GCActivationLayerKernel(GCActivationLayerKernel &&) = default;
+    /** Allow instances of this class to be move assigned */
+    GCActivationLayerKernel &operator=(GCActivationLayerKernel &&) = default;
+    /** Default destructor (defaulted, so the tensor pointers held by this class are not released here) */
+    ~GCActivationLayerKernel() = default;
+    /** Set the input and output tensor.
+     *
+     * @note If the output tensor is a nullptr, the activation function will be performed in-place
+     *
+     * @param[in, out] input    Source tensor. In case of @p output tensor = nullptr, this tensor will store the result
+     *                          of the activation function. Data types supported: F16/F32.
+     * @param[out]     output   Destination tensor. Data type should match the input data type. May be nullptr for in-place operation (see note).
+     * @param[in]      act_info Activation layer information (taken by value).
+     */
+    void configure(IGCTensor *input, IGCTensor *output, ActivationLayerInfo act_info);
+
+    // Inherited methods overridden (run() is declared pure virtual in IGCKernel):
+    void run(const Window &window) override;
+
+private:
+    IGCTensor *_input; /**< Input tensor pointer; presumably stored by configure() - confirm in the implementation file */
+    IGCTensor *_output; /**< Output tensor pointer; presumably stored by configure() - confirm in the implementation file */
+};
+}
+#endif /*__ARM_COMPUTE_GCACTIVATIONLAYERKERNEL_H__ */
diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.h
new file mode 100644
index 0000000000..2bbd6a83fe
--- /dev/null
+++ b/arm_compute/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_GCBATCHNORMALIZATIONLAYERKERNEL_H__
+#define __ARM_COMPUTE_GCBATCHNORMALIZATIONLAYERKERNEL_H__
+
+#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h"
+
+namespace arm_compute
+{
+class IGCTensor;
+
+/** GLES Compute kernel interface for the batch normalization layer.
+ */
+class GCBatchNormalizationLayerKernel : public IGCKernel
+{
+public:
+    /** Constructor */
+    GCBatchNormalizationLayerKernel();
+    /** Copy construction is disabled (this class holds raw tensor pointers) */
+    GCBatchNormalizationLayerKernel(const GCBatchNormalizationLayerKernel &) = delete;
+    /** Copy assignment is disabled (this class holds raw tensor pointers) */
+    GCBatchNormalizationLayerKernel &operator=(const GCBatchNormalizationLayerKernel &) = delete;
+    /** Default move constructor. */
+    GCBatchNormalizationLayerKernel(GCBatchNormalizationLayerKernel &&) = default;
+    /** Default move assignment operator. */
+    GCBatchNormalizationLayerKernel &operator=(GCBatchNormalizationLayerKernel &&) = default;
+    /** Default destructor; the tensors pointed to are not released by this class */
+    ~GCBatchNormalizationLayerKernel() = default;
+
+    /** Set the input and output tensors together with the normalization parameters.
+     *
+     * @param[in]  input   Source tensor. 3 lower dimensions represent a single input with dimensions [width, height, FM].
+     *                     The rest are optional and used for representing batches. Data types supported: F16/F32.
+     * @param[out] output  Destination tensor. Output will have the same number of dimensions as input. Data type supported: same as @p input
+     * @param[in]  mean    Mean values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
+     * @param[in]  var     Variance values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
+     * @param[in]  beta    Beta values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
+     * @param[in]  gamma   Gamma values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
+     * @param[in]  epsilon Small value to avoid division by zero.
+     */
+    void configure(const IGCTensor *input, IGCTensor *output, const IGCTensor *mean, const IGCTensor *var, const IGCTensor *beta, const IGCTensor *gamma, float epsilon);
+
+    // Inherited methods overridden:
+    void run(const Window &window) override;
+
+private:
+    const IGCTensor *_input; /**< Input tensor pointer (not owned) */
+    IGCTensor       *_output; /**< Output tensor pointer (not owned) */
+    const IGCTensor *_mean; /**< Mean values tensor pointer (not owned) */
+    const IGCTensor *_var; /**< Variance values tensor pointer (not owned) */
+    const IGCTensor *_beta; /**< Beta values tensor pointer (not owned) */
+    const IGCTensor *_gamma; /**< Gamma values tensor pointer (not owned) */
+    float            _epsilon; /**< Epsilon value kept by the kernel */
+};
+}
+#endif /*__ARM_COMPUTE_GCBATCHNORMALIZATIONLAYERKERNEL_H__ */
diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCCol2ImKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCCol2ImKernel.h
new file mode 100644
index 0000000000..257ab0eca0
--- /dev/null
+++ b/arm_compute/core/GLES_COMPUTE/kernels/GCCol2ImKernel.h
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __ARM_COMPUTE_GCCOL2IMKERNEL_H__
+#define __ARM_COMPUTE_GCCOL2IMKERNEL_H__
+
+#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h"
+
+namespace arm_compute
+{
+class IGCTensor;
+
+/** GLES Compute kernel interface for the col2im reshaping operation.
+ *
+ * Rearranges each matrix column into image blocks. It is the inverse operation of @ref GCIm2ColKernel.
+ *
+ * For example, a vector of 9 elements can be reshaped to a block (image) of 3x3:
+ *
+ * @f[
+ * \left( \begin{array}{ccccccccc}
+ * a0 & a1 & a2 & a3 & a4 & a5 & a6 & a7 & a8 \\
+ * \end{array} \right)
+ * \rightarrow
+ * \left( \begin{array}{ccc}
+ * a0 & a1 & a2 \\
+ * a3 & a4 & a5 \\
+ * a6 & a7 & a8 \\
+ * \end{array} \right)
+ * @f]
+ */
+class GCCol2ImKernel : public IGCKernel
+{
+public:
+    /** Default constructor */
+    GCCol2ImKernel();
+
+    /** Copy construction is disabled (this class holds raw tensor pointers) */
+    GCCol2ImKernel(const GCCol2ImKernel &) = delete;
+
+    /** Copy assignment is disabled (this class holds raw tensor pointers) */
+    GCCol2ImKernel &operator=(const GCCol2ImKernel &) = delete;
+
+    /** Move constructor (compiler-generated) */
+    GCCol2ImKernel(GCCol2ImKernel &&) = default;
+
+    /** Move assignment operator (compiler-generated) */
+    GCCol2ImKernel &operator=(GCCol2ImKernel &&) = default;
+
+    /** Default destructor; the tensors pointed to are not released by this class */
+    ~GCCol2ImKernel() = default;
+
+    /** Set the input and output of the kernel.
+     *
+     * @param[in]  input          The input tensor to convert. Data types supported: F32
+     * @param[out] output         The output tensor. 3 lower dimensions represent a single output [width, height, OFM],
+     *                            while the rest represent batch of outputs. Data types supported: Same as @p input
+     * @param[in]  convolved_dims Output convolved dimensions, passed as a pair (presumably width then height - confirm in the implementation).
+     */
+    void configure(const IGCTensor *input, IGCTensor *output, std::pair<unsigned int, unsigned int> convolved_dims);
+
+    // Inherited methods overridden:
+    void run(const Window &window) override;
+
+private:
+    const IGCTensor *_input; /**< Input tensor pointer (not owned) */
+    IGCTensor       *_output; /**< Output tensor pointer (not owned) */
+    std::pair<unsigned int, unsigned int> _convolved_dims; /**< Convolved dimensions. NOTE(review): std::pair is used but this header does not include <utility> directly */
+};
+}
+
+#endif /*__ARM_COMPUTE_GCCOL2IMKERNEL_H__ */
diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCDepthConcatenateKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCDepthConcatenateKernel.h
new file mode 100644
index 0000000000..9a34a9a9c5
--- /dev/null
+++ b/arm_compute/core/GLES_COMPUTE/kernels/GCDepthConcatenateKernel.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __ARM_COMPUTE_GCDEPTHCONCATENATEKERNEL_H__
+#define __ARM_COMPUTE_GCDEPTHCONCATENATEKERNEL_H__
+
+#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h"
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+class IGCTensor;
+
+/** GLES Compute kernel interface for depth concatenation.
+ *  The input tensor will be concatenated into the output tensor.
+ */
+class GCDepthConcatenateKernel : public IGCKernel
+{
+public:
+    /** Default constructor */
+    GCDepthConcatenateKernel();
+    /** Copy construction is disabled (this class holds raw tensor pointers) */
+    GCDepthConcatenateKernel(const GCDepthConcatenateKernel &) = delete;
+    /** Copy assignment is disabled (this class holds raw tensor pointers) */
+    GCDepthConcatenateKernel &operator=(const GCDepthConcatenateKernel &) = delete;
+    /** Move constructor (compiler-generated) */
+    GCDepthConcatenateKernel(GCDepthConcatenateKernel &&) = default;
+    /** Move assignment operator (compiler-generated) */
+    GCDepthConcatenateKernel &operator=(GCDepthConcatenateKernel &&) = default;
+    /** Default destructor; the tensors pointed to are not released by this class */
+    ~GCDepthConcatenateKernel() = default;
+    /** Initialise the kernel's inputs and output
+     *
+     * @param[in]     input        Input tensor. Data types supported: F16/F32.
+     * @param[in]     depth_offset The offset on the Z axis.
+     * @param[in,out] output       Output tensor. Data types supported: Same as @p input.
+     *
+     * @note The two lowest dimensions of the output tensor cannot be smaller than those of the input tensor.
+     * @note The gaps between the two lowest dimensions of input and output need to be divisible by 2.
+     *
+     */
+    void configure(const IGCTensor *input, unsigned int depth_offset, IGCTensor *output);
+
+    // Inherited methods overridden:
+    void run(const Window &window) override;
+    BorderSize border_size() const override;
+
+private:
+    const IGCTensor *_input; /**< Input tensor pointer (not owned) */
+    IGCTensor       *_output; /**< Output tensor pointer (not owned) */
+    int              _top_bottom; /**< NOTE(review): presumably the top/bottom border reported by border_size() - confirm in the .cpp */
+    int              _left_right; /**< NOTE(review): presumably the left/right border reported by border_size() - confirm in the .cpp */
+};
+}
+#endif /* __ARM_COMPUTE_GCDEPTHCONCATENATEKERNEL_H__ */
diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.h
new file mode 100644
index 0000000000..415b781bc6
--- /dev/null
+++ b/arm_compute/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_GCDIRECTCONVOLUTIONLAYERKERNEL_H__
+#define __ARM_COMPUTE_GCDIRECTCONVOLUTIONLAYERKERNEL_H__
+
+#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h"
+#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h"
+
+namespace arm_compute
+{
+class IGCTensor;
+
+/** GLES Compute kernel interface for direct convolution, templated on the kernel size.
+ */
+template <unsigned int kernel_size>
+class GCDirectConvolutionLayerKernel : public IGCKernel
+{
+public:
+    /** Default constructor */
+    GCDirectConvolutionLayerKernel();
+    /** Copy construction is disabled (this class holds raw tensor pointers) */
+    GCDirectConvolutionLayerKernel(const GCDirectConvolutionLayerKernel &) = delete;
+    /** Copy assignment is disabled (this class holds raw tensor pointers) */
+    GCDirectConvolutionLayerKernel &operator=(const GCDirectConvolutionLayerKernel &) = delete;
+    /** Move constructor (compiler-generated) */
+    GCDirectConvolutionLayerKernel(GCDirectConvolutionLayerKernel &&) = default;
+    /** Move assignment operator (compiler-generated) */
+    GCDirectConvolutionLayerKernel &operator=(GCDirectConvolutionLayerKernel &&) = default;
+    /** Default destructor; the tensors pointed to are not released by this class */
+    ~GCDirectConvolutionLayerKernel() = default;
+    /** Set the input, weights, bias and output of the kernel.
+     *
+     * @param[in]  input     The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM],
+     *                       while every optional dimension from 4 and above represent a batch of inputs. Data types supported: F16/F32
+     * @param[in]  weights   Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: Same as @p input.
+     * @param[in]  bias      Biases tensor. Shared bias supported. Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
+     * @param[out] output    The output tensor. First 2 lower dimensions represent a transform of each 3D input,
+     *                       while every dimension above represents a batch. Data types supported: Same as @p input
+     * @param[in]  conv_info Contains padding and stride information described in @ref PadStrideInfo.
+     */
+    void configure(const IGCTensor *input, const IGCTensor *weights, const IGCTensor *bias, IGCTensor *output, const PadStrideInfo &conv_info);
+
+    // Inherited methods overridden:
+    BorderSize border_size() const override;
+
+    // Inherited methods overridden:
+    void run(const Window &window) override;
+
+private:
+    const IGCTensor *_input; /**< Input tensor pointer (not owned) */
+    const IGCTensor *_bias; /**< Biases tensor pointer (not owned) */
+    const IGCTensor *_weights; /**< Weights tensor pointer (not owned) */
+    IGCTensor       *_output; /**< Output tensor pointer (not owned) */
+    BorderSize       _border_size; /**< Border size kept by the kernel */
+    int              _conv_stride_x; /**< Convolution stride along x (presumably taken from conv_info) */
+    int              _conv_stride_y; /**< Convolution stride along y (presumably taken from conv_info) */
+    int              _conv_pad_x; /**< Convolution padding along x (presumably taken from conv_info) */
+    int              _conv_pad_y; /**< Convolution padding along y (presumably taken from conv_info) */
+    gles::NDRange    _lws; /**< NOTE(review): presumably the local work-group size used at dispatch - confirm in the .cpp */
+};
+
+using GCDirectConvolutionLayer1x1Kernel = GCDirectConvolutionLayerKernel<1>; // kernel_size = 1
+using GCDirectConvolutionLayer3x3Kernel = GCDirectConvolutionLayerKernel<3>; // kernel_size = 3
+using GCDirectConvolutionLayer5x5Kernel = GCDirectConvolutionLayerKernel<5>; // kernel_size = 5
+}
+#endif /*__ARM_COMPUTE_GCDIRECTCONVOLUTIONLAYERKERNEL_H__ */
diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCDropoutKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCDropoutKernel.h
new file mode 100644
index 0000000000..6159a7af26
--- /dev/null
+++ b/arm_compute/core/GLES_COMPUTE/kernels/GCDropoutKernel.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __ARM_COMPUTE_GCDROPOUTKERNEL_H__
+#define __ARM_COMPUTE_GCDROPOUTKERNEL_H__
+
+#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h"
+
+namespace arm_compute
+{
+class IGCTensor;
+
+/** GLES Compute kernel interface for dropout.
+ *
+ * Dropout is used to reduce over-fitting in neural networks.
+ *
+ */
+class GCDropoutKernel : public IGCKernel
+{
+public:
+    /** Default constructor */
+    GCDropoutKernel();
+
+    /** Copy construction is disabled (this class holds raw tensor pointers) */
+    GCDropoutKernel(const GCDropoutKernel &) = delete;
+
+    /** Copy assignment is disabled (this class holds raw tensor pointers) */
+    GCDropoutKernel &operator=(const GCDropoutKernel &) = delete;
+
+    /** Move constructor (compiler-generated) */
+    GCDropoutKernel(GCDropoutKernel &&) = default;
+
+    /** Move assignment operator (compiler-generated) */
+    GCDropoutKernel &operator=(GCDropoutKernel &&) = default;
+
+    /** Set the input, mask and output of the kernel.
+     *
+     * @param[in]  input   The input tensor for this op. Data types supported: F16/F32
+     * @param[out] mask    The mask tensor. Data types supported: Same as @p input
+     * @param[out] output  The output tensor. Data types supported: Same as @p input
+     * @param[in]  ratio   Dropout ratio
+     * @param[in]  forward Selects forward or backward propagation (presumably true means forward - confirm in the implementation)
+     *
+     */
+    void configure(const IGCTensor *input, IGCTensor *mask, IGCTensor *output, float ratio, bool forward);
+
+    // Inherited methods overridden:
+    void run(const Window &window) override;
+
+private:
+    const IGCTensor *_input; /**< Input tensor pointer (not owned) */
+    IGCTensor       *_mask; /**< Mask tensor pointer (not owned) */
+    IGCTensor       *_output; /**< Output tensor pointer (not owned) */
+    unsigned int     _num_elems_processed_per_iteration; /**< Number of elements processed per iteration, as the name states */
+};
+}
+
+#endif /*__ARM_COMPUTE_GCDROPOUTKERNEL_H__ */
diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCFillBorderKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCFillBorderKernel.h
new file mode 100644
index 0000000000..acb8aa67d3
--- /dev/null
+++ b/arm_compute/core/GLES_COMPUTE/kernels/GCFillBorderKernel.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_GCFILLBORDERKERNEL_H__
+#define __ARM_COMPUTE_GCFILLBORDERKERNEL_H__
+
+#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h"
+#include "arm_compute/core/PixelValue.h"
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+class IGCTensor;
+
+/** GLES Compute kernel interface for filling the border of a tensor */
+class GCFillBorderKernel : public IGCKernel
+{
+public:
+    /** Default constructor */
+    GCFillBorderKernel();
+    /** Copy construction is disabled (this class holds a raw tensor pointer) */
+    GCFillBorderKernel(const GCFillBorderKernel &) = delete;
+    /** Copy assignment is disabled (this class holds a raw tensor pointer) */
+    GCFillBorderKernel &operator=(const GCFillBorderKernel &) = delete;
+    /** Move constructor (compiler-generated) */
+    GCFillBorderKernel(GCFillBorderKernel &&) = default;
+    /** Move assignment operator (compiler-generated) */
+    GCFillBorderKernel &operator=(GCFillBorderKernel &&) = default;
+    /** Default destructor; the tensor pointed to is not released by this class */
+    ~GCFillBorderKernel() = default;
+
+    /** Initialise the kernel's tensor, border size and border mode.
+     *
+     * @param[in,out] tensor                Tensor to process. Data types supported: F16/F32. NOTE(review): tagged in,out but taken as pointer-to-const - confirm.
+     * @param[in]     border_size           Size of the border to fill in elements.
+     * @param[in]     border_mode           Border mode to use for the convolution.
+     * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     */
+    void configure(const IGCTensor *tensor, BorderSize border_size, BorderMode border_mode, const PixelValue &constant_border_value = PixelValue());
+
+    /** Function to set the constant value on fill border kernel depending on type.
+     *
+     * @param[in] idx                   Index of the kernel argument to set.
+     * @param[in] constant_border_value Constant value to use for borders if border_mode is set to CONSTANT.
+     */
+    template <class T>
+    void set_constant_border(unsigned int idx, const PixelValue &constant_border_value);
+
+    // Inherited methods overridden:
+    void run(const Window &window) override;
+    bool is_parallelisable() const override;
+
+private:
+    const IGCTensor *_tensor; /**< Tensor pointer kept by the kernel (not owned) */
+};
+}
+#endif /*__ARM_COMPUTE_GCFILLBORDERKERNEL_H__ */
diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMInterleave4x4Kernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMInterleave4x4Kernel.h
new file mode 100644
index 0000000000..b2369a6ad1
--- /dev/null
+++ b/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMInterleave4x4Kernel.h
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_GCGEMMINTERLEAVE4X4KERNEL_H__
+#define __ARM_COMPUTE_GCGEMMINTERLEAVE4X4KERNEL_H__
+
+#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h"
+
+namespace arm_compute
+{
+class IGCTensor;
+
+/** OpenGL ES kernel which interleaves the elements of a matrix A in chunks of 4x4
+ *
+ * This function puts the values in a 4x4 block of Matrix A on the same row (Interleaved values)
+ *
+ * @f[
+ * \left( \begin{array}{cccc}
+ * a00 & a01 & a02 & a03 \\
+ * a10 & a11 & a12 & a13 \\
+ * a20 & a21 & a22 & a23 \\
+ * a30 & a31 & a32 & a33 \\
+ * \end{array} \right)
+ * \rightarrow
+ * \left( \begin{array}{cccccccccccccccc}
+ * a00 & a10 & a20 & a30 & a01 & a11 & a21 & a31 & a02 & a12 & a22 & a32 & a03 & a13 & a23 & a33 \\
+ * \end{array} \right)
+ * @f]
+ *
+ * After this operation, the output matrix will have the following shape: [ height * 4, ceil(width / 4.0f) ]
+ */
+class GCGEMMInterleave4x4Kernel : public IGCKernel
+{
+public:
+    /** Default constructor */
+    GCGEMMInterleave4x4Kernel();
+    /** Copy construction is disabled (this class holds raw tensor pointers) */
+    GCGEMMInterleave4x4Kernel(const GCGEMMInterleave4x4Kernel &) = delete;
+    /** Copy assignment is disabled (this class holds raw tensor pointers) */
+    GCGEMMInterleave4x4Kernel &operator=(const GCGEMMInterleave4x4Kernel &) = delete;
+    /** Move constructor (compiler-generated) */
+    GCGEMMInterleave4x4Kernel(GCGEMMInterleave4x4Kernel &&) = default;
+    /** Move assignment operator (compiler-generated) */
+    GCGEMMInterleave4x4Kernel &operator=(GCGEMMInterleave4x4Kernel &&) = default;
+    /** Initialise the kernel's input and output.
+     *
+     * @param[in]  input  Input tensor. Data types supported: F32
+     * @param[out] output Output tensor. Data type supported: same as @p input
+     */
+    void configure(const IGCTensor *input, IGCTensor *output);
+
+    // Inherited methods overridden:
+    void run(const Window &window) override;
+
+private:
+    const IGCTensor *_input; /**< Input tensor pointer (not owned) */
+    IGCTensor       *_output; /**< Output tensor pointer (not owned) */
+};
+}
+#endif /* __ARM_COMPUTE_GCGEMMINTERLEAVE4X4KERNEL_H__ */
diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixAccumulateBiasesKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixAccumulateBiasesKernel.h
new file mode 100644
index 0000000000..20f28cbb65
--- /dev/null
+++ b/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixAccumulateBiasesKernel.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_GCGEMMMATRIXACCUMULATEBIASESKERNEL_H__
+#define __ARM_COMPUTE_GCGEMMMATRIXACCUMULATEBIASESKERNEL_H__
+
+#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h"
+
+namespace arm_compute
+{
+/** GLES Compute kernel interface to add a bias to each row of the input tensor
+ *
+ */
+class GCGEMMMatrixAccumulateBiasesKernel : public IGCKernel
+{
+public:
+    /** Default constructor */
+    GCGEMMMatrixAccumulateBiasesKernel();
+    /** Copy construction is disabled (this class holds raw tensor pointers) */
+    GCGEMMMatrixAccumulateBiasesKernel(const GCGEMMMatrixAccumulateBiasesKernel &) = delete;
+    /** Copy assignment is disabled (this class holds raw tensor pointers) */
+    GCGEMMMatrixAccumulateBiasesKernel &operator=(const GCGEMMMatrixAccumulateBiasesKernel &) = delete;
+    /** Move constructor (compiler-generated) */
+    GCGEMMMatrixAccumulateBiasesKernel(GCGEMMMatrixAccumulateBiasesKernel &&) = default;
+    /** Move assignment operator (compiler-generated) */
+    GCGEMMMatrixAccumulateBiasesKernel &operator=(GCGEMMMatrixAccumulateBiasesKernel &&) = default;
+    /** Set the accumulate buffer and the biases of the kernel.
+     *
+     * @param[in, out] accum  The accumulate tensor to which the biases are added. Data types supported: F16/F32
+     * @param[in]      biases The shared biases tensor to append. It must be 1D tensor. Data types supported: Same as @p accum
+     */
+    void configure(IGCTensor *accum, const IGCTensor *biases);
+
+    // Inherited methods overridden:
+    void run(const Window &window) override;
+
+private:
+    IGCTensor       *_accum; /**< Accumulate tensor pointer (not owned) */
+    const IGCTensor *_biases; /**< Biases tensor pointer (not owned) */
+};
+}
+
+#endif /*__ARM_COMPUTE_GCGEMMMATRIXACCUMULATEBIASESKERNEL_H__ */
diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixAdditionKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixAdditionKernel.h
new file mode 100644
index 0000000000..02abb8da76
--- /dev/null
+++ b/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixAdditionKernel.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_GCGEMMMATRIXADDITIONKERNEL_H__
+#define __ARM_COMPUTE_GCGEMMMATRIXADDITIONKERNEL_H__
+
+#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h"
+
+namespace arm_compute
+{
+class IGCTensor;
+
+/** OpenGL ES kernel to perform the in-place matrix addition between 2 matrices, taking into account that the second matrix might be weighted by a scalar value beta.
+ *  The matrices must have the same dimensions.
+ *
+ * @note This kernel is computed if and only if beta != 0.0.
+ */
+class GCGEMMMatrixAdditionKernel : public IGCKernel
+{
+public:
+    /** Default constructor */
+    GCGEMMMatrixAdditionKernel();
+    /** Copy construction is disabled (this class holds raw tensor pointers) */
+    GCGEMMMatrixAdditionKernel(const GCGEMMMatrixAdditionKernel &) = delete;
+    /** Copy assignment is disabled (this class holds raw tensor pointers) */
+    GCGEMMMatrixAdditionKernel &operator=(const GCGEMMMatrixAdditionKernel &) = delete;
+    /** Move constructor (compiler-generated) */
+    GCGEMMMatrixAdditionKernel(GCGEMMMatrixAdditionKernel &&) = default;
+    /** Move assignment operator (compiler-generated) */
+    GCGEMMMatrixAdditionKernel &operator=(GCGEMMMatrixAdditionKernel &&) = default;
+    /** Initialise the kernel's input, output and beta value
+     *
+     * @note The input and output tensors must have the same dimensions
+     *
+     * @param[in]      input  Input tensor (Matrix C). Data types supported: F32
+     * @param[in, out] output Output tensor. If this kernel is used to finalize the GEMM result (alpha * AB + beta * C), output must contain the result obtained by @ref GCGEMMMatrixMultiplyKernel. Data type supported: same as @p input
+     * @param[in]      beta   Weight of matrix C
+     */
+    void configure(const IGCTensor *input, IGCTensor *output, float beta);
+
+    // Inherited methods overridden:
+    void run(const Window &window) override;
+
+private:
+    const IGCTensor *_input; /**< Input tensor pointer (not owned) */
+    IGCTensor       *_output; /**< Output tensor pointer (not owned) */
+};
+}
+
+#endif /* __ARM_COMPUTE_GCGEMMMATRIXADDITIONKERNEL_H__ */
diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixMultiplyKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixMultiplyKernel.h
new file mode 100644
index 0000000000..3a0b22f148
--- /dev/null
+++ b/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixMultiplyKernel.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_GCGEMMMATRIXMULTIPLYKERNEL_H__
+#define __ARM_COMPUTE_GCGEMMMATRIXMULTIPLYKERNEL_H__
+
+#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h"
+
+namespace arm_compute
+{
+class IGCTensor;
+
+/** GLES Compute kernel to multiply two input matrices "A" and "B" or to multiply a vector "A" by a matrix "B". All elements of the output matrix/vector will be multiplied by alpha
+ *
+ * @note If the output tensor is a matrix, the implementation assumes that the input tensors @p input0 and @p input1 are both matrices and reshaped respectively with @ref GCGEMMInterleave4x4Kernel and @ref GCGEMMTranspose1xWKernel
+ * @note If the output tensor is a vector and the data type is F32, the implementation assumes that the first input tensor @p input0 is a vector and the second input tensor @p input1 a matrix. The implementation also assumes that both tensors have not been reshaped
+ *
+ * @attention The second input tensor must have at least 2 dimensions (matrix)
+ *
+ */
+class GCGEMMMatrixMultiplyKernel : public IGCKernel
+{
+public:
+    /** Default constructor */
+    GCGEMMMatrixMultiplyKernel();
+
+    /** Copy construction is disabled (the class stores raw tensor pointers) */
+    GCGEMMMatrixMultiplyKernel(const GCGEMMMatrixMultiplyKernel &) = delete;
+
+    /** Copy assignment is disabled (the class stores raw tensor pointers) */
+    GCGEMMMatrixMultiplyKernel &operator=(const GCGEMMMatrixMultiplyKernel &) = delete;
+
+    /** Move construction is allowed (compiler-generated) */
+    GCGEMMMatrixMultiplyKernel(GCGEMMMatrixMultiplyKernel &&) = default;
+
+    /** Move assignment is allowed (compiler-generated) */
+    GCGEMMMatrixMultiplyKernel &operator=(GCGEMMMatrixMultiplyKernel &&) = default;
+
+    /** Initialise the kernel's input, output and alpha
+     *
+     * @param[in]  input0                    Input tensor containing the interleaved Matrix A or the vector A. Data types supported: F16/F32
+     * @param[in]  input1                    Input tensor containing the transposed Matrix B if the first input tensor A is not a vector.
+     *                                       If the output tensor is a vector, input1 must contain the matrix B not reshaped. Data type supported: same as @p input0
+     * @param[out] output                    Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0
+     * @param[in]  alpha                     Weight of the matrix product
+     * @param[in]  is_interleaved_transposed (Optional) True if input0 and input1 have been reshaped respectively using @ref GCGEMMInterleave4x4Kernel and @ref GCGEMMTranspose1xWKernel. Defaults to true.
+     */
+    void configure(const IGCTensor *input0, const IGCTensor *input1, IGCTensor *output, float alpha, bool is_interleaved_transposed = true);
+
+    // Inherited methods overridden:
+    void run(const Window &window) override;
+
+private:
+    const IGCTensor *_input0; /**< First input tensor pointer (raw; this class declares no destructor that would free it) */
+    const IGCTensor *_input1; /**< Second input tensor pointer (raw; this class declares no destructor that would free it) */
+    IGCTensor       *_output; /**< Output tensor pointer (raw; this class declares no destructor that would free it) */
+};
+}
+#endif /* __ARM_COMPUTE_GCGEMMMATRIXMULTIPLYKERNEL_H__ */
diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMTranspose1xWKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMTranspose1xWKernel.h
new file mode 100644
index 0000000000..4223556ac4
--- /dev/null
+++ b/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMTranspose1xWKernel.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_GCGEMMTRANSPOSE1XWKERNEL_H__
+#define __ARM_COMPUTE_GCGEMMTRANSPOSE1XWKERNEL_H__
+
+#include "arm_compute/core/GLES_COMPUTE/IGCSimple2DKernel.h"
+
+namespace arm_compute
+{
+class IGCTensor;
+
+/** OpenGL ES kernel which transposes the elements of a matrix in chunks of 1xW, where W is equal to (16 / element size of the tensor)
+ *
+ * The following is an example of how the 1xW transposition works when the input data type is F32
+ *
+ * @f[
+ * \left( \begin{array}{cccc}
+ * a00 & a01 & a02 & a03 \\
+ * a10 & a11 & a12 & a13 \\
+ * a20 & a21 & a22 & a23 \\
+ * a30 & a31 & a32 & a33 \\
+ * \end{array} \right)
+ * \rightarrow
+ * \left( \begin{array}{ccccccccccccccccc}
+ * a00 & a01 & a02 & a03 & a10 & a11 & a12 & a13 & a20 & a21 & a22 & a23 & a30 & a31 & a32 & a33 \\
+ * \end{array} \right)
+ * @f]
+ *
+ * @note The output matrix will have the following shape: [ height * W, ceil(width / W) ], where W = (16 / element size of the tensor)
+ *
+ */
+class GCGEMMTranspose1xWKernel : public IGCSimple2DKernel
+{
+public:
+    /** Initialise the kernel's input and output.
+     *
+     * @param[in]  input  Input tensor. Data types supported: F32
+     * @param[out] output Output tensor (see the class note for its expected shape). Data type supported: same as @p input
+     */
+    void configure(const IGCTensor *input, IGCTensor *output);
+
+    // Inherited methods overridden:
+    void run(const Window &window) override;
+};
+}
+#endif /* __ARM_COMPUTE_GCGEMMTRANSPOSE1XWKERNEL_H__ */
diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCIm2ColKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCIm2ColKernel.h
new file mode 100644
index 0000000000..e1b35607ff
--- /dev/null
+++ b/arm_compute/core/GLES_COMPUTE/kernels/GCIm2ColKernel.h
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_GCIM2COLKERNEL_H__
+#define __ARM_COMPUTE_GCIM2COLKERNEL_H__
+
+#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h"
+
+namespace arm_compute
+{
+class IGCTensor;
+
+/** Interface for the im2col reshape kernel.
+ *
+ * Rearranges image blocks into columns. It is used to strip out each convolution block to a single column.
+ * It is used to transform a convolution to a plain matrix multiplication.
+ *
+ * For example taking into account the image below and assuming 3x3 image blocks with stride of 1 we have:
+ * @f[
+ * \left( \begin{array}{cccc}
+ * a00 & a01 & a02 & a03 \\
+ * a10 & a11 & a12 & a13 \\
+ * a20 & a21 & a22 & a23 \\
+ * a30 & a31 & a32 & a33 \\
+ * \end{array} \right)
+ * =
+ * \left( \begin{array}{ccccccccc}
+ * a00 & a01 & a02 & a10 & a11 & a12 & a20 & a21 & a22 \\
+ * a01 & a02 & a03 & a11 & a12 & a13 & a21 & a22 & a23 \\
+ * a10 & a11 & a12 & a20 & a21 & a22 & a30 & a31 & a32 \\
+ * a11 & a12 & a13 & a21 & a22 & a23 & a31 & a32 & a33 \\
+ * \end{array} \right)
+ * @f]
+ */
+class GCIm2ColKernel : public IGCKernel
+{
+public:
+    /** Default constructor */
+    GCIm2ColKernel();
+    /** Copy construction is disabled (the class stores raw tensor pointers) */
+    GCIm2ColKernel(const GCIm2ColKernel &) = delete;
+    /** Copy assignment is disabled (the class stores raw tensor pointers) */
+    GCIm2ColKernel &operator=(const GCIm2ColKernel &) = delete;
+    /** Move construction is allowed (compiler-generated) */
+    GCIm2ColKernel(GCIm2ColKernel &&) = default;
+    /** Move assignment is allowed (compiler-generated) */
+    GCIm2ColKernel &operator=(GCIm2ColKernel &&) = default;
+    /** Set the input and output of the kernel.
+     *
+     * @param[in]  input       The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM],
+     *                         while every optional dimension from 4 and above represent a batch of inputs. Data types supported: F32
+     * @param[out] output      The output tensor. First 2 lower dimensions represent a transform of each 3D input,
+     *                         while every dimension above represents a batch. Data types supported: Same as @p input
+     * @param[in]  kernel_dims The kernel dimensions (width and height).
+     * @param[in]  conv_info   Contains padding and stride information described in @ref PadStrideInfo.
+     * @param[in]  has_bias    In case biases are provided expands the matrix with 1.
+     */
+    void configure(const IGCTensor *input, IGCTensor *output, std::pair<unsigned int, unsigned int> kernel_dims, const PadStrideInfo &conv_info, bool has_bias);
+
+    // Inherited methods overridden:
+    void run(const Window &window) override;
+
+private:
+    /** Run the reshape kernel optimised for the special case (stride is 1, padding is 0 and kernel's low 3 dimensions are same as input)
+     *
+     * @param[in] window Region on which to execute the kernel.
+     *                   (Must be a valid region of the window returned by window()).
+     */
+    void run_reduced(const Window &window);
+    /** Run the generic convolution layer input reshape kernel
+     *
+     * @param[in] window Region on which to execute the kernel.
+     *                   (Must be a valid region of the window returned by window()).
+     */
+    void run_generic(const Window &window);
+
+    /** Common signature for the kernel to run (matches run_reduced() and run_generic()) */
+    using Im2ColFunction = void (GCIm2ColKernel::*)(const Window &);
+
+private:
+    const IGCTensor *_input;  /**< Input tensor pointer (raw; this class declares no destructor that would free it) */
+    IGCTensor       *_output; /**< Output tensor pointer (raw; this class declares no destructor that would free it) */
+    std::pair<unsigned int, unsigned int> _convolved_dims; /**< Pair of dimensions; presumably the convolved output (width, height) - TODO confirm in the .cpp */
+    unsigned int   _num_elems_processed_per_iteration; /**< Number of elements processed per iteration (value assigned outside this header) */
+    Im2ColFunction _run_func; /**< Pointer to the member function to execute; run_reduced() and run_generic() have this signature */
+};
+}
+
+#endif /*__ARM_COMPUTE_GCIM2COLKERNEL_H__ */
diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCNormalizationLayerKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCNormalizationLayerKernel.h
new file mode 100644
index 0000000000..e8bc7ad2b2
--- /dev/null
+++ b/arm_compute/core/GLES_COMPUTE/kernels/GCNormalizationLayerKernel.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_GCNORMALIZATIONLAYERKERNEL_H__
+#define __ARM_COMPUTE_GCNORMALIZATIONLAYERKERNEL_H__
+
+#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h"
+
+namespace arm_compute
+{
+class IGCTensor;
+
+/** Interface for the normalization layer kernel.
+ */
+class GCNormalizationLayerKernel : public IGCKernel
+{
+public:
+    /** Constructor */
+    GCNormalizationLayerKernel();
+    /** Copy construction is disabled (the class stores raw tensor pointers) */
+    GCNormalizationLayerKernel(const GCNormalizationLayerKernel &) = delete;
+    /** Copy assignment is disabled (the class stores raw tensor pointers) */
+    GCNormalizationLayerKernel &operator=(const GCNormalizationLayerKernel &) = delete;
+    /** Default move constructor. */
+    GCNormalizationLayerKernel(GCNormalizationLayerKernel &&) = default;
+    /** Default move assignment operator. */
+    GCNormalizationLayerKernel &operator=(GCNormalizationLayerKernel &&) = default;
+    /** Default destructor */
+    ~GCNormalizationLayerKernel() = default;
+    /** Set the input and output tensors.
+     *
+     * @param[in]  input         Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM],
+     *                           and an optional 4th dimension for batch of inputs. Data types supported: F32.
+     * @param[in]  squared_input Source tensor in which each element has been squared. 3 lower dims represent a single input with dimensions [width, height, IFM],
+     *                           Data types should match the input type.
+     * @param[out] output        Destination tensor. Output will have the same number of dimensions as input. Data types should match the input type.
+     * @param[in]  norm_info     Normalization layer information like the normalization type, normalization size and other parameters.
+     */
+    void configure(const IGCTensor *input, const IGCTensor *squared_input, IGCTensor *output, NormalizationLayerInfo norm_info);
+
+    // Inherited methods overridden:
+    void run(const Window &window) override;
+    BorderSize border_size() const override;
+
+private:
+    const IGCTensor *_input;         /**< Source tensor pointer (raw; the defaulted destructor does not free it) */
+    const IGCTensor *_squared_input; /**< Squared-source tensor pointer (raw; the defaulted destructor does not free it) */
+    IGCTensor       *_output;        /**< Destination tensor pointer (raw; the defaulted destructor does not free it) */
+    BorderSize       _border_size;   /**< Border size value; presumably what border_size() returns - TODO confirm in the .cpp */
+};
+}
+#endif /*__ARM_COMPUTE_GCNORMALIZATIONLAYERKERNEL_H__ */
diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCPixelWiseMultiplicationKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCPixelWiseMultiplicationKernel.h
new file mode 100644
index 0000000000..3b01b4ad4d
--- /dev/null
+++ b/arm_compute/core/GLES_COMPUTE/kernels/GCPixelWiseMultiplicationKernel.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_GCPIXELWISEMULTIPLICATIONKERNEL_H__
+#define __ARM_COMPUTE_GCPIXELWISEMULTIPLICATIONKERNEL_H__
+
+#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h"
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+class IGCTensor;
+
+/** Interface for the pixelwise multiplication kernel.
+ *
+ */
+class GCPixelWiseMultiplicationKernel : public IGCKernel
+{
+public:
+    /** Default constructor.*/
+    GCPixelWiseMultiplicationKernel();
+    /** Copy construction is disabled (the class stores raw tensor pointers). */
+    GCPixelWiseMultiplicationKernel(const GCPixelWiseMultiplicationKernel &) = delete;
+    /** Copy assignment is disabled (the class stores raw tensor pointers). */
+    GCPixelWiseMultiplicationKernel &operator=(const GCPixelWiseMultiplicationKernel &) = delete;
+    /** Move construction is allowed (compiler-generated) */
+    GCPixelWiseMultiplicationKernel(GCPixelWiseMultiplicationKernel &&) = default;
+    /** Move assignment is allowed (compiler-generated) */
+    GCPixelWiseMultiplicationKernel &operator=(GCPixelWiseMultiplicationKernel &&) = default;
+    /** Initialise the kernel's inputs, output and scale.
+     *
+     * @param[in]  input1 An input tensor. Data types supported: F32.
+     * @param[in]  input2 An input tensor. Data types supported: same as @p input1.
+     * @param[out] output The output tensor. Data types supported: same as @p input1.
+     * @param[in]  scale  Scale to apply after multiplication.
+     *                    Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15.
+     */
+    void configure(const IGCTensor *input1, const IGCTensor *input2, IGCTensor *output, float scale);
+
+    // Inherited methods overridden:
+    void run(const Window &window) override;
+
+private:
+    const IGCTensor *_input1; /**< First input tensor pointer (raw; this class declares no destructor that would free it) */
+    const IGCTensor *_input2; /**< Second input tensor pointer (raw; this class declares no destructor that would free it) */
+    IGCTensor       *_output; /**< Output tensor pointer (raw; this class declares no destructor that would free it) */
+};
+}
+
+#endif /*__ARM_COMPUTE_GCPIXELWISEMULTIPLICATIONKERNEL_H__ */
diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.h
new file mode 100644
index 0000000000..d4921c2092
--- /dev/null
+++ b/arm_compute/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_GCPOOLINGLAYERKERNEL_H__
+#define __ARM_COMPUTE_GCPOOLINGLAYERKERNEL_H__
+
+#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h"
+
+namespace arm_compute
+{
+class IGCTensor;
+
+/** Interface for the pooling layer kernel */
+class GCPoolingLayerKernel : public IGCKernel
+{
+public:
+    /** Default constructor */
+    GCPoolingLayerKernel();
+    /** Copy construction is disabled (the class stores raw tensor pointers) */
+    GCPoolingLayerKernel(const GCPoolingLayerKernel &) = delete;
+    /** Copy assignment is disabled (the class stores raw tensor pointers) */
+    GCPoolingLayerKernel &operator=(const GCPoolingLayerKernel &) = delete;
+    /** Move construction is allowed (compiler-generated) */
+    GCPoolingLayerKernel(GCPoolingLayerKernel &&) = default;
+    /** Move assignment is allowed (compiler-generated) */
+    GCPoolingLayerKernel &operator=(GCPoolingLayerKernel &&) = default;
+    /** Default destructor */
+    ~GCPoolingLayerKernel() = default;
+
+    /** Set the input and output tensors.
+     *
+     * @param[in]  input     Source tensor. Data types supported: F16/F32.
+     * @param[out] output    Destination tensor. Data types supported: Same as @p input.
+     * @param[in]  pool_info Contains pooling operation information described in @ref PoolingLayerInfo.
+     */
+    void configure(const IGCTensor *input, IGCTensor *output, const PoolingLayerInfo &pool_info);
+
+    // Inherited methods overridden:
+    void run(const Window &window) override;
+    BorderSize border_size() const override;
+
+private:
+    const IGCTensor *_input;       /**< Source tensor pointer (raw; the defaulted destructor does not free it) */
+    IGCTensor       *_output;      /**< Destination tensor pointer (raw; the defaulted destructor does not free it) */
+    PoolingLayerInfo _pool_info;   /**< Pooling operation information, held by value */
+    BorderSize       _border_size; /**< Border size value; presumably what border_size() returns - TODO confirm in the .cpp */
+    unsigned int     _num_elems_processed_per_iteration; /**< Number of elements processed per iteration (value assigned outside this header) */
+};
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_GCPOOLINGLAYERKERNEL_H__ */
diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCSoftmaxLayerKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCSoftmaxLayerKernel.h
new file mode 100644
index 0000000000..b9eb305bab
--- /dev/null
+++ b/arm_compute/core/GLES_COMPUTE/kernels/GCSoftmaxLayerKernel.h
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_GCSOFTMAXLAYERKERNEL_H__
+#define __ARM_COMPUTE_GCSOFTMAXLAYERKERNEL_H__
+
+#include "arm_compute/core/GLES_COMPUTE/IGCSimple3DKernel.h"
+
+namespace arm_compute
+{
+class IGCTensor;
+
+/** Interface for identifying the max value of 1D Logits */
+class GCLogits1DMaxKernel : public IGCSimple3DKernel
+{
+public:
+    /** Set the input and output tensors.
+     *
+     * @param[in]  input  Source tensor. Data types supported: F16/F32
+     * @param[out] output Destination tensor. Data types supported: same as @p input
+     */
+    void configure(const IGCTensor *input, IGCTensor *output);
+
+    // Inherited methods overridden:
+    void run(const Window &window) override;
+};
+
+/** Interface for shifting the logits values around the max value and exponentiating the result */
+class GCLogits1DShiftExpSumKernel : public IGCKernel
+{
+public:
+    /** Default constructor */
+    GCLogits1DShiftExpSumKernel();
+    /** Copy construction is disabled (the class stores raw tensor pointers) */
+    GCLogits1DShiftExpSumKernel(const GCLogits1DShiftExpSumKernel &) = delete;
+    /** Copy assignment is disabled (the class stores raw tensor pointers) */
+    GCLogits1DShiftExpSumKernel &operator=(const GCLogits1DShiftExpSumKernel &) = delete;
+    /** Move construction is allowed (compiler-generated) */
+    GCLogits1DShiftExpSumKernel(GCLogits1DShiftExpSumKernel &&) = default;
+    /** Move assignment is allowed (compiler-generated) */
+    GCLogits1DShiftExpSumKernel &operator=(GCLogits1DShiftExpSumKernel &&) = default;
+    /** Set the input, max, output and sum tensors.
+     *
+     * @param[in]  input  Source tensor. Data types supported: F16/F32
+     * @param[in]  max    Max values tensor. Data types supported: same as @p input
+     * @param[out] output Destination tensor. Data types supported: same as @p input
+     * @param[out] sum    Sum of 1D logits tensor. Data types supported: same as @p input
+     */
+    void configure(const IGCTensor *input, const IGCTensor *max, IGCTensor *output, IGCTensor *sum);
+
+    // Inherited methods overridden:
+    void run(const Window &window) override;
+
+private:
+    const IGCTensor *_input;  /**< Source tensor pointer (raw; this class declares no destructor that would free it) */
+    const IGCTensor *_max;    /**< Max values tensor pointer (raw; this class declares no destructor that would free it) */
+    IGCTensor       *_output; /**< Destination tensor pointer (raw; this class declares no destructor that would free it) */
+    IGCTensor       *_sum;    /**< Sum tensor pointer (raw; this class declares no destructor that would free it) */
+};
+
+/** Interface for calculating the final step of the Softmax Layer where each logit value is multiplied by the inverse of the sum of the logits. */
+class GCLogits1DNormKernel : public IGCKernel
+{
+public:
+    /** Default constructor */
+    GCLogits1DNormKernel();
+    /** Copy construction is disabled (the class stores raw tensor pointers) */
+    GCLogits1DNormKernel(const GCLogits1DNormKernel &) = delete;
+    /** Copy assignment is disabled (the class stores raw tensor pointers) */
+    GCLogits1DNormKernel &operator=(const GCLogits1DNormKernel &) = delete;
+    /** Move construction is allowed (compiler-generated) */
+    GCLogits1DNormKernel(GCLogits1DNormKernel &&) = default;
+    /** Move assignment is allowed (compiler-generated) */
+    GCLogits1DNormKernel &operator=(GCLogits1DNormKernel &&) = default;
+    /** Set the input, sum and output tensors.
+     *
+     * @param[in]  input  Source tensor. Data types supported: F16/F32
+     * @param[in]  sum    Sum tensor. Dimensions should be dim(input)-1. Data types supported: same as @p input
+     * @param[out] output Destination tensor. Data types supported: same as @p input
+     */
+    void configure(const IGCTensor *input, const IGCTensor *sum, IGCTensor *output);
+
+    // Inherited methods overridden:
+    void run(const Window &window) override;
+
+private:
+    const IGCTensor *_input;  /**< Source tensor pointer (raw; this class declares no destructor that would free it) */
+    const IGCTensor *_sum;    /**< Sum tensor pointer (raw; this class declares no destructor that would free it) */
+    IGCTensor       *_output; /**< Destination tensor pointer (raw; this class declares no destructor that would free it) */
+};
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_GCSOFTMAXLAYERKERNEL_H__ */
diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCTransposeKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCTransposeKernel.h
new file mode 100644
index 0000000000..c628a00585
--- /dev/null
+++ b/arm_compute/core/GLES_COMPUTE/kernels/GCTransposeKernel.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_GCTRANSPOSEKERNEL_H__
+#define __ARM_COMPUTE_GCTRANSPOSEKERNEL_H__
+
+#include "arm_compute/core/GLES_COMPUTE/IGCSimple2DKernel.h"
+
+namespace arm_compute
+{
+class IGCTensor;
+
+/** OpenGL ES kernel which transposes the elements of a matrix.
+ *
+ * [width, height, batch] -> [height, width, batch]
+ *
+ */
+class GCTransposeKernel : public IGCSimple2DKernel
+{
+public:
+    /** Initialise the kernel's input matrix and the output that receives its transpose.
+     *
+     * @param[in]  input  Input tensor. Data types supported: F16/F32
+     * @param[out] output Output tensor. Data type supported: Same as @p input
+     */
+    void configure(const IGCTensor *input, IGCTensor *output);
+
+    // Inherited methods overridden:
+    void run(const Window &window) override;
+};
+}
+#endif /* __ARM_COMPUTE_GCTRANSPOSEKERNEL_H__ */
diff --git a/arm_compute/core/Log.h b/arm_compute/core/Log.h
index 488c8e6190..70e7c51110 100644
--- a/arm_compute/core/Log.h
+++ b/arm_compute/core/Log.h
@@ -44,9 +44,12 @@
  * @param[in] log_level Logging level
  * @param[in] msg       Message to log
  */
-#define ARM_COMPUTE_LOG_MSG_CORE(log_level, msg) \
-    ARM_COMPUTE_CREATE_DEFAULT_CORE_LOGGER();    \
-    ARM_COMPUTE_LOG_MSG("CORE", log_level, msg)
+#define ARM_COMPUTE_LOG_MSG_CORE(log_level, msg)     \
+    do                                               \
+    {                                                \
+        ARM_COMPUTE_CREATE_DEFAULT_CORE_LOGGER();    \
+        ARM_COMPUTE_LOG_MSG("CORE", log_level, msg); \
+    } while(false)
 
 /** Log a message with format to the core system logger
  *
@@ -54,42 +57,57 @@
  * @param[in] fmt       String format (printf style)
  * @param[in] ...       Message arguments
  */
-#define ARM_COMPUTE_LOG_MSG_WITH_FORMAT_CORE(log_level, fmt, ...) \
-    ARM_COMPUTE_CREATE_DEFAULT_CORE_LOGGER();                     \
-    ARM_COMPUTE_LOG_MSG_WITH_FORMAT("CORE", log_level, fmt, __VA_ARGS__)
+#define ARM_COMPUTE_LOG_MSG_WITH_FORMAT_CORE(log_level, fmt, ...)             \
+    do                                                                        \
+    {                                                                         \
+        ARM_COMPUTE_CREATE_DEFAULT_CORE_LOGGER();                             \
+        ARM_COMPUTE_LOG_MSG_WITH_FORMAT("CORE", log_level, fmt, __VA_ARGS__); \
+    } while(false)
 
 /** Log a stream to the core system logger
  *
  * @param[in] log_level Logging level
  * @param[in] ss        Stream to log
  */
-#define ARM_COMPUTE_LOG_STREAM_CORE(log_level, ss) \
-    ARM_COMPUTE_CREATE_DEFAULT_CORE_LOGGER();      \
-    ARM_COMPUTE_LOG_STREAM("CORE", log_level, ss)
+#define ARM_COMPUTE_LOG_STREAM_CORE(log_level, ss)     \
+    do                                                 \
+    {                                                  \
+        ARM_COMPUTE_CREATE_DEFAULT_CORE_LOGGER();      \
+        ARM_COMPUTE_LOG_STREAM("CORE", log_level, ss); \
+    } while(false)
 
 /** Log information level message to the core system logger
  *
  * @param[in] msg Stream to log
  */
-#define ARM_COMPUTE_LOG_INFO_MSG_CORE(msg)    \
-    ARM_COMPUTE_CREATE_DEFAULT_CORE_LOGGER(); \
-    ARM_COMPUTE_LOG_MSG_CORE(arm_compute::logging::LogLevel::INFO, msg)
+#define ARM_COMPUTE_LOG_INFO_MSG_CORE(msg)                                   \
+    do                                                                       \
+    {                                                                        \
+        ARM_COMPUTE_CREATE_DEFAULT_CORE_LOGGER();                            \
+        ARM_COMPUTE_LOG_MSG_CORE(arm_compute::logging::LogLevel::INFO, msg); \
+    } while(false)
 
 /** Log information level formatted message to the core system logger
  *
  * @param[in] fmt String format (printf style)
  * @param[in] ... Message arguments
  */
-#define ARM_COMPUTE_LOG_INFO_MSG_WITH_FORMAT_CORE(fmt, ...) \
-    ARM_COMPUTE_CREATE_DEFAULT_CORE_LOGGER();               \
-    ARM_COMPUTE_LOG_MSG_WITH_FORMAT_CORE(arm_compute::logging::LogLevel::INFO, msg)
+#define ARM_COMPUTE_LOG_INFO_MSG_WITH_FORMAT_CORE(fmt, ...)                                           \
+    do                                                                                                \
+    {                                                                                                 \
+        ARM_COMPUTE_CREATE_DEFAULT_CORE_LOGGER();                                                     \
+        ARM_COMPUTE_LOG_MSG_WITH_FORMAT_CORE(arm_compute::logging::LogLevel::INFO, fmt, __VA_ARGS__); \
+    } while(false)
 
 /** Log information level stream to the core system logger
  *
  * @param[in] ss Message to log
  */
-#define ARM_COMPUTE_LOG_INFO_STREAM_CORE(ss)  \
-    ARM_COMPUTE_CREATE_DEFAULT_CORE_LOGGER(); \
-    ARM_COMPUTE_LOG_STREAM_CORE(arm_compute::logging::LogLevel::INFO, ss)
+#define ARM_COMPUTE_LOG_INFO_STREAM_CORE(ss)                                   \
+    do                                                                         \
+    {                                                                          \
+        ARM_COMPUTE_CREATE_DEFAULT_CORE_LOGGER();                              \
+        ARM_COMPUTE_LOG_STREAM_CORE(arm_compute::logging::LogLevel::INFO, ss); \
+    } while(false)
 
 #endif /* __ARM_COMPUTE_LOGGING_MACROS_H__ */
diff --git a/arm_compute/core/NEON/kernels/NEBatchNormalizationLayerKernel.h b/arm_compute/core/NEON/kernels/NEBatchNormalizationLayerKernel.h
index 04c4c9ebba..1dfe075310 100644
--- a/arm_compute/core/NEON/kernels/NEBatchNormalizationLayerKernel.h
+++ b/arm_compute/core/NEON/kernels/NEBatchNormalizationLayerKernel.h
@@ -54,12 +54,12 @@ public:
      * @param[in, out] input   Source tensor. In case of @p output tensor = nullptr, this tensor will store the result.
      *                         3 lower dimensions represent a single input with dimensions [width, height, FM].
      *                         The rest are optional and used for representing batches. Data types supported: QS8/QS16/F16/F32.
+     * @param[out]     output  Destination tensor. Output will have the same number of dimensions as input. Data type supported: same as @p input
      * @param[in]      mean    Mean values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
      * @param[in]      var     Variance values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
-     * @param[in]      gamma   Gamma values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
      * @param[in]      beta    Beta values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
+     * @param[in]      gamma   Gamma values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
      * @param[in]      epsilon Small value to avoid division with zero.
-     * @param[out]     output  Destination tensor. Output will have the same number of dimensions as input. Data type supported: same as @p input
      */
     void configure(ITensor *input, ITensor *output, const ITensor *mean, const ITensor *var, const ITensor *beta, const ITensor *gamma, float epsilon);
 
diff --git a/arm_compute/core/Utils.h b/arm_compute/core/Utils.h
index 96e99e6874..b8c5b34e5a 100644
--- a/arm_compute/core/Utils.h
+++ b/arm_compute/core/Utils.h
@@ -837,7 +837,16 @@ void print_consecutive_elements_impl(std::ostream &s, const T *ptr, unsigned int
         {
             s.width(stream_width);
         }
-        s << std::right << static_cast<print_type>(ptr[i]) << element_delim;
+
+        if(std::is_same<typename std::decay<T>::type, half>::value)
+        {
+            // We use T instead of print_type here because std::is_floating_point<half> returns false, which would make print_type int.
+            s << std::right << static_cast<T>(ptr[i]) << element_delim;
+        }
+        else
+        {
+            s << std::right << static_cast<print_type>(ptr[i]) << element_delim;
+        }
     }
 }
 
@@ -859,7 +868,17 @@ int max_consecutive_elements_display_width_impl(std::ostream &s, const T *ptr, u
     {
         std::stringstream ss;
         ss.copyfmt(s);
-        ss << static_cast<print_type>(ptr[i]);
+
+        if(std::is_same<typename std::decay<T>::type, half>::value)
+        {
+            // We use T instead of print_type here because std::is_floating_point<half> returns false, which would make print_type int.
+            ss << static_cast<T>(ptr[i]);
+        }
+        else
+        {
+            ss << static_cast<print_type>(ptr[i]);
+        }
+
         max_width = std::max<int>(max_width, ss.str().size());
     }
     return max_width;
diff --git a/arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h b/arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h
index 70a201a1f8..d84ba69da2 100644
--- a/arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h
+++ b/arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h
@@ -54,8 +54,8 @@ public:
      * @param[out]     output  Destination tensor. Output will have the same number of dimensions as input. Data type supported: same as @p input
      * @param[in]      mean    Mean values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
      * @param[in]      var     Variance values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
-     * @param[in]      gamma   Gamma values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
      * @param[in]      beta    Beta values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
+     * @param[in]      gamma   Gamma values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
      * @param[in]      epsilon Small value to avoid division with zero.
      */
     void configure(ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *var, const ICLTensor *beta, const ICLTensor *gamma, float epsilon);
@@ -63,12 +63,12 @@ public:
      *
      * @param[in] input   Source tensor info. In case of @p output tensor info = nullptr, this tensor will store the result.
      *                    3 lower dimensions represent a single input with dimensions [width, height, FM].
-     * @param[in] output  Destination tensor info. Output will have the same number of dimensions as input. Data type supported: same as @p input
      *                    The rest are optional and used for representing batches. Data types supported: QS8/QS16/F16/F32.
+     * @param[in] output  Destination tensor info. Output will have the same number of dimensions as input. Data type supported: same as @p input
      * @param[in] mean    Mean values tensor info. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
      * @param[in] var     Variance values tensor info. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
-     * @param[in] gamma   Gamma values tensor info. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
      * @param[in] beta    Beta values tensor info. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
+     * @param[in] gamma   Gamma values tensor info. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
      * @param[in] epsilon Small value to avoid division with zero.
      *
      * @return an error status
diff --git a/arm_compute/runtime/CL/functions/CLNormalizationLayer.h b/arm_compute/runtime/CL/functions/CLNormalizationLayer.h
index 0818cec2e5..1e0b27ae43 100644
--- a/arm_compute/runtime/CL/functions/CLNormalizationLayer.h
+++ b/arm_compute/runtime/CL/functions/CLNormalizationLayer.h
@@ -37,7 +37,7 @@ namespace arm_compute
 {
 class ICLTensor;
 
-/** Basic function to simulate a normalization layer. This function calls the following CL kernels:
+/** Basic function to compute a normalization layer. This function calls the following CL kernels:
  *
  * -# @ref CLFillBorderKernel
  * -# @ref CLNormalizationLayerKernel
@@ -55,7 +55,7 @@ public:
      * @param[out]     output    Destination tensor. Dimensions, data type and number of channels must match the input ones.
      * @param[in]      norm_info Normalization layer information like the normalization type, normalization size and other parameters.
      */
-    void configure(ICLTensor *input, ICLTensor *output, NormalizationLayerInfo norm_info);
+    void configure(ICLTensor *input, ICLTensor *output, const NormalizationLayerInfo &norm_info);
 
     // Inherited methods overridden:
     void run() override;
diff --git a/arm_compute/runtime/GLES_COMPUTE/GCFunctions.h b/arm_compute/runtime/GLES_COMPUTE/GCFunctions.h
new file mode 100644
index 0000000000..8a345c5fab
--- /dev/null
+++ b/arm_compute/runtime/GLES_COMPUTE/GCFunctions.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_GCFUNCTIONS_H__
+#define __ARM_COMPUTE_GCFUNCTIONS_H__
+
+/* Header regrouping all the GLES compute functions */
+#include "arm_compute/runtime/GLES_COMPUTE/functions/GCAbsoluteDifference.h"
+#include "arm_compute/runtime/GLES_COMPUTE/functions/GCActivationLayer.h"
+#include "arm_compute/runtime/GLES_COMPUTE/functions/GCBatchNormalizationLayer.h"
+#include "arm_compute/runtime/GLES_COMPUTE/functions/GCDepthConcatenate.h"
+#include "arm_compute/runtime/GLES_COMPUTE/functions/GCDirectConvolutionLayer.h"
+#include "arm_compute/runtime/GLES_COMPUTE/functions/GCDropoutLayer.h"
+#include "arm_compute/runtime/GLES_COMPUTE/functions/GCFillBorder.h"
+#include "arm_compute/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.h"
+#include "arm_compute/runtime/GLES_COMPUTE/functions/GCGEMM.h"
+#include "arm_compute/runtime/GLES_COMPUTE/functions/GCGEMMInterleave4x4.h"
+#include "arm_compute/runtime/GLES_COMPUTE/functions/GCGEMMTranspose1xW.h"
+#include "arm_compute/runtime/GLES_COMPUTE/functions/GCNormalizationLayer.h"
+#include "arm_compute/runtime/GLES_COMPUTE/functions/GCPixelWiseMultiplication.h"
+#include "arm_compute/runtime/GLES_COMPUTE/functions/GCPoolingLayer.h"
+#include "arm_compute/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.h"
+#include "arm_compute/runtime/GLES_COMPUTE/functions/GCTranspose.h"
+
+#endif /* __ARM_COMPUTE_GCFUNCTIONS_H__ */
diff --git a/arm_compute/runtime/GLES_COMPUTE/GCScheduler.h b/arm_compute/runtime/GLES_COMPUTE/GCScheduler.h
new file mode 100644
index 0000000000..817f8b54b1
--- /dev/null
+++ b/arm_compute/runtime/GLES_COMPUTE/GCScheduler.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __ARM_COMPUTE_GCSCHEDULER_H__
+#define __ARM_COMPUTE_GCSCHEDULER_H__
+
+#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h"
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+class IGCKernel;
+
+/** Provides global access to an OpenGL ES context and command queue. */
+class GCScheduler
+{
+private:
+    /** Constructor. Private: instances are obtained through get(). */
+    GCScheduler();
+
+public:
+    /** Access the scheduler singleton.
+     *
+     * @return The scheduler
+     */
+    static GCScheduler &get();
+
+    /** Initialises the context and command queue used by the scheduler to default values
+     *  and sets a default device and kernel path for the @ref GCKernelLibrary.
+     */
+    void default_init();
+
+    /** Schedule the execution of the passed kernel if possible.
+     *
+     * @param[in] kernel Kernel to execute.
+     * @param[in] flush  (Optional) Specifies if the command queue will be flushed after running the kernel. Defaults to true.
+     */
+    void enqueue(IGCKernel &kernel, bool flush = true);
+
+    /** Initialises the display and context to be used by the scheduler.
+     *
+     * @param[in] dpy The EGL display connection
+     * @param[in] ctx The EGL rendering context
+     */
+    void init(EGLDisplay dpy, EGLContext ctx);
+
+    /** Blocks until all commands in the associated command queue have finished. */
+    void sync();
+};
+}
+
+#endif /* __ARM_COMPUTE_GCSCHEDULER_H__ */
diff --git a/arm_compute/runtime/GLES_COMPUTE/GCTensor.h b/arm_compute/runtime/GLES_COMPUTE/GCTensor.h
new file mode 100644
index 0000000000..3e51f9908f
--- /dev/null
+++ b/arm_compute/runtime/GLES_COMPUTE/GCTensor.h
@@ -0,0 +1,100 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __ARM_COMPUTE_GCTENSOR_H__
+#define __ARM_COMPUTE_GCTENSOR_H__
+
+#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h"
+#include "arm_compute/runtime/GLES_COMPUTE/GCTensorAllocator.h"
+
+namespace arm_compute
+{
+class ITensorAllocator;
+class ITensorInfo;
+
+/** Basic implementation of the OpenGL ES tensor interface (@ref IGCTensor) */
+class GCTensor : public IGCTensor
+{
+public:
+    /** Default constructor */
+    GCTensor();
+
+    /** Prevent instances of this class from being copied (As this class contains pointers). */
+    GCTensor(const GCTensor &) = delete;
+
+    /** Prevent instances of this class from being copy assigned (As this class contains pointers). */
+    GCTensor &operator=(const GCTensor &) = delete;
+
+    /** Allow instances of this class to be moved */
+    GCTensor(GCTensor &&) = default;
+
+    /** Allow instances of this class to be move assigned */
+    GCTensor &operator=(GCTensor &&) = default;
+
+    /** Virtual destructor */
+    virtual ~GCTensor() = default;
+
+    /** Return a pointer to the tensor's allocator
+     *
+     * @return A pointer to the tensor's allocator
+     */
+    ITensorAllocator *allocator();
+
+    /** Enqueue a map operation of the allocated buffer on the given queue.
+     *
+     * @param[in] blocking (Optional) If true, then the mapping will be ready to use by the time
+     *                     this method returns, else it is the caller's responsibility
+     *                     to flush the queue and wait for the mapping operation to have completed before using the mapped buffer.
+     *
+     * @note This method returns nothing; the mapped memory is presumably reached through buffer() (TODO confirm against the implementation).
+     */
+    void map(bool blocking = true);
+
+    /** Enqueue an unmap operation of the allocated and mapped buffer on the given queue.
+     *
+     * @note This method simply enqueues the unmap operation, it is the caller's responsibility to flush the queue and make sure the unmap is finished before
+     *       the memory is accessed by the device.
+     *
+     */
+    void unmap();
+
+    // Inherited methods overridden:
+    TensorInfo *info() const override;
+    TensorInfo *info() override;
+    uint8_t    *buffer() const override;
+    GLuint      gc_buffer() const override;
+
+protected:
+    // Inherited methods overridden:
+    uint8_t *do_map(bool blocking) override;
+    void do_unmap() override;
+
+private:
+    mutable GCTensorAllocator _allocator; /**< Instance of the OpenGL ES tensor allocator (mutable so that const accessors can use it) */
+};
+
+using GCImage = GCTensor;
+}
+
+#endif /*__ARM_COMPUTE_GCTENSOR_H__ */
diff --git a/arm_compute/runtime/GLES_COMPUTE/GCTensorAllocator.h b/arm_compute/runtime/GLES_COMPUTE/GCTensorAllocator.h
new file mode 100644
index 0000000000..ce52cbbbdc
--- /dev/null
+++ b/arm_compute/runtime/GLES_COMPUTE/GCTensorAllocator.h
@@ -0,0 +1,128 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __ARM_COMPUTE_GCTENSORALLOCATOR_H__
+#define __ARM_COMPUTE_GCTENSORALLOCATOR_H__
+
+#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h"
+#include "arm_compute/runtime/ITensorAllocator.h"
+
+#include <memory>
+
+namespace arm_compute
+{
+/** Basic implementation of a GLES memory tensor allocator. */
+class GCTensorAllocator : public ITensorAllocator
+{
+public:
+    /** Default constructor. */
+    GCTensorAllocator();
+
+    /** Prevent instances of this class from being copied (As this class contains pointers). */
+    GCTensorAllocator(const GCTensorAllocator &) = delete;
+
+    /** Prevent instances of this class from being copy assigned (As this class contains pointers). */
+    GCTensorAllocator &operator=(const GCTensorAllocator &) = delete;
+
+    /** Allow instances of this class to be moved */
+    GCTensorAllocator(GCTensorAllocator &&) = default;
+
+    /** Allow instances of this class to be move assigned */
+    GCTensorAllocator &operator=(GCTensorAllocator &&) = default;
+
+    /** Default destructor */
+    ~GCTensorAllocator() = default;
+
+    /** Return the pointer to the mapped data. */
+    uint8_t *data();
+
+    /** Get the OpenGL ES buffer object name
+     *
+     * @return The buffer object name
+     */
+    GLuint get_gl_ssbo_name() const;
+
+    /** Enqueue a map operation of the allocated buffer on the given queue.
+     *
+     * @param[in] blocking If true, then the mapping will be ready to use by the time
+     *                     this method returns, else it is the caller's responsibility
+     *                     to flush the queue and wait for the mapping operation to have completed before using the returned mapping pointer.
+     *
+     * @return The mapping address.
+     */
+    uint8_t *map(bool blocking);
+
+    /** Enqueue an unmap operation of the allocated buffer on the given queue.
+     *
+     * @note This method simply enqueues the unmap operation, it is the caller's responsibility to flush the queue and make sure the unmap is finished before
+     *       the memory is accessed by the device.
+     *
+     */
+    void unmap();
+
+    /** Allocate size specified by TensorInfo of GLES memory.
+     *
+     * @note The tensor must not already be allocated when calling this function.
+     *
+     */
+    void allocate() override;
+
+    /** Free allocated GLES memory.
+     *
+     * @note The tensor must have been allocated when calling this function.
+     *
+     */
+    void free() override;
+
+protected:
+    /** Call map() on the SSBO.
+     *
+     * @return A pointer to the beginning of the tensor's allocation.
+     */
+    uint8_t *lock() override;
+
+    /** Call unmap() on the SSBO. */
+    void unlock() override;
+
+private:
+    class GLBufferWrapper // Holds one GL buffer object name: generated in the constructor, deleted in the destructor.
+    {
+    public:
+        GLBufferWrapper()
+            : _ssbo_name(0)
+        {
+            ARM_COMPUTE_GL_CHECK(glGenBuffers(1, &_ssbo_name));
+        }
+        ~GLBufferWrapper() // NOTE(review): copying is not disabled, so a copy would delete the same name twice; only held through a unique_ptr in this header.
+        {
+            ARM_COMPUTE_GL_CHECK(glDeleteBuffers(1, &_ssbo_name));
+        }
+        GLuint _ssbo_name; /**< Buffer object name filled in by glGenBuffers() */
+    };
+    std::unique_ptr<GLBufferWrapper> _gl_buffer; /**< Wrapper holding the GL buffer object name */
+    uint8_t                         *_mapping; /**< NOTE(review): presumably the address produced by map(); confirm in the implementation */
+};
+}
+
+#endif /* __ARM_COMPUTE_GCTENSORALLOCATOR_H__ */
diff --git a/arm_compute/runtime/GLES_COMPUTE/IGCSimpleFunction.h b/arm_compute/runtime/GLES_COMPUTE/IGCSimpleFunction.h
new file mode 100644
index 0000000000..15bbfffe95
--- /dev/null
+++ b/arm_compute/runtime/GLES_COMPUTE/IGCSimpleFunction.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_IGCSIMPLEFUNCTION_H__
+#define __ARM_COMPUTE_IGCSIMPLEFUNCTION_H__
+
+#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h"
+#include "arm_compute/core/GLES_COMPUTE/kernels/GCFillBorderKernel.h"
+#include "arm_compute/runtime/IFunction.h"
+
+#include <memory>
+
+namespace arm_compute
+{
+/** Basic interface for functions which have a single OpenGL ES kernel */
+class IGCSimpleFunction : public IFunction
+{
+public:
+    /** Default constructor */
+    IGCSimpleFunction();
+
+    // Inherited methods overridden (run() is final, so derived functions cannot redefine it):
+    void run() override final;
+
+protected:
+    std::unique_ptr<IGCKernel> _kernel;         /**< Kernel to run */
+    GCFillBorderKernel         _border_handler; /**< Kernel to handle borders */
+};
+}
+#endif /*__ARM_COMPUTE_IGCSIMPLEFUNCTION_H__ */
diff --git a/arm_compute/runtime/GLES_COMPUTE/functions/GCAbsoluteDifference.h b/arm_compute/runtime/GLES_COMPUTE/functions/GCAbsoluteDifference.h
new file mode 100644
index 0000000000..0d4a354e26
--- /dev/null
+++ b/arm_compute/runtime/GLES_COMPUTE/functions/GCAbsoluteDifference.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __ARM_COMPUTE_GCABSOLUTEDIFFERENCE_H__
+#define __ARM_COMPUTE_GCABSOLUTEDIFFERENCE_H__
+
+#include "arm_compute/runtime/GLES_COMPUTE/IGCSimpleFunction.h"
+
+namespace arm_compute
+{
+class IGCTensor;
+
+/** Basic function to run @ref GCAbsoluteDifferenceKernel
+ *
+ * @note The tensor data types for the inputs must be U8.
+ * @note The function calculates the absolute difference also when the 2 inputs have different tensor data types.
+ */
+class GCAbsoluteDifference : public IGCSimpleFunction
+{
+public:
+    /** Initialize the function
+     *
+     * @param[in]  input1 First input tensor. Data types supported: U8
+     * @param[in]  input2 Second input tensor. Data types supported: U8
+     * @param[out] output Output tensor. Data types supported: U8
+     */
+    void configure(const IGCTensor *input1, const IGCTensor *input2, IGCTensor *output);
+};
+}
+
+#endif /* __ARM_COMPUTE_GCABSOLUTEDIFFERENCE_H__ */
diff --git a/arm_compute/runtime/GLES_COMPUTE/functions/GCActivationLayer.h b/arm_compute/runtime/GLES_COMPUTE/functions/GCActivationLayer.h
new file mode 100644
index 0000000000..b43456b2cd
--- /dev/null
+++ b/arm_compute/runtime/GLES_COMPUTE/functions/GCActivationLayer.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_GCACTIVATIONLAYER_H__
+#define __ARM_COMPUTE_GCACTIVATIONLAYER_H__
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/GLES_COMPUTE/IGCSimpleFunction.h"
+
+namespace arm_compute
+{
+class IGCTensor;
+
+/** Basic function to run @ref GCActivationLayerKernel
+ *
+ * @note The function simulates an activation layer with the specified activation function.
+ */
+class GCActivationLayer : public IGCSimpleFunction
+{
+public:
+    /** Set the input and output tensors.
+     *
+     * @note If the output tensor is a nullptr, the activation function will be performed in-place
+     *
+     * @param[in, out] input    Source tensor. In case of @p output tensor = nullptr, this tensor will store the result
+     *                          of the activation function. Data types supported: F16/F32.
+     * @param[out]     output   Destination tensor, or nullptr to compute in-place on @p input. Data types supported: same as @p input
+     * @param[in]      act_info Activation layer parameters.
+     */
+    void configure(IGCTensor *input, IGCTensor *output, ActivationLayerInfo act_info);
+};
+}
+#endif /* __ARM_COMPUTE_GCACTIVATIONLAYER_H__ */
diff --git a/arm_compute/runtime/GLES_COMPUTE/functions/GCBatchNormalizationLayer.h b/arm_compute/runtime/GLES_COMPUTE/functions/GCBatchNormalizationLayer.h
new file mode 100644
index 0000000000..9d81b9a7f7
--- /dev/null
+++ b/arm_compute/runtime/GLES_COMPUTE/functions/GCBatchNormalizationLayer.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_GCBATCHNORMALIZATIONLAYER_H__
+#define __ARM_COMPUTE_GCBATCHNORMALIZATIONLAYER_H__
+
+#include "arm_compute/runtime/IFunction.h"
+
+#include "arm_compute/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.h"
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+class IGCTensor;
+
+/** Basic function to run @ref GCBatchNormalizationLayerKernel and simulate a batch normalization layer.
+ *
+ * Batch normalization is calculated by:
+ * @f[ out_i = \gamma * (\frac{in_i - \mu_{B}}{\sqrt{\sigma^2_{B} + \epsilon}}) + \beta \equiv BN_{\gamma,\beta}(in_i) @f]
+ *
+ */
+class GCBatchNormalizationLayer : public IFunction
+{
+public:
+    /** Default constructor. */
+    GCBatchNormalizationLayer();
+    /** Set the input and output tensors.
+     *
+     * @param[in]  input   Source tensor. 3 lower dimensions represent a single input with dimensions [width, height, FM].
+     *                     The rest are optional and used for representing batches. Data types supported: F16/F32.
+     * @param[out] output  Destination tensor. Output will have the same number of dimensions as input. Data type supported: same as @p input.
+     * @param[in]  mean    Mean values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input.
+     * @param[in]  var     Variance values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input.
+     * @param[in]  beta    Beta values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input.
+     * @param[in]  gamma   Gamma values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input.
+     * @param[in]  epsilon Small value to avoid division by zero.
+     */
+    void configure(const IGCTensor *input, IGCTensor *output, const IGCTensor *mean, const IGCTensor *var, const IGCTensor *beta, const IGCTensor *gamma, float epsilon);
+
+    // Inherited methods overridden:
+    void run() override;
+
+private:
+    GCBatchNormalizationLayerKernel _norm_kernel; /**< BatchNormalization layer kernel to run */
+};
+}
+#endif /* __ARM_COMPUTE_GCBATCHNORMALIZATIONLAYER_H__ */
diff --git a/arm_compute/runtime/GLES_COMPUTE/functions/GCDepthConcatenate.h b/arm_compute/runtime/GLES_COMPUTE/functions/GCDepthConcatenate.h
new file mode 100644
index 0000000000..801dc0e111
--- /dev/null
+++ b/arm_compute/runtime/GLES_COMPUTE/functions/GCDepthConcatenate.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_GCDEPTHCONCATENATE_H__
+#define __ARM_COMPUTE_GCDEPTHCONCATENATE_H__
+
+#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h"
+#include "arm_compute/core/GLES_COMPUTE/kernels/GCDepthConcatenateKernel.h"
+#include "arm_compute/core/GLES_COMPUTE/kernels/GCFillBorderKernel.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/IFunction.h"
+
+#include <memory>
+#include <vector>
+
+namespace arm_compute
+{
+class IGCTensor;
+
+/** Basic function to concatenate tensors along the z axis. This function calls the following kernels:
+ *
+ * -# @ref GCFillBorderKernel (executed if input's lowest two dimensions are smaller than respective output's dimensions)
+ * -# @ref GCDepthConcatenateKernel
+ *
+ */
+class GCDepthConcatenate : public IFunction
+{
+public:
+    /** Default constructor. */
+    GCDepthConcatenate();
+    /** Initialise the kernel's inputs vector and output.
+     *
+     * @param[in,out] inputs_vector The vector containing all the tensors to concatenate. Data types supported: F16/F32.
+     * @param[out]    output        Output tensor. Data types supported: Same as the tensors in @p inputs_vector.
+     */
+    void configure(std::vector<IGCTensor *> inputs_vector, IGCTensor *output);
+
+    // Inherited methods overridden:
+    void run() override;
+
+private:
+    std::unique_ptr<GCDepthConcatenateKernel[]> _concat_kernels_vector;  /**< Array of depth concatenate kernels */
+    std::unique_ptr<GCFillBorderKernel[]>       _border_handlers_vector; /**< Array of fill border kernels */
+    unsigned int                                _num_inputs;             /**< Number of inputs (presumably the length of both arrays above; confirm in the .cpp) */
+};
+}
+#endif /* __ARM_COMPUTE_GCDEPTHCONCATENATE_H__ */
diff --git a/arm_compute/runtime/GLES_COMPUTE/functions/GCDirectConvolutionLayer.h b/arm_compute/runtime/GLES_COMPUTE/functions/GCDirectConvolutionLayer.h
new file mode 100644
index 0000000000..5472bdb9ea
--- /dev/null
+++ b/arm_compute/runtime/GLES_COMPUTE/functions/GCDirectConvolutionLayer.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_GCDIRECTCONVOLUTIONLAYER_H__
+#define __ARM_COMPUTE_GCDIRECTCONVOLUTIONLAYER_H__
+
+#include "arm_compute/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/GLES_COMPUTE/IGCSimpleFunction.h"
+
+#include <memory>
+
+namespace arm_compute
+{
+class IGCTensor;
+
+/** Basic function to execute the direct convolution function.
+ *
+ * @note Supported kernel size: 1x1, 3x3, and 5x5
+ * @note This OpenGL ES implementation works with stride_x = 1 and 2
+ */
+class GCDirectConvolutionLayer : public IGCSimpleFunction
+{
+public:
+    /** Set the input and output tensors.
+     *
+     * @param[in]  input     Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
+     *                       while every optional dimension from 4 and above represent a batch of inputs.
+     *                       Data types supported: F16/F32.
+     * @param[in]  weights   Weights tensor. Weights are a 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: Same as @p input.
+     * @param[in]  biases    Biases tensor. Shared biases supported. Biases are a 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
+     * @param[out] output    Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent a batch of outputs.
+     *                       Data types supported: Same as @p input.
+     * @param[in]  conv_info Contains padding and stride information described in @ref PadStrideInfo.
+     */
+    void configure(const IGCTensor *input, const IGCTensor *weights, const IGCTensor *biases, IGCTensor *output, const PadStrideInfo &conv_info);
+};
+}
+#endif /* __ARM_COMPUTE_GCDIRECTCONVOLUTIONLAYER_H__ */
diff --git a/arm_compute/runtime/GLES_COMPUTE/functions/GCDropoutLayer.h b/arm_compute/runtime/GLES_COMPUTE/functions/GCDropoutLayer.h
new file mode 100644
index 0000000000..6a08d96676
--- /dev/null
+++ b/arm_compute/runtime/GLES_COMPUTE/functions/GCDropoutLayer.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __ARM_COMPUTE_GCDROPOUTLAYER_H__
+#define __ARM_COMPUTE_GCDROPOUTLAYER_H__
+
+#include "arm_compute/core/GLES_COMPUTE/kernels/GCDropoutKernel.h"
+#include "arm_compute/runtime/IFunction.h"
+
+namespace arm_compute
+{
+class IGCTensor;
+/** Basic function to perform the dropout operation. This function calls the following kernels:
+ *
+ *  -# @ref GCDropoutKernel
+ */
+class GCDropoutLayer : public IFunction
+{
+public:
+    /** Constructor. */
+    GCDropoutLayer();
+
+    /** Set the input and output tensors.
+     *
+     * @param[in]  input   Source tensor. Data type supported: F16/F32.
+     * @param[out] mask    Output mask tensor. Data type supported: Same as @p input.
+     * @param[out] output  Destination tensor. Data type supported: Same as @p input.
+     * @param[in]  ratio   Dropout ratio.
+     * @param[in]  forward Forward or backward propagation.
+     *
+     */
+    void configure(const IGCTensor *input, IGCTensor *mask, IGCTensor *output, float ratio, bool forward);
+
+    // Inherited methods overridden:
+    void run() override;
+
+private:
+    GCDropoutKernel _dropout_kernel; /**< Dropout kernel to run */
+};
+}
+
+#endif /* __ARM_COMPUTE_GCDROPOUTLAYER_H__ */
diff --git a/arm_compute/runtime/GLES_COMPUTE/functions/GCFillBorder.h b/arm_compute/runtime/GLES_COMPUTE/functions/GCFillBorder.h
new file mode 100644
index 0000000000..a04e4002ff
--- /dev/null
+++ b/arm_compute/runtime/GLES_COMPUTE/functions/GCFillBorder.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __ARM_COMPUTE_GCFILLBORDER_H__
+#define __ARM_COMPUTE_GCFILLBORDER_H__
+
+#include "arm_compute/core/PixelValue.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/GLES_COMPUTE/IGCSimpleFunction.h"
+
+namespace arm_compute
+{
+class IGCTensor;
+
+/** Basic function to run @ref GCFillBorderKernel. */
+class GCFillBorder : public IGCSimpleFunction
+{
+public:
+    /** Initialize the function.
+     *
+     * @param[in,out] tensor                Source tensor. Data types supported: F16/F32.
+     * @param[in]     border_width          The border width.
+     * @param[in]     border_mode           Strategy to use for borders.
+     * @param[in]     constant_border_value (Optional) Constant value to use for borders if @p border_mode is set to CONSTANT. Defaults to a default-constructed PixelValue.
+     */
+    void configure(IGCTensor *tensor, unsigned int border_width, BorderMode border_mode,
+                   const PixelValue &constant_border_value = PixelValue());
+};
+}
+
+#endif /* __ARM_COMPUTE_GCFILLBORDER_H__ */
diff --git a/arm_compute/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.h b/arm_compute/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.h
new file mode 100644
index 0000000000..1ae5837de0
--- /dev/null
+++ b/arm_compute/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.h
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_GCFULLYCONNECTEDLAYER_H__
+#define __ARM_COMPUTE_GCFULLYCONNECTEDLAYER_H__
+
+#include "arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixAccumulateBiasesKernel.h"
+#include "arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixMultiplyKernel.h"
+#include "arm_compute/core/GLES_COMPUTE/kernels/GCIm2ColKernel.h"
+#include "arm_compute/core/GLES_COMPUTE/kernels/GCTransposeKernel.h"
+#include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h"
+#include "arm_compute/runtime/GLES_COMPUTE/IGCSimpleFunction.h"
+
+namespace arm_compute
+{
+/** Basic function to reshape the weights of a Fully Connected layer with OpenGL ES. This function calls the following kernel:
+ *
+ *  -# @ref GCTransposeKernel
+ *
+ * @note The fully connected layer accepts "weights" tensors only with 2 dimensions.
+ */
+class GCFullyConnectedLayerReshapeWeights : public IGCSimpleFunction
+{
+public:
+    /** Set the input and output tensors.
+     *
+     * @param[in]  input  Weights tensor. The weights must be 2 dimensional. Data types supported: F16/F32.
+     * @param[out] output Destination tensor which stores the transposed input tensor. Data type supported: Same as @p input.
+     */
+    void configure(const IGCTensor *input, IGCTensor *output);
+};
+
+/** Basic function to compute a Fully Connected layer on OpenGL ES. This function calls the following OpenGL ES kernels:
+ *
+ *  -# @ref GCIm2ColKernel (called when the input comes from a convolutional layer)
+ *  -# @ref GCFullyConnectedLayerReshapeWeights (if @p are_weights_reshaped is set to false and @p transpose_weights is set to true) (called once)
+ *  -# @ref GCGEMMMatrixMultiplyKernel
+ *  -# @ref GCGEMMMatrixAccumulateBiasesKernel (if @p biases is not equal to nullptr)
+ *
+ * @note The fully connected layer accepts "weights" tensors only with 2 dimensions.
+ */
+class GCFullyConnectedLayer : public IFunction
+{
+public:
+    /** Constructor. */
+    GCFullyConnectedLayer();
+    /** Set the input and output tensors.
+     *
+     * @param[in]  input                Source tensor. Data type supported: F16/F32.
+     * @param[in]  weights              Weights tensor. The weights must be 2 dimensional. Data type supported: Same as @p input.
+     * @param[in]  biases               Bias tensor. It can be nullptr. Data type supported: Same as @p input.
+     * @param[out] output               Destination tensor. Data type supported: Same as @p input.
+     * @param[in]  transpose_weights    (Optional) Transpose weights if true. Defaults to true.
+     * @param[in]  are_weights_reshaped (Optional) Reshape the weights tensor if false. Defaults to false.
+     */
+    void configure(const IGCTensor *input, const IGCTensor *weights, const IGCTensor *biases, IGCTensor *output, bool transpose_weights = true, bool are_weights_reshaped = false);
+
+    // Inherited methods overridden:
+    void run() override;
+
+private:
+    void configure_fc_fc(const IGCTensor *input, const IGCTensor *weights, IGCTensor *output); // NOTE(review): presumably the path without im2col - confirm in the .cpp
+    void configure_conv_fc(const IGCTensor *input, const IGCTensor *weights, IGCTensor *output); // NOTE(review): presumably the path for input coming from a convolutional layer - confirm in the .cpp
+
+    GCIm2ColKernel                      _im2col_kernel;            /**< Im2Col kernel */
+    GCFullyConnectedLayerReshapeWeights _reshape_weights_kernel;   /**< Weights reshape function (a function object, despite the "_kernel" suffix) */
+    GCGEMMMatrixMultiplyKernel          _mm_kernel;                /**< Matrix multiply kernel */
+    GCGEMMMatrixAccumulateBiasesKernel  _accumulate_biases_kernel; /**< Bias accumulation kernel */
+    GCTensor                            _im2col_output;            /**< Internal tensor (presumably the im2col result; confirm in the .cpp) */
+    GCTensor                            _reshape_weights_output;   /**< Internal tensor (presumably the reshaped weights; confirm in the .cpp) */
+    bool                                _are_weights_reshaped;
+    bool                                _is_fc_after_conv;
+    bool                                _accumulate_biases;
+};
+}
+#endif /* __ARM_COMPUTE_GCFULLYCONNECTEDLAYER_H__ */
diff --git a/arm_compute/runtime/GLES_COMPUTE/functions/GCGEMM.h b/arm_compute/runtime/GLES_COMPUTE/functions/GCGEMM.h
new file mode 100644
index 0000000000..f2484cd801
--- /dev/null
+++ b/arm_compute/runtime/GLES_COMPUTE/functions/GCGEMM.h
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __ARM_COMPUTE_GCGEMM_H__
+#define __ARM_COMPUTE_GCGEMM_H__
+
+#include "arm_compute/core/GLES_COMPUTE/kernels/GCGEMMInterleave4x4Kernel.h"
+#include "arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixAdditionKernel.h"
+#include "arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixMultiplyKernel.h"
+#include "arm_compute/core/GLES_COMPUTE/kernels/GCGEMMTranspose1xWKernel.h"
+#include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h"
+#include "arm_compute/runtime/IFunction.h"
+
+namespace arm_compute
+{
+class IGCTensor;
+
+/** Basic function to execute GEMM on OpenGL ES Compute. This function calls the following kernels:
+ *
+ *  -# @ref GCGEMMInterleave4x4Kernel (if the output tensor is a matrix)
+ *  -# @ref GCGEMMTranspose1xWKernel (if the output tensor is a matrix)
+ *  -# @ref GCGEMMMatrixMultiplyKernel
+ *  -# @ref GCGEMMMatrixAdditionKernel (if c != nullptr and beta != 0.0)
+ *
+ */
+class GCGEMM : public IFunction
+{
+public:
+    /** Default constructor. */
+    GCGEMM();
+
+    /** Initialise the kernel's inputs and output.
+     *
+     * @note GEMM: General Matrix Multiply - [alpha * A * B + beta * C].
+     *
+     * @note All tensors must have the same data type.
+     *
+     * @note Whilst the first input tensor can be a vector, the second input tensor must be at least a matrix.
+     *
+     * @param[in]  a      First input tensor  (Matrix or Vector A). Data types supported: F32.
+     * @param[in]  b      Second input tensor (Matrix B). Data type supported: same as @p a.
+     * @param[in]  c      Third input tensor  (Matrix C). It can be a nullptr if just the multiplication between @p a and @p b is needed. Data type supported: same as @p a.
+     * @param[out] output Output tensor. Data type supported: same as @p a.
+     * @param[in]  alpha  Weight of the matrix product.
+     * @param[in]  beta   Weight of matrix C.
+     */
+    void configure(const IGCTensor *a, const IGCTensor *b, const IGCTensor *c, IGCTensor *output, float alpha, float beta);
+
+    // Inherited methods overridden:
+    void run() override;
+
+private:
+    GCGEMMInterleave4x4Kernel  _interleave_kernel;         /**< Interleave 4x4 kernel */
+    GCGEMMTranspose1xWKernel   _transpose_kernel;          /**< Transpose 1xW kernel */
+    GCGEMMMatrixMultiplyKernel _mm_kernel;                 /**< Matrix multiply kernel */
+    GCGEMMMatrixAdditionKernel _ma_kernel;                 /**< Matrix addition kernel */
+    GCTensor                   _tmp_a;                     /**< Internal temporary tensor (presumably the interleaved A; confirm in the .cpp) */
+    GCTensor                   _tmp_b;                     /**< Internal temporary tensor (presumably the transposed B; confirm in the .cpp) */
+    bool                       _is_interleaved_transposed;
+    bool                       _run_addition;
+};
+}
+
+#endif /* __ARM_COMPUTE_GCGEMM_H__ */
diff --git a/arm_compute/runtime/GLES_COMPUTE/functions/GCGEMMInterleave4x4.h b/arm_compute/runtime/GLES_COMPUTE/functions/GCGEMMInterleave4x4.h
new file mode 100644
index 0000000000..48fa7ed504
--- /dev/null
+++ b/arm_compute/runtime/GLES_COMPUTE/functions/GCGEMMInterleave4x4.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_GCGEMMINTERLEAVE4X4_H__
+#define __ARM_COMPUTE_GCGEMMINTERLEAVE4X4_H__
+
+#include "arm_compute/runtime/GLES_COMPUTE/IGCSimpleFunction.h"
+
+namespace arm_compute
+{
+class IGCTensor;
+
+/** Basic function to execute GCGEMMInterleave4x4Kernel. This function calls the following OpenGL ES kernel:
+ *
+ *  -# @ref GCGEMMInterleave4x4Kernel
+ *
+ */
+class GCGEMMInterleave4x4 : public IGCSimpleFunction
+{
+public:
+    /** Initialise the kernel's input and output.
+     *
+     * @param[in]  input  Input tensor. Data types supported: F32.
+     * @param[out] output Output tensor. Data type supported: same as @p input.
+     */
+    void configure(const IGCTensor *input, IGCTensor *output);
+};
+}
+
+#endif /* __ARM_COMPUTE_GCGEMMINTERLEAVE4X4_H__ */
diff --git a/arm_compute/runtime/GLES_COMPUTE/functions/GCGEMMTranspose1xW.h b/arm_compute/runtime/GLES_COMPUTE/functions/GCGEMMTranspose1xW.h
new file mode 100644
index 0000000000..24af2193c3
--- /dev/null
+++ b/arm_compute/runtime/GLES_COMPUTE/functions/GCGEMMTranspose1xW.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_GCGEMMTRANSPOSE1XW_H__
+#define __ARM_COMPUTE_GCGEMMTRANSPOSE1XW_H__
+
+#include "arm_compute/runtime/GLES_COMPUTE/IGCSimpleFunction.h"
+
+namespace arm_compute
+{
+/** Basic function to execute GCGEMMTranspose1xWKernel. This function calls the following OpenGL ES kernel:
+ *
+ *  -# @ref GCGEMMTranspose1xWKernel
+ *
+ */
+class GCGEMMTranspose1xW : public IGCSimpleFunction
+{
+public:
+    /** Initialise the kernel's input and output.
+     *
+     * @param[in]  input  Input tensor. Data type supported: F32.
+     * @param[out] output Output tensor. Data type supported: same as @p input.
+     */
+    void configure(const IGCTensor *input, IGCTensor *output);
+};
+}
+#endif /*__ARM_COMPUTE_GCGEMMTRANSPOSE1XW_H__ */
diff --git a/arm_compute/runtime/GLES_COMPUTE/functions/GCNormalizationLayer.h b/arm_compute/runtime/GLES_COMPUTE/functions/GCNormalizationLayer.h
new file mode 100644
index 0000000000..d080a2f7b9
--- /dev/null
+++ b/arm_compute/runtime/GLES_COMPUTE/functions/GCNormalizationLayer.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_GCNORMALIZATIONLAYER_H__
+#define __ARM_COMPUTE_GCNORMALIZATIONLAYER_H__
+
+#include "arm_compute/runtime/IFunction.h"
+
+#include "arm_compute/core/GLES_COMPUTE/kernels/GCFillBorderKernel.h"
+#include "arm_compute/core/GLES_COMPUTE/kernels/GCNormalizationLayerKernel.h"
+#include "arm_compute/core/GLES_COMPUTE/kernels/GCPixelWiseMultiplicationKernel.h"
+#include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h"
+
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+class IGCTensor;
+
+/** Basic function to compute a normalization layer. This function calls the following OpenGL ES kernels:
+ *
+ * -# @ref GCPixelWiseMultiplicationKernel
+ * -# @ref GCFillBorderKernel
+ * -# @ref GCNormalizationLayerKernel
+ *
+ */
+class GCNormalizationLayer : public IFunction
+{
+public:
+    /** Default constructor. */
+    GCNormalizationLayer();
+    /** Set the input and output tensors.
+     *
+     * @param[in]  input     Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM],
+     *                       and an optional 4th dimension for batch of inputs. Data types supported: F32. Number of channels must be 1.
+     * @param[out] output    Destination tensor. Dimensions, data type and number of channels must match the input ones.
+     * @param[in]  norm_info Normalization layer information like the normalization type, normalization size and other parameters.
+     */
+    void configure(const IGCTensor *input, IGCTensor *output, const NormalizationLayerInfo &norm_info);
+
+    // Inherited methods overridden:
+    void run() override;
+
+private:
+    GCTensor                        _squared_input;   /**< The intermediate buffer which stores results of squaring input */
+    GCNormalizationLayerKernel      _norm_kernel;     /**< Normalization layer kernel to run */
+    GCPixelWiseMultiplicationKernel _multiply_kernel; /**< Pixel multiplication kernel to run */
+    GCFillBorderKernel              _border_handler;  /**< Kernel to handle borders */
+};
+}
+#endif /* __ARM_COMPUTE_GCNORMALIZATIONLAYER_H__ */
diff --git a/arm_compute/runtime/GLES_COMPUTE/functions/GCPixelWiseMultiplication.h b/arm_compute/runtime/GLES_COMPUTE/functions/GCPixelWiseMultiplication.h
new file mode 100644
index 0000000000..e6239edc2f
--- /dev/null
+++ b/arm_compute/runtime/GLES_COMPUTE/functions/GCPixelWiseMultiplication.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_GCPIXELWISEMULTIPLICATION_H__
+#define __ARM_COMPUTE_GCPIXELWISEMULTIPLICATION_H__
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/GLES_COMPUTE/IGCSimpleFunction.h"
+
+namespace arm_compute
+{
+class IGCTensor;
+
+/** Basic function to run @ref GCPixelWiseMultiplicationKernel. */
+class GCPixelWiseMultiplication : public IGCSimpleFunction
+{
+public:
+    /** Initialise the kernel's inputs, output and scale factor.
+     *
+     * @param[in]  input1 First tensor input. Data types supported: F32.
+     * @param[in]  input2 Second tensor input. Data types supported: Same as @p input1.
+     * @param[out] output Output tensor. Data types supported: Same as @p input1.
+     * @param[in]  scale  Scale to apply after multiplication. Must be a positive value.
+     */
+    void configure(const IGCTensor *input1, const IGCTensor *input2, IGCTensor *output, float scale);
+};
+}
+#endif /*__ARM_COMPUTE_GCPIXELWISEMULTIPLICATION_H__ */
diff --git a/arm_compute/runtime/GLES_COMPUTE/functions/GCPoolingLayer.h b/arm_compute/runtime/GLES_COMPUTE/functions/GCPoolingLayer.h
new file mode 100644
index 0000000000..cce44d0c3c
--- /dev/null
+++ b/arm_compute/runtime/GLES_COMPUTE/functions/GCPoolingLayer.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_GCPOOLINGLAYER_H__
+#define __ARM_COMPUTE_GCPOOLINGLAYER_H__
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/GLES_COMPUTE/IGCSimpleFunction.h"
+
+#include <memory>
+
+namespace arm_compute
+{
+class IGCTensor;
+
+/** Basic function to compute a pooling layer with the specified pooling operation. This function calls the following OpenGL ES kernels:
+ *
+ * -# @ref GCFillBorderKernel (executed if padding size is different from zero)
+ * -# @ref GCPoolingLayerKernel
+ */
+class GCPoolingLayer : public IGCSimpleFunction
+{
+public:
+    /** Set the input and output tensors.
+     *
+     * @param[in,out] input     Source tensor. (Written to only when padding != 0.) Data types supported: F16/F32.
+     * @param[out]    output    Destination tensor. Data types supported: Same as @p input.
+     * @param[in]     pool_info Pooling operation information, as described in @ref PoolingLayerInfo.
+     */
+    void configure(IGCTensor *input, IGCTensor *output, const PoolingLayerInfo &pool_info);
+};
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_GCPOOLINGLAYER_H__ */
diff --git a/arm_compute/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.h b/arm_compute/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.h
new file mode 100644
index 0000000000..19bfb83eca
--- /dev/null
+++ b/arm_compute/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_GCSOFTMAXLAYER_H__
+#define __ARM_COMPUTE_GCSOFTMAXLAYER_H__
+
+#include "arm_compute/core/GLES_COMPUTE/kernels/GCSoftmaxLayerKernel.h"
+#include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h"
+#include "arm_compute/runtime/IFunction.h"
+
+namespace arm_compute
+{
+class IGCTensor;
+
+/** Basic function to compute a SoftmaxLayer.
+ *
+ * Softmax is calculated by :
+ * @f[ out = exp(x - max(x)) / sum(exp(x - max(x))) @f]
+ *
+ * This function runs the following kernels:
+ * -# @ref GCLogits1DMaxKernel
+ * -# @ref GCLogits1DShiftExpSumKernel
+ * -# @ref GCLogits1DNormKernel
+ */
+class GCSoftmaxLayer : public IFunction
+{
+public:
+    /** Default constructor. */
+    GCSoftmaxLayer();
+    /** Set the input and output tensors.
+     *
+     * @param[in]  input  Source tensor. Data types supported: F16/F32.
+     * @param[out] output Destination tensor. Data types supported: Same as @p input.
+     */
+    void configure(const IGCTensor *input, IGCTensor *output);
+
+    // Inherited methods overridden:
+    void run() override;
+
+private:
+    GCLogits1DMaxKernel         _max_kernel;           /**< Logits max kernel; presumably computes max(x) of the formula above (TODO confirm in the implementation) */
+    GCLogits1DShiftExpSumKernel _shift_exp_sum_kernel; /**< Logits shift/exp/sum kernel; presumably computes exp(x - max(x)) and its sum (TODO confirm in the implementation) */
+    GCLogits1DNormKernel        _norm_kernel;          /**< Logits normalization kernel; presumably performs the final division (TODO confirm in the implementation) */
+    GCTensor                    _max;                  /**< Intermediate tensor; presumably holds the output of _max_kernel (TODO confirm) */
+    GCTensor                    _sum;                  /**< Intermediate tensor; presumably holds the sum of the exponentials (TODO confirm) */
+    GCTensor                    _tmp;                  /**< Intermediate tensor; presumably holds the shifted exponentials (TODO confirm) */
+};
+}
+#endif /* __ARM_COMPUTE_GCSOFTMAXLAYER_H__ */
diff --git a/arm_compute/runtime/GLES_COMPUTE/functions/GCTranspose.h b/arm_compute/runtime/GLES_COMPUTE/functions/GCTranspose.h
new file mode 100644
index 0000000000..23324343f9
--- /dev/null
+++ b/arm_compute/runtime/GLES_COMPUTE/functions/GCTranspose.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_GCTRANSPOSE_H__
+#define __ARM_COMPUTE_GCTRANSPOSE_H__
+
+#include "arm_compute/runtime/GLES_COMPUTE/IGCSimpleFunction.h"
+
+namespace arm_compute
+{
+class IGCTensor;
+
+/** Basic function to transpose a matrix on OpenGL ES. This function calls the following OpenGL ES kernel:
+ *
+ *  -# @ref GCTransposeKernel
+ *
+ */
+class GCTranspose : public IGCSimpleFunction
+{
+public:
+    /** Initialise the kernel's input and output.
+     *
+     * @param[in]  input  Input tensor. Data types supported: F16/F32.
+     * @param[out] output Output tensor. Data types supported: Same as @p input.
+     */
+    void configure(const IGCTensor *input, IGCTensor *output);
+};
+}
+
+#endif /* __ARM_COMPUTE_GCTRANSPOSE_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h b/arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h
index 041b9e7290..b2de7162f1 100644
--- a/arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h
@@ -50,12 +50,12 @@ public:
      * @param[in, out] input   Source tensor. In case of @p output tensor = nullptr, this tensor will store the result.
      *                         3 lower dimensions represent a single input with dimensions [width, height, FM].
      *                         The rest are optional and used for representing batches. Data types supported: QS8/QS16/F16/F32.
+     * @param[out]     output  Destination tensor. Output will have the same number of dimensions as input. Data type supported: same as @p input
      * @param[in]      mean    Mean values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
      * @param[in]      var     Variance values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
-     * @param[in]      gamma   Gamma values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
      * @param[in]      beta    Beta values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
+     * @param[in]      gamma   Gamma values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
      * @param[in]      epsilon Small value to avoid division with zero.
-     * @param[out]     output  Destination tensor. Output will have the same number of dimensions as input. Data type supported: same as @p input
      */
     void configure(ITensor *input, ITensor *output, const ITensor *mean, const ITensor *var, const ITensor *beta, const ITensor *gamma, float epsilon);
 
diff --git a/arm_compute/runtime/NEON/functions/NENormalizationLayer.h b/arm_compute/runtime/NEON/functions/NENormalizationLayer.h
index 1c95c5bc4a..0d5656d602 100644
--- a/arm_compute/runtime/NEON/functions/NENormalizationLayer.h
+++ b/arm_compute/runtime/NEON/functions/NENormalizationLayer.h
@@ -40,7 +40,7 @@ namespace arm_compute
 {
 class ITensor;
 
-/** Basic function to simulate a normalization layer. This function calls the following NEON kernels:
+/** Basic function to compute a normalization layer. This function calls the following NEON kernels:
  *
  * -# @ref NEPixelWiseMultiplicationKernel
  * -# @ref NEFillBorderKernel
@@ -59,7 +59,7 @@ public:
      * @param[out] output    Destination with the same dimensions, data type and number of channels of  @p input
      * @param[in]  norm_info Normalization layer information like the normalization type, normalization size and other parameters.
      */
-    void configure(const ITensor *input, ITensor *output, NormalizationLayerInfo norm_info);
+    void configure(const ITensor *input, ITensor *output, const NormalizationLayerInfo &norm_info);
 
     // Inherited methods overridden:
     void run() override;
-- 
cgit v1.2.1