From 7068f9900d136312318ff430aef588b14e0c87ad Mon Sep 17 00:00:00 2001
From: Anthony Barbier <anthony.barbier@arm.com>
Date: Thu, 26 Oct 2017 15:23:08 +0100
Subject: COMPMID-631: Merge branches/gles_compute branch

Last commit:
commit b25c5f68042b0c81bf611d59a1bb8535e1c42497
Author: Xinghang Zhou <xinghang.zhou@arm.com>
Date:   Wed Oct 25 18:48:10 2017 +0800

    Synced validation's tolerances of GCSoftmax from cl side

Change-Id: Ibe72054205c1c8721845d679a31af7ed0a7c5cf6
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/93283
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
---
 .../kernels/GCAbsoluteDifferenceKernel.h           |  71 +++++++++++++
 .../GLES_COMPUTE/kernels/GCActivationLayerKernel.h |  68 +++++++++++++
 .../kernels/GCBatchNormalizationLayerKernel.h      |  77 ++++++++++++++
 .../core/GLES_COMPUTE/kernels/GCCol2ImKernel.h     |  92 +++++++++++++++++
 .../kernels/GCDepthConcatenateKernel.h             |  76 ++++++++++++++
 .../kernels/GCDirectConvolutionLayerKernel.h       |  87 ++++++++++++++++
 .../core/GLES_COMPUTE/kernels/GCDropoutKernel.h    |  79 +++++++++++++++
 .../core/GLES_COMPUTE/kernels/GCFillBorderKernel.h |  77 ++++++++++++++
 .../kernels/GCGEMMInterleave4x4Kernel.h            |  80 +++++++++++++++
 .../kernels/GCGEMMMatrixAccumulateBiasesKernel.h   |  63 ++++++++++++
 .../kernels/GCGEMMMatrixAdditionKernel.h           |  70 +++++++++++++
 .../kernels/GCGEMMMatrixMultiplyKernel.h           |  79 +++++++++++++++
 .../kernels/GCGEMMTranspose1xWKernel.h             |  67 ++++++++++++
 .../core/GLES_COMPUTE/kernels/GCIm2ColKernel.h     | 109 ++++++++++++++++++++
 .../kernels/GCNormalizationLayerKernel.h           |  72 +++++++++++++
 .../kernels/GCPixelWiseMultiplicationKernel.h      |  70 +++++++++++++
 .../GLES_COMPUTE/kernels/GCPoolingLayerKernel.h    |  70 +++++++++++++
 .../GLES_COMPUTE/kernels/GCSoftmaxLayerKernel.h    | 112 +++++++++++++++++++++
 .../core/GLES_COMPUTE/kernels/GCTransposeKernel.h  |  52 ++++++++++
 19 files changed, 1471 insertions(+)
 create mode 100644 arm_compute/core/GLES_COMPUTE/kernels/GCAbsoluteDifferenceKernel.h
 create mode 100644 arm_compute/core/GLES_COMPUTE/kernels/GCActivationLayerKernel.h
 create mode 100644 arm_compute/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.h
 create mode 100644 arm_compute/core/GLES_COMPUTE/kernels/GCCol2ImKernel.h
 create mode 100644 arm_compute/core/GLES_COMPUTE/kernels/GCDepthConcatenateKernel.h
 create mode 100644 arm_compute/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.h
 create mode 100644 arm_compute/core/GLES_COMPUTE/kernels/GCDropoutKernel.h
 create mode 100644 arm_compute/core/GLES_COMPUTE/kernels/GCFillBorderKernel.h
 create mode 100644 arm_compute/core/GLES_COMPUTE/kernels/GCGEMMInterleave4x4Kernel.h
 create mode 100644 arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixAccumulateBiasesKernel.h
 create mode 100644 arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixAdditionKernel.h
 create mode 100644 arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixMultiplyKernel.h
 create mode 100644 arm_compute/core/GLES_COMPUTE/kernels/GCGEMMTranspose1xWKernel.h
 create mode 100644 arm_compute/core/GLES_COMPUTE/kernels/GCIm2ColKernel.h
 create mode 100644 arm_compute/core/GLES_COMPUTE/kernels/GCNormalizationLayerKernel.h
 create mode 100644 arm_compute/core/GLES_COMPUTE/kernels/GCPixelWiseMultiplicationKernel.h
 create mode 100644 arm_compute/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.h
 create mode 100644 arm_compute/core/GLES_COMPUTE/kernels/GCSoftmaxLayerKernel.h
 create mode 100644 arm_compute/core/GLES_COMPUTE/kernels/GCTransposeKernel.h

(limited to 'arm_compute/core/GLES_COMPUTE/kernels')

diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCAbsoluteDifferenceKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCAbsoluteDifferenceKernel.h
new file mode 100644
index 0000000000..71f7b37700
--- /dev/null
+++ b/arm_compute/core/GLES_COMPUTE/kernels/GCAbsoluteDifferenceKernel.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_GCABSOLUTEDIFFERENCEKERNEL_H__
+#define __ARM_COMPUTE_GCABSOLUTEDIFFERENCEKERNEL_H__
+
+#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h"
+
+namespace arm_compute
+{
+class IGCTensor;
+
+/** Interface for the GLES compute kernel that calculates the absolute difference of two tensors.
+ *
+ * Each output element is computed as:
+ * @f[ output(x,y) = | input1(x,y) - input2(x,y) | @f]
+ */
+class GCAbsoluteDifferenceKernel : public IGCKernel
+{
+public:
+    /** Default constructor. */
+    GCAbsoluteDifferenceKernel();
+    /** Copy construction is disabled because the kernel stores raw tensor pointers. */
+    GCAbsoluteDifferenceKernel(const GCAbsoluteDifferenceKernel &) = delete;
+    /** Copy assignment is disabled because the kernel stores raw tensor pointers. */
+    GCAbsoluteDifferenceKernel &operator=(const GCAbsoluteDifferenceKernel &) = delete;
+    /** Move construction is allowed (compiler-generated). */
+    GCAbsoluteDifferenceKernel(GCAbsoluteDifferenceKernel &&) = default;
+    /** Move assignment is allowed (compiler-generated). */
+    GCAbsoluteDifferenceKernel &operator=(GCAbsoluteDifferenceKernel &&) = default;
+    /** Default destructor; it does not release the tensors the kernel points to. */
+    ~GCAbsoluteDifferenceKernel() = default;
+
+    /** Set the two input tensors and the output tensor.
+     *
+     * @param[in]  input1 First source tensor. Data types supported: U8
+     * @param[in]  input2 Second source tensor. Data types supported: U8
+     * @param[out] output Destination tensor. Data types supported: U8
+     */
+    void configure(const IGCTensor *input1, const IGCTensor *input2, IGCTensor *output);
+
+    // Inherited methods overridden:
+    void run(const Window &window) override;
+
+private:
+    const IGCTensor *_input1; /**< Source tensor 1 (non-owning pointer). */
+    const IGCTensor *_input2; /**< Source tensor 2 (non-owning pointer). */
+    IGCTensor       *_output; /**< Destination tensor (non-owning pointer). */
+};
+}
+#endif /* __ARM_COMPUTE_GCABSOLUTEDIFFERENCEKERNEL_H__ */
diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCActivationLayerKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCActivationLayerKernel.h
new file mode 100644
index 0000000000..fc1d52f455
--- /dev/null
+++ b/arm_compute/core/GLES_COMPUTE/kernels/GCActivationLayerKernel.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_GCACTIVATIONLAYERKERNEL_H__
+#define __ARM_COMPUTE_GCACTIVATIONLAYERKERNEL_H__
+
+#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h"
+
+namespace arm_compute
+{
+class IGCTensor;
+
+/** Interface for the GLES compute activation layer kernel (optionally operating in-place). */
+class GCActivationLayerKernel : public IGCKernel
+{
+public:
+    /** Default constructor */
+    GCActivationLayerKernel();
+    /** Copy construction is disabled because the kernel stores raw tensor pointers */
+    GCActivationLayerKernel(const GCActivationLayerKernel &) = delete;
+    /** Copy assignment is disabled because the kernel stores raw tensor pointers */
+    GCActivationLayerKernel &operator=(const GCActivationLayerKernel &) = delete;
+    /** Move construction is allowed (compiler-generated) */
+    GCActivationLayerKernel(GCActivationLayerKernel &&) = default;
+    /** Move assignment is allowed (compiler-generated) */
+    GCActivationLayerKernel &operator=(GCActivationLayerKernel &&) = default;
+    /** Default destructor; it does not release the tensors the kernel points to */
+    ~GCActivationLayerKernel() = default;
+    /** Set the input tensor, the output tensor and the activation to apply.
+     *
+     * @note If the output tensor is a nullptr, the activation function is performed in-place on @p input
+     *
+     * @param[in, out] input    Source tensor. If @p output is nullptr, this tensor also stores the result
+     *                          of the activation function. Data types supported: F16/F32.
+     * @param[out]     output   Destination tensor. Data type should match the input data type.
+     * @param[in]      act_info Activation layer information.
+     */
+    void configure(IGCTensor *input, IGCTensor *output, ActivationLayerInfo act_info);
+
+    // Inherited methods overridden:
+    void run(const Window &window) override;
+
+private:
+    IGCTensor *_input;  /**< Source tensor (non-owning pointer). */
+    IGCTensor *_output; /**< Destination tensor (non-owning pointer). */
+};
+}
+#endif /*__ARM_COMPUTE_GCACTIVATIONLAYERKERNEL_H__ */
diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.h
new file mode 100644
index 0000000000..2bbd6a83fe
--- /dev/null
+++ b/arm_compute/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_GCBATCHNORMALIZATIONLAYERKERNEL_H__
+#define __ARM_COMPUTE_GCBATCHNORMALIZATIONLAYERKERNEL_H__
+
+#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h"
+
+namespace arm_compute
+{
+class IGCTensor;
+
+/** Interface for the GLES compute batch normalization layer kernel.
+ */
+class GCBatchNormalizationLayerKernel : public IGCKernel
+{
+public:
+    /** Default constructor */
+    GCBatchNormalizationLayerKernel();
+    /** Copy construction is disabled because the kernel stores raw tensor pointers */
+    GCBatchNormalizationLayerKernel(const GCBatchNormalizationLayerKernel &) = delete;
+    /** Copy assignment is disabled because the kernel stores raw tensor pointers */
+    GCBatchNormalizationLayerKernel &operator=(const GCBatchNormalizationLayerKernel &) = delete;
+    /** Move construction is allowed (compiler-generated). */
+    GCBatchNormalizationLayerKernel(GCBatchNormalizationLayerKernel &&) = default;
+    /** Move assignment is allowed (compiler-generated). */
+    GCBatchNormalizationLayerKernel &operator=(GCBatchNormalizationLayerKernel &&) = default;
+    /** Default destructor; it does not release the tensors the kernel points to */
+    ~GCBatchNormalizationLayerKernel() = default;
+
+    /** Set the input and output tensors together with the normalization parameters.
+     *
+     * @param[in]  input   Source tensor. 3 lower dimensions represent a single input with dimensions [width, height, FM].
+     *                     The rest are optional and used for representing batches. Data types supported: F16/F32.
+     * @param[out] output  Destination tensor. Output will have the same number of dimensions as input. Data types supported: Same as @p input
+     * @param[in]  mean    Mean values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
+     * @param[in]  var     Variance values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
+     * @param[in]  beta    Beta values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
+     * @param[in]  gamma   Gamma values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input
+     * @param[in]  epsilon Small value to avoid division by zero.
+     */
+    void configure(const IGCTensor *input, IGCTensor *output, const IGCTensor *mean, const IGCTensor *var, const IGCTensor *beta, const IGCTensor *gamma, float epsilon);
+
+    // Inherited methods overridden:
+    void run(const Window &window) override;
+
+private:
+    const IGCTensor *_input;   /**< Source tensor (non-owning pointer). */
+    IGCTensor       *_output;  /**< Destination tensor (non-owning pointer). */
+    const IGCTensor *_mean;    /**< Mean values tensor (non-owning pointer). */
+    const IGCTensor *_var;     /**< Variance values tensor (non-owning pointer). */
+    const IGCTensor *_beta;    /**< Beta values tensor (non-owning pointer). */
+    const IGCTensor *_gamma;   /**< Gamma values tensor (non-owning pointer). */
+    float            _epsilon; /**< Small value to avoid division by zero. */
+};
+}
+#endif /*__ARM_COMPUTE_GCBATCHNORMALIZATIONLAYERKERNEL_H__ */
diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCCol2ImKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCCol2ImKernel.h
new file mode 100644
index 0000000000..257ab0eca0
--- /dev/null
+++ b/arm_compute/core/GLES_COMPUTE/kernels/GCCol2ImKernel.h
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __ARM_COMPUTE_GCCOL2IMKERNEL_H__
+#define __ARM_COMPUTE_GCCOL2IMKERNEL_H__
+
+#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h"
+
+namespace arm_compute
+{
+class IGCTensor;
+
+/** Interface for the GLES compute col2im reshaping kernel.
+ *
+ * Rearranges each matrix column into image blocks. It is the inverse operation of @ref GCIm2ColKernel.
+ *
+ * For example, a vector of 9 elements can be reshaped to a block (image) of 3x3:
+ *
+ * @f[
+ * \left( \begin{array}{ccccccccc}
+ * a0 & a1 & a2 & a3 & a4 & a5 & a6 & a7 & a8 \\
+ * \end{array} \right)
+ * \rightarrow
+ * \left( \begin{array}{ccc}
+ * a0 & a1 & a2 \\
+ * a3 & a4 & a5 \\
+ * a6 & a7 & a8 \\
+ * \end{array} \right)
+ * @f]
+ */
+class GCCol2ImKernel : public IGCKernel
+{
+public:
+    /** Default constructor */
+    GCCol2ImKernel();
+
+    /** Copy construction is disabled because the kernel stores raw tensor pointers */
+    GCCol2ImKernel(const GCCol2ImKernel &) = delete;
+
+    /** Copy assignment is disabled because the kernel stores raw tensor pointers */
+    GCCol2ImKernel &operator=(const GCCol2ImKernel &) = delete;
+
+    /** Move construction is allowed (compiler-generated) */
+    GCCol2ImKernel(GCCol2ImKernel &&) = default;
+
+    /** Move assignment is allowed (compiler-generated) */
+    GCCol2ImKernel &operator=(GCCol2ImKernel &&) = default;
+
+    /** Default destructor; it does not release the tensors the kernel points to */
+    ~GCCol2ImKernel() = default;
+
+    /** Set the input tensor, the output tensor and the convolved dimensions.
+     *
+     * @param[in]  input          The input tensor to convert. Data types supported: F32
+     * @param[out] output         The output tensor. 3 lower dimensions represent a single output [width, height, OFM],
+     *                            while the rest represent a batch of outputs. Data types supported: Same as @p input
+     * @param[in]  convolved_dims Output convolved dimensions.
+     */
+    void configure(const IGCTensor *input, IGCTensor *output, std::pair<unsigned int, unsigned int> convolved_dims);
+
+    // Inherited methods overridden:
+    void run(const Window &window) override;
+
+private:
+    const IGCTensor *_input; /**< Input tensor to convert (non-owning pointer). */
+    IGCTensor       *_output; /**< Output tensor (non-owning pointer). */
+    std::pair<unsigned int, unsigned int> _convolved_dims; /**< Output convolved dimensions. */
+};
+}
+
+#endif /*__ARM_COMPUTE_GCCOL2IMKERNEL_H__ */
diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCDepthConcatenateKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCDepthConcatenateKernel.h
new file mode 100644
index 0000000000..9a34a9a9c5
--- /dev/null
+++ b/arm_compute/core/GLES_COMPUTE/kernels/GCDepthConcatenateKernel.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __ARM_COMPUTE_GCDEPTHCONCATENATEKERNEL_H__
+#define __ARM_COMPUTE_GCDEPTHCONCATENATEKERNEL_H__
+
+#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h"
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+class IGCTensor;
+
+/** Interface for the GLES compute depth concatenate kernel.
+ *  The input tensor is concatenated into the output tensor at a given offset on the Z axis.
+ */
+class GCDepthConcatenateKernel : public IGCKernel
+{
+public:
+    /** Default constructor */
+    GCDepthConcatenateKernel();
+    /** Copy construction is disabled because the kernel stores raw tensor pointers */
+    GCDepthConcatenateKernel(const GCDepthConcatenateKernel &) = delete;
+    /** Copy assignment is disabled because the kernel stores raw tensor pointers */
+    GCDepthConcatenateKernel &operator=(const GCDepthConcatenateKernel &) = delete;
+    /** Move construction is allowed (compiler-generated) */
+    GCDepthConcatenateKernel(GCDepthConcatenateKernel &&) = default;
+    /** Move assignment is allowed (compiler-generated) */
+    GCDepthConcatenateKernel &operator=(GCDepthConcatenateKernel &&) = default;
+    /** Default destructor; it does not release the tensors the kernel points to */
+    ~GCDepthConcatenateKernel() = default;
+    /** Initialise the kernel's input, depth offset and output.
+     *
+     * @param[in]     input        Input tensor. Data types supported: F16/F32.
+     * @param[in]     depth_offset The offset on the Z axis.
+     * @param[in,out] output       Output tensor. Data types supported: Same as @p input.
+     *
+     * @note The two lowest dimensions of the output tensor can't be smaller than those of the input tensor.
+     * @note The differences between the two lowest dimensions of the input and output tensors need to be divisible by 2.
+     *
+     */
+    void configure(const IGCTensor *input, unsigned int depth_offset, IGCTensor *output);
+
+    // Inherited methods overridden:
+    void run(const Window &window) override;
+    BorderSize border_size() const override;
+
+private:
+    const IGCTensor *_input;      /**< Input tensor (non-owning pointer). */
+    IGCTensor       *_output;     /**< Output tensor (non-owning pointer). */
+    int              _top_bottom; /**< NOTE(review): presumably the top/bottom border reported by border_size() - confirm in the implementation. */
+    int              _left_right; /**< NOTE(review): presumably the left/right border reported by border_size() - confirm in the implementation. */
+};
+}
+#endif /* __ARM_COMPUTE_GCDEPTHCONCATENATEKERNEL_H__ */
diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.h
new file mode 100644
index 0000000000..415b781bc6
--- /dev/null
+++ b/arm_compute/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_GCDIRECTCONVOLUTIONLAYERKERNEL_H__
+#define __ARM_COMPUTE_GCDIRECTCONVOLUTIONLAYERKERNEL_H__
+
+#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h"
+#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h"
+
+namespace arm_compute
+{
+class IGCTensor;
+
+/** Interface for the GLES compute direct convolution layer kernel.
+ * @tparam kernel_size Size of the convolution kernel (the aliases declared after the class use 1, 3 and 5). */
+template <unsigned int kernel_size>
+class GCDirectConvolutionLayerKernel : public IGCKernel
+{
+public:
+    /** Default constructor */
+    GCDirectConvolutionLayerKernel();
+    /** Copy construction is disabled because the kernel stores raw tensor pointers */
+    GCDirectConvolutionLayerKernel(const GCDirectConvolutionLayerKernel &) = delete;
+    /** Copy assignment is disabled because the kernel stores raw tensor pointers */
+    GCDirectConvolutionLayerKernel &operator=(const GCDirectConvolutionLayerKernel &) = delete;
+    /** Move construction is allowed (compiler-generated) */
+    GCDirectConvolutionLayerKernel(GCDirectConvolutionLayerKernel &&) = default;
+    /** Move assignment is allowed (compiler-generated) */
+    GCDirectConvolutionLayerKernel &operator=(GCDirectConvolutionLayerKernel &&) = default;
+    /** Default destructor; it does not release the tensors the kernel points to */
+    ~GCDirectConvolutionLayerKernel() = default;
+    /** Set the input, weights, bias and output tensors and the convolution parameters.
+     *
+     * @param[in]  input     The input tensor. 3 lower dimensions represent a single input [width, height, IFM],
+     *                       while every optional dimension from 4 and above represent a batch of inputs. Data types supported: F16/F32
+     * @param[in]  weights   Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: Same as @p input.
+     * @param[in]  bias      Biases tensor. Shared bias supported. Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
+     * @param[out] output    The output tensor. First 2 lower dimensions represent a transform of each 3D input,
+     *                       while every dimension above represents a batch. Data types supported: Same as @p input
+     * @param[in]  conv_info Contains padding and stride information described in @ref PadStrideInfo.
+     */
+    void configure(const IGCTensor *input, const IGCTensor *weights, const IGCTensor *bias, IGCTensor *output, const PadStrideInfo &conv_info);
+
+    // Inherited methods overridden:
+    BorderSize border_size() const override;
+
+    // Executes the kernel on the given window (inherited):
+    void run(const Window &window) override;
+
+private:
+    const IGCTensor *_input;         /**< Input tensor (non-owning pointer). */
+    const IGCTensor *_bias;          /**< Biases tensor (non-owning pointer). */
+    const IGCTensor *_weights;       /**< Weights tensor (non-owning pointer). */
+    IGCTensor       *_output;        /**< Output tensor (non-owning pointer). */
+    BorderSize       _border_size;   /**< NOTE(review): presumably the value returned by border_size() - confirm. */
+    int              _conv_stride_x; /**< NOTE(review): presumably the X stride taken from conv_info - confirm. */
+    int              _conv_stride_y; /**< NOTE(review): presumably the Y stride taken from conv_info - confirm. */
+    int              _conv_pad_x;    /**< NOTE(review): presumably the X padding taken from conv_info - confirm. */
+    int              _conv_pad_y;    /**< NOTE(review): presumably the Y padding taken from conv_info - confirm. */
+    gles::NDRange    _lws;           /**< NOTE(review): presumably the local work-group size - confirm. */
+};
+
+using GCDirectConvolutionLayer1x1Kernel = GCDirectConvolutionLayerKernel<1>; /**< Direct convolution kernel instantiated with kernel_size = 1. */
+using GCDirectConvolutionLayer3x3Kernel = GCDirectConvolutionLayerKernel<3>; /**< Direct convolution kernel instantiated with kernel_size = 3. */
+using GCDirectConvolutionLayer5x5Kernel = GCDirectConvolutionLayerKernel<5>; /**< Direct convolution kernel instantiated with kernel_size = 5. */
+}
+#endif /*__ARM_COMPUTE_GCDIRECTCONVOLUTIONLAYERKERNEL_H__ */
diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCDropoutKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCDropoutKernel.h
new file mode 100644
index 0000000000..6159a7af26
--- /dev/null
+++ b/arm_compute/core/GLES_COMPUTE/kernels/GCDropoutKernel.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __ARM_COMPUTE_GCDROPOUTKERNEL_H__
+#define __ARM_COMPUTE_GCDROPOUTKERNEL_H__
+
+#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h"
+
+namespace arm_compute
+{
+class IGCTensor;
+
+/** Interface for the GLES compute dropout kernel.
+ *
+ * Dropout is used to reduce over-fitting in neural networks.
+ *
+ */
+class GCDropoutKernel : public IGCKernel
+{
+public:
+    /** Default constructor */
+    GCDropoutKernel();
+
+    /** Copy construction is disabled because the kernel stores raw tensor pointers */
+    GCDropoutKernel(const GCDropoutKernel &) = delete;
+
+    /** Copy assignment is disabled because the kernel stores raw tensor pointers */
+    GCDropoutKernel &operator=(const GCDropoutKernel &) = delete;
+
+    /** Move construction is allowed (compiler-generated) */
+    GCDropoutKernel(GCDropoutKernel &&) = default;
+
+    /** Move assignment is allowed (compiler-generated) */
+    GCDropoutKernel &operator=(GCDropoutKernel &&) = default;
+
+    /** Set the input, mask and output tensors and the dropout parameters.
+     *
+     * @param[in]  input   The input tensor for this op. Data types supported: F16/F32
+     * @param[out] mask    The mask tensor. Data types supported: Same as @p input
+     * @param[out] output  The output tensor. Data types supported: Same as @p input
+     * @param[in]  ratio   Dropout ratio
+     * @param[in]  forward Forward or backward propagation
+     *
+     */
+    void configure(const IGCTensor *input, IGCTensor *mask, IGCTensor *output, float ratio, bool forward);
+
+    // Inherited methods overridden:
+    void run(const Window &window) override;
+
+private:
+    const IGCTensor *_input;                             /**< Input tensor (non-owning pointer). */
+    IGCTensor       *_mask;                              /**< Mask tensor (non-owning pointer). */
+    IGCTensor       *_output;                            /**< Output tensor (non-owning pointer). */
+    unsigned int     _num_elems_processed_per_iteration; /**< Number of elements processed per iteration. */
+};
+}
+
+#endif /*__ARM_COMPUTE_GCDROPOUTKERNEL_H__ */
diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCFillBorderKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCFillBorderKernel.h
new file mode 100644
index 0000000000..acb8aa67d3
--- /dev/null
+++ b/arm_compute/core/GLES_COMPUTE/kernels/GCFillBorderKernel.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_GCFILLBORDERKERNEL_H__
+#define __ARM_COMPUTE_GCFILLBORDERKERNEL_H__
+
+#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h"
+#include "arm_compute/core/PixelValue.h"
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+class IGCTensor;
+
+/** Interface for the GLES compute kernel that fills the border of a tensor */
+class GCFillBorderKernel : public IGCKernel
+{
+public:
+    /** Default constructor */
+    GCFillBorderKernel();
+    /** Copy construction is disabled because the kernel stores a raw tensor pointer */
+    GCFillBorderKernel(const GCFillBorderKernel &) = delete;
+    /** Copy assignment is disabled because the kernel stores a raw tensor pointer */
+    GCFillBorderKernel &operator=(const GCFillBorderKernel &) = delete;
+    /** Move construction is allowed (compiler-generated) */
+    GCFillBorderKernel(GCFillBorderKernel &&) = default;
+    /** Move assignment is allowed (compiler-generated) */
+    GCFillBorderKernel &operator=(GCFillBorderKernel &&) = default;
+    /** Default destructor; it does not release the tensor the kernel points to */
+    ~GCFillBorderKernel() = default;
+
+    /** Initialise the kernel's tensor, border size and border mode.
+     *
+     * @param[in,out] tensor                Tensor to process. Data types supported: F16/F32.
+     * @param[in]     border_size           Size of the border to fill in elements.
+     * @param[in]     border_mode           Border mode to use for the convolution.
+     * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
+     */
+    void configure(const IGCTensor *tensor, BorderSize border_size, BorderMode border_mode, const PixelValue &constant_border_value = PixelValue());
+
+    /** Set the constant border value on the fill border kernel; how it is set depends on the type @p T.
+     *
+     * @param[in] idx                   Index of the kernel argument to set.
+     * @param[in] constant_border_value Constant value to use for borders if border_mode is set to CONSTANT.
+     */
+    template <class T>
+    void set_constant_border(unsigned int idx, const PixelValue &constant_border_value);
+
+    // Inherited methods overridden:
+    void run(const Window &window) override;
+    bool is_parallelisable() const override;
+
+private:
+    const IGCTensor *_tensor; /**< Tensor to process (non-owning pointer). */
+};
+}
+#endif /*__ARM_COMPUTE_GCFILLBORDERKERNEL_H__ */
diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMInterleave4x4Kernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMInterleave4x4Kernel.h
new file mode 100644
index 0000000000..b2369a6ad1
--- /dev/null
+++ b/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMInterleave4x4Kernel.h
@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_GCGEMMINTERLEAVE4X4KERNEL_H__
+#define __ARM_COMPUTE_GCGEMMINTERLEAVE4X4KERNEL_H__
+
+#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h"
+
+namespace arm_compute
+{
+class IGCTensor;
+
+/** OpenGL ES kernel which interleaves the elements of a matrix A in chunks of 4x4
+ *
+ * This function puts the values in a 4x4 block of Matrix A on the same row (Interleaved values)
+ *
+ * @f[
+ * \left( \begin{array}{cccc}
+ * a00 & a01 & a02 & a03 \\
+ * a10 & a11 & a12 & a13 \\
+ * a20 & a21 & a22 & a23 \\
+ * a30 & a31 & a32 & a33 \\
+ * \end{array} \right)
+ * \rightarrow
+ * \left( \begin{array}{cccccccccccccccc}
+ * a00 & a10 & a20 & a30 & a01 & a11 & a21 & a31 & a02 & a12 & a22 & a32 & a03 & a13 & a23 & a33 \\
+ * \end{array} \right)
+ * @f]
+ *
+ * After this operation, the output matrix will have the following shape: [ height * 4, ceil(width / 4.0f) ]
+ */
+class GCGEMMInterleave4x4Kernel : public IGCKernel
+{
+public:
+    /** Default constructor */
+    GCGEMMInterleave4x4Kernel();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    GCGEMMInterleave4x4Kernel(const GCGEMMInterleave4x4Kernel &) = delete;
+    /** Prevent instances of this class from being copy-assigned (As this class contains pointers) */
+    GCGEMMInterleave4x4Kernel &operator=(const GCGEMMInterleave4x4Kernel &) = delete;
+    /** Allow instances of this class to be moved */
+    GCGEMMInterleave4x4Kernel(GCGEMMInterleave4x4Kernel &&) = default;
+    /** Allow instances of this class to be move-assigned */
+    GCGEMMInterleave4x4Kernel &operator=(GCGEMMInterleave4x4Kernel &&) = default;
+    /** Initialise the kernel's input and output.
+     *
+     * @param[in]  input  Input tensor. Data types supported: F32
+     * @param[out] output Output tensor. Data type supported: same as @p input
+     */
+    void configure(const IGCTensor *input, IGCTensor *output);
+
+    // Inherited methods overridden:
+    void run(const Window &window) override;
+
+private:
+    const IGCTensor *_input;
+    IGCTensor       *_output;
+};
+}
+#endif /* __ARM_COMPUTE_GCGEMMINTERLEAVE4X4KERNEL_H__ */
diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixAccumulateBiasesKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixAccumulateBiasesKernel.h
new file mode 100644
index 0000000000..20f28cbb65
--- /dev/null
+++ b/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixAccumulateBiasesKernel.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_GCGEMMMATRIXACCUMULATEBIASESKERNEL_H__
+#define __ARM_COMPUTE_GCGEMMMATRIXACCUMULATEBIASESKERNEL_H__
+
+#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h"
+
+namespace arm_compute
+{
+/** Interface to add a bias to each row of the input tensor
+ *
+ */
+class GCGEMMMatrixAccumulateBiasesKernel : public IGCKernel
+{
+public:
+    /** Default constructor */
+    GCGEMMMatrixAccumulateBiasesKernel();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    GCGEMMMatrixAccumulateBiasesKernel(const GCGEMMMatrixAccumulateBiasesKernel &) = delete;
+    /** Prevent instances of this class from being copy-assigned (As this class contains pointers) */
+    GCGEMMMatrixAccumulateBiasesKernel &operator=(const GCGEMMMatrixAccumulateBiasesKernel &) = delete;
+    /** Allow instances of this class to be moved */
+    GCGEMMMatrixAccumulateBiasesKernel(GCGEMMMatrixAccumulateBiasesKernel &&) = default;
+    /** Allow instances of this class to be move-assigned */
+    GCGEMMMatrixAccumulateBiasesKernel &operator=(GCGEMMMatrixAccumulateBiasesKernel &&) = default;
+    /** Set the accumulate buffer and the biases of the kernel.
+     *
+     * @param[in, out] accum  The accumulate tensor the biases are added to. Data types supported: F16/F32
+     * @param[in]      biases The shared biases tensor to add. It must be a 1D tensor. Data types supported: Same as @p accum
+     */
+    void configure(IGCTensor *accum, const IGCTensor *biases);
+
+    // Inherited methods overridden:
+    void run(const Window &window) override;
+
+private:
+    IGCTensor       *_accum;
+    const IGCTensor *_biases;
+};
+}
+
+#endif /*__ARM_COMPUTE_GCGEMMMATRIXACCUMULATEBIASESKERNEL_H__ */
diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixAdditionKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixAdditionKernel.h
new file mode 100644
index 0000000000..02abb8da76
--- /dev/null
+++ b/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixAdditionKernel.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_GCGEMMMATRIXADDITIONKERNEL_H__
+#define __ARM_COMPUTE_GCGEMMMATRIXADDITIONKERNEL_H__
+
+#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h"
+
+namespace arm_compute
+{
+class IGCTensor;
+
+/** OpenGL ES kernel to perform the in-place matrix addition between 2 matrices, taking into account that the second matrix might be weighted by a scalar value beta.
+ *  The matrices must have the same dimensions.
+ *
+ * @note This kernel is computed if and only if beta != 0.0.
+ */
+class GCGEMMMatrixAdditionKernel : public IGCKernel
+{
+public:
+    /** Default constructor */
+    GCGEMMMatrixAdditionKernel();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    GCGEMMMatrixAdditionKernel(const GCGEMMMatrixAdditionKernel &) = delete;
+    /** Prevent instances of this class from being copy-assigned (As this class contains pointers) */
+    GCGEMMMatrixAdditionKernel &operator=(const GCGEMMMatrixAdditionKernel &) = delete;
+    /** Allow instances of this class to be moved */
+    GCGEMMMatrixAdditionKernel(GCGEMMMatrixAdditionKernel &&) = default;
+    /** Allow instances of this class to be move-assigned */
+    GCGEMMMatrixAdditionKernel &operator=(GCGEMMMatrixAdditionKernel &&) = default;
+    /** Initialise the kernel's input, output and beta value.
+     *
+     * @note The input and output tensors must have the same dimensions.
+     *
+     * @param[in]      input  Input tensor (Matrix C). Data types supported: F32
+     * @param[in, out] output Output tensor. If this kernel is used to finalize the GEMM result (alpha * AB + beta * C), output must contain the result obtained by @ref GCGEMMMatrixMultiplyKernel. Data type supported: same as @p input
+     * @param[in]      beta   Weight of matrix C
+     */
+    void configure(const IGCTensor *input, IGCTensor *output, float beta);
+
+    // Inherited methods overridden:
+    void run(const Window &window) override;
+
+private:
+    const IGCTensor *_input;
+    IGCTensor       *_output;
+};
+}
+
+#endif /* __ARM_COMPUTE_GCGEMMMATRIXADDITIONKERNEL_H__ */
diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixMultiplyKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixMultiplyKernel.h
new file mode 100644
index 0000000000..3a0b22f148
--- /dev/null
+++ b/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixMultiplyKernel.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_GCGEMMMATRIXMULTIPLYKERNEL_H__
+#define __ARM_COMPUTE_GCGEMMMATRIXMULTIPLYKERNEL_H__
+
+#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h"
+
+namespace arm_compute
+{
+class IGCTensor;
+
+/** GLES Compute kernel to multiply two input matrices "A" and "B" or to multiply a vector "A" by a matrix "B". All elements of the output matrix/vector will be multiplied by alpha
+ *
+ * @note If the output tensor is a matrix, the implementation assumes that the input tensors @p input0 and @p input1 are both matrices and reshaped respectively with @ref GCGEMMInterleave4x4Kernel and @ref GCGEMMTranspose1xWKernel
+ * @note If the output tensor is a vector and the data type is F32, the implementation assumes that the first input tensor @p input0 is a vector and the second input tensor @p input1 a matrix. The implementation also assumes that both tensors have not been reshaped
+ *
+ * @attention The second input tensor must have at least 2 dimensions (matrix)
+ *
+ */
+class GCGEMMMatrixMultiplyKernel : public IGCKernel
+{
+public:
+    /** Default constructor */
+    GCGEMMMatrixMultiplyKernel();
+
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    GCGEMMMatrixMultiplyKernel(const GCGEMMMatrixMultiplyKernel &) = delete;
+
+    /** Prevent instances of this class from being copy-assigned (As this class contains pointers) */
+    GCGEMMMatrixMultiplyKernel &operator=(const GCGEMMMatrixMultiplyKernel &) = delete;
+
+    /** Allow instances of this class to be moved */
+    GCGEMMMatrixMultiplyKernel(GCGEMMMatrixMultiplyKernel &&) = default;
+
+    /** Allow instances of this class to be move-assigned */
+    GCGEMMMatrixMultiplyKernel &operator=(GCGEMMMatrixMultiplyKernel &&) = default;
+
+    /** Initialise the kernel's inputs, output and alpha
+     *
+     * @param[in]  input0                    Input tensor containing the interleaved Matrix A or the vector A. Data types supported: F16/F32
+     * @param[in]  input1                    Input tensor containing the transposed Matrix B if the first input tensor A is not a vector.
+     *                                       If the output tensor is a vector, input1 must contain the matrix B not reshaped. Data type supported: same as @p input0
+     * @param[out] output                    Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0
+     * @param[in]  alpha                     Weight of the matrix product
+     * @param[in]  is_interleaved_transposed (Optional) True if input0 and input1 have been reshaped respectively using @ref GCGEMMInterleave4x4Kernel and @ref GCGEMMTranspose1xWKernel
+     */
+    void configure(const IGCTensor *input0, const IGCTensor *input1, IGCTensor *output, float alpha, bool is_interleaved_transposed = true);
+
+    // Inherited methods overridden:
+    void run(const Window &window) override;
+
+private:
+    const IGCTensor *_input0;
+    const IGCTensor *_input1;
+    IGCTensor       *_output;
+};
+}
+#endif /* __ARM_COMPUTE_GCGEMMMATRIXMULTIPLYKERNEL_H__ */
diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMTranspose1xWKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMTranspose1xWKernel.h
new file mode 100644
index 0000000000..4223556ac4
--- /dev/null
+++ b/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMTranspose1xWKernel.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_GCGEMMTRANSPOSE1XWKERNEL_H__
+#define __ARM_COMPUTE_GCGEMMTRANSPOSE1XWKERNEL_H__
+
+#include "arm_compute/core/GLES_COMPUTE/IGCSimple2DKernel.h"
+
+namespace arm_compute
+{
+class IGCTensor;
+
+/** OpenGL ES kernel which transposes the elements of a matrix in chunks of 1xW, where W is equal to (16 / element size of the tensor)
+ *
+ * The following is an example of how the 1xW transposition works when the input data type is F32
+ *
+ * @f[
+ * \left( \begin{array}{cccc}
+ * a00 & a01 & a02 & a03 \\
+ * a10 & a11 & a12 & a13 \\
+ * a20 & a21 & a22 & a23 \\
+ * a30 & a31 & a32 & a33 \\
+ * \end{array} \right)
+ * \rightarrow
+ * \left( \begin{array}{cccccccccccccccc}
+ * a00 & a01 & a02 & a03 & a10 & a11 & a12 & a13 & a20 & a21 & a22 & a23 & a30 & a31 & a32 & a33 \\
+ * \end{array} \right)
+ * @f]
+ *
+ * @note The output matrix will have the following shape: [ height * W, ceil(width / W) ], where W = (16 / element size of the tensor)
+ *
+ */
+class GCGEMMTranspose1xWKernel : public IGCSimple2DKernel
+{
+public:
+    /** Initialise the kernel's input and output.
+     *
+     * @param[in]  input  Input tensor. Data types supported: F32
+     * @param[out] output Output tensor. Data type supported: same as @p input
+     */
+    void configure(const IGCTensor *input, IGCTensor *output);
+
+    // Inherited methods overridden:
+    void run(const Window &window) override;
+};
+}
+#endif /* __ARM_COMPUTE_GCGEMMTRANSPOSE1XWKERNEL_H__ */
diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCIm2ColKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCIm2ColKernel.h
new file mode 100644
index 0000000000..e1b35607ff
--- /dev/null
+++ b/arm_compute/core/GLES_COMPUTE/kernels/GCIm2ColKernel.h
@@ -0,0 +1,109 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_GCIM2COLKERNEL_H__
+#define __ARM_COMPUTE_GCIM2COLKERNEL_H__
+
+#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h"
+
+namespace arm_compute
+{
+class IGCTensor;
+
+/** Interface for the im2col reshape kernel.
+ *
+ * Rearranges image blocks into columns. It is used to strip out each convolution block to a single column.
+ * It is used to transform a convolution to a plain matrix multiplication.
+ *
+ * For example taking into account the image below and assuming 3x3 image blocks with stride of 1 we have:
+ * @f[
+ * \left( \begin{array}{cccc}
+ * a00 & a01 & a02 & a03 \\
+ * a10 & a11 & a12 & a13 \\
+ * a20 & a21 & a22 & a23 \\
+ * a30 & a31 & a32 & a33 \\
+ * \end{array} \right)
+ * =
+ * \left( \begin{array}{ccccccccc}
+ * a00 & a01 & a02 & a10 & a11 & a12 & a20 & a21 & a22 \\
+ * a01 & a02 & a03 & a11 & a12 & a13 & a21 & a22 & a23 \\
+ * a10 & a11 & a12 & a20 & a21 & a22 & a30 & a31 & a32 \\
+ * a11 & a12 & a13 & a21 & a22 & a23 & a31 & a32 & a33 \\
+ * \end{array} \right)
+ * @f]
+ */
+class GCIm2ColKernel : public IGCKernel
+{
+public:
+    /** Default constructor */
+    GCIm2ColKernel();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    GCIm2ColKernel(const GCIm2ColKernel &) = delete;
+    /** Prevent instances of this class from being copy-assigned (As this class contains pointers) */
+    GCIm2ColKernel &operator=(const GCIm2ColKernel &) = delete;
+    /** Allow instances of this class to be moved */
+    GCIm2ColKernel(GCIm2ColKernel &&) = default;
+    /** Allow instances of this class to be move-assigned */
+    GCIm2ColKernel &operator=(GCIm2ColKernel &&) = default;
+    /** Set the input and output of the kernel.
+     *
+     * @param[in]  input       The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM],
+     *                         while every optional dimension from 4 and above represent a batch of inputs. Data types supported: F32
+     * @param[out] output      The output tensor. First 2 lower dimensions represent a transform of each 3D input,
+     *                         while every dimension above represents a batch. Data types supported: Same as @p input
+     * @param[in]  kernel_dims The kernel dimensions (width and height).
+     * @param[in]  conv_info   Contains padding and stride information described in @ref PadStrideInfo.
+     * @param[in]  has_bias    In case biases are provided expands the matrix with 1.
+     */
+    void configure(const IGCTensor *input, IGCTensor *output, std::pair<unsigned int, unsigned int> kernel_dims, const PadStrideInfo &conv_info, bool has_bias);
+
+    // Inherited methods overridden:
+    void run(const Window &window) override;
+
+private:
+    /** Run the reshape kernel optimised for the special case (stride is 1, padding is 0 and kernel's low 3 dimensions are the same as the input's)
+     *
+     * @param[in] window Region on which to execute the kernel.
+     *                   (Must be a valid region of the window returned by window()).
+     */
+    void run_reduced(const Window &window);
+    /** Run the generic convolution layer input reshape kernel
+     *
+     * @param[in] window Region on which to execute the kernel.
+     *                   (Must be a valid region of the window returned by window()).
+     */
+    void run_generic(const Window &window);
+
+    /** Common signature for the kernel to run */
+    using Im2ColFunction = void (GCIm2ColKernel::*)(const Window &);
+
+private:
+    const IGCTensor *_input;
+    IGCTensor       *_output;
+    std::pair<unsigned int, unsigned int> _convolved_dims;
+    unsigned int   _num_elems_processed_per_iteration;
+    Im2ColFunction _run_func;
+};
+}
+
+#endif /*__ARM_COMPUTE_GCIM2COLKERNEL_H__ */
diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCNormalizationLayerKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCNormalizationLayerKernel.h
new file mode 100644
index 0000000000..e8bc7ad2b2
--- /dev/null
+++ b/arm_compute/core/GLES_COMPUTE/kernels/GCNormalizationLayerKernel.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_GCNORMALIZATIONLAYERKERNEL_H__
+#define __ARM_COMPUTE_GCNORMALIZATIONLAYERKERNEL_H__
+
+#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h"
+
+namespace arm_compute
+{
+class IGCTensor;
+
+/** Interface for the normalization layer kernel.
+ */
+class GCNormalizationLayerKernel : public IGCKernel
+{
+public:
+    /** Constructor */
+    GCNormalizationLayerKernel();
+    /** Prevent instances of this class from being copied (As this class contains pointers) */
+    GCNormalizationLayerKernel(const GCNormalizationLayerKernel &) = delete;
+    /** Prevent instances of this class from being copy-assigned (As this class contains pointers) */
+    GCNormalizationLayerKernel &operator=(const GCNormalizationLayerKernel &) = delete;
+    /** Default move constructor. */
+    GCNormalizationLayerKernel(GCNormalizationLayerKernel &&) = default;
+    /** Default move assignment operator. */
+    GCNormalizationLayerKernel &operator=(GCNormalizationLayerKernel &&) = default;
+    /** Default destructor */
+    ~GCNormalizationLayerKernel() = default;
+    /** Set the input and output tensors.
+     *
+     * @param[in]  input         Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM],
+     *                           and an optional 4th dimension for batch of inputs. Data types supported: F32.
+     * @param[in]  squared_input Source tensor in which each element has been squared. 3 lower dims represent a single input with dimensions [width, height, IFM],
+     *                           Data types should match the input type.
+     * @param[out] output        Destination tensor. Output will have the same number of dimensions as input. Data types should match the input type.
+     * @param[in]  norm_info     Normalization layer information like the normalization type, normalization size and other parameters.
+     */
+    void configure(const IGCTensor *input, const IGCTensor *squared_input, IGCTensor *output, NormalizationLayerInfo norm_info);
+
+    // Inherited methods overridden:
+    void run(const Window &window) override;
+    BorderSize border_size() const override;
+
+private:
+    const IGCTensor *_input;
+    const IGCTensor *_squared_input;
+    IGCTensor       *_output;
+    BorderSize       _border_size;
+};
+}
+#endif /*__ARM_COMPUTE_GCNORMALIZATIONLAYERKERNEL_H__ */
diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCPixelWiseMultiplicationKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCPixelWiseMultiplicationKernel.h
new file mode 100644
index 0000000000..3b01b4ad4d
--- /dev/null
+++ b/arm_compute/core/GLES_COMPUTE/kernels/GCPixelWiseMultiplicationKernel.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_GCPIXELWISEMULTIPLICATIONKERNEL_H__
+#define __ARM_COMPUTE_GCPIXELWISEMULTIPLICATIONKERNEL_H__
+
+#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h"
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+class IGCTensor;
+
+/** Interface for the pixelwise multiplication kernel.
+ *
+ */
+class GCPixelWiseMultiplicationKernel : public IGCKernel
+{
+public:
+    /** Default constructor.*/
+    GCPixelWiseMultiplicationKernel();
+    /** Prevent instances of this class from being copied (As this class contains pointers). */
+    GCPixelWiseMultiplicationKernel(const GCPixelWiseMultiplicationKernel &) = delete;
+    /** Prevent instances of this class from being copy-assigned (As this class contains pointers). */
+    GCPixelWiseMultiplicationKernel &operator=(const GCPixelWiseMultiplicationKernel &) = delete;
+    /** Allow instances of this class to be moved */
+    GCPixelWiseMultiplicationKernel(GCPixelWiseMultiplicationKernel &&) = default;
+    /** Allow instances of this class to be move-assigned */
+    GCPixelWiseMultiplicationKernel &operator=(GCPixelWiseMultiplicationKernel &&) = default;
+    /** Initialise the kernel's inputs, output and scale.
+     *
+     * @param[in]  input1 An input tensor. Data types supported: F32.
+     * @param[in]  input2 An input tensor. Data types supported: same as @p input1.
+     * @param[out] output The output tensor. Data types supported: same as @p input1.
+     * @param[in]  scale  Scale to apply after multiplication.
+     *                    Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15.
+     */
+    void configure(const IGCTensor *input1, const IGCTensor *input2, IGCTensor *output, float scale);
+
+    // Inherited methods overridden:
+    void run(const Window &window) override;
+
+private:
+    const IGCTensor *_input1;
+    const IGCTensor *_input2;
+    IGCTensor       *_output;
+};
+}
+
+#endif /*__ARM_COMPUTE_GCPIXELWISEMULTIPLICATIONKERNEL_H__ */
diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.h
new file mode 100644
index 0000000000..d4921c2092
--- /dev/null
+++ b/arm_compute/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_GCPOOLINGLAYERKERNEL_H__
+#define __ARM_COMPUTE_GCPOOLINGLAYERKERNEL_H__
+
+#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h"
+
+namespace arm_compute
+{
+class IGCTensor;
+
+/** Interface for the pooling layer kernel. */
+class GCPoolingLayerKernel : public IGCKernel
+{
+public:
+    /** Default constructor */
+    GCPoolingLayerKernel();
+    /** Prevent instances of this class from being copy-constructed (as this class contains pointers) */
+    GCPoolingLayerKernel(const GCPoolingLayerKernel &) = delete;
+    /** Prevent instances of this class from being copy-assigned (as this class contains pointers) */
+    GCPoolingLayerKernel &operator=(const GCPoolingLayerKernel &) = delete;
+    /** Allow instances of this class to be move-constructed */
+    GCPoolingLayerKernel(GCPoolingLayerKernel &&) = default;
+    /** Allow instances of this class to be move-assigned */
+    GCPoolingLayerKernel &operator=(GCPoolingLayerKernel &&) = default;
+    /** Default destructor */
+    ~GCPoolingLayerKernel() = default;
+
+    /** Set the input and output tensors and the pooling information.
+     *
+     * @param[in]  input     Source tensor. Data types supported: F16/F32.
+     * @param[out] output    Destination tensor. Data types supported: Same as @p input.
+     * @param[in]  pool_info Contains pooling operation information described in @ref PoolingLayerInfo.
+     */
+    void configure(const IGCTensor *input, IGCTensor *output, const PoolingLayerInfo &pool_info);
+
+    // Inherited methods overridden:
+    void run(const Window &window) override;
+    BorderSize border_size() const override;
+
+private:
+    const IGCTensor *_input;                             /**< Pointer to the source tensor */
+    IGCTensor       *_output;                            /**< Pointer to the destination tensor */
+    PoolingLayerInfo _pool_info;                         /**< Pooling operation information */
+    BorderSize       _border_size;                       /**< Border size (presumably the value reported by border_size() - confirm in the implementation) */
+    unsigned int     _num_elems_processed_per_iteration; /**< Number of elements processed per iteration */
+};
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_GCPOOLINGLAYERKERNEL_H__ */
diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCSoftmaxLayerKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCSoftmaxLayerKernel.h
new file mode 100644
index 0000000000..b9eb305bab
--- /dev/null
+++ b/arm_compute/core/GLES_COMPUTE/kernels/GCSoftmaxLayerKernel.h
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_GCSOFTMAXLAYERKERNEL_H__
+#define __ARM_COMPUTE_GCSOFTMAXLAYERKERNEL_H__
+
+#include "arm_compute/core/GLES_COMPUTE/IGCSimple3DKernel.h"
+
+namespace arm_compute
+{
+class IGCTensor;
+
+/** Interface for identifying the max value of 1D Logits */
+class GCLogits1DMaxKernel : public IGCSimple3DKernel
+{
+public:
+    /** Set the input and output tensors.
+     *
+     * @param[in]  input  Source tensor. Data types supported: F16/F32
+     * @param[out] output Destination tensor. Data types supported: same as @p input
+     */
+    void configure(const IGCTensor *input, IGCTensor *output);
+
+    // Inherited methods overridden:
+    void run(const Window &window) override;
+};
+
+/** Interface for shifting the logits values around the max value and exponentiating the result; a sum tensor is also written (see the @p sum output of configure()) */
+class GCLogits1DShiftExpSumKernel : public IGCKernel
+{
+public:
+    /** Default constructor */
+    GCLogits1DShiftExpSumKernel();
+    /** Prevent instances of this class from being copy-constructed (as this class contains pointers) */
+    GCLogits1DShiftExpSumKernel(const GCLogits1DShiftExpSumKernel &) = delete;
+    /** Prevent instances of this class from being copy-assigned (as this class contains pointers) */
+    GCLogits1DShiftExpSumKernel &operator=(const GCLogits1DShiftExpSumKernel &) = delete;
+    /** Allow instances of this class to be move-constructed */
+    GCLogits1DShiftExpSumKernel(GCLogits1DShiftExpSumKernel &&) = default;
+    /** Allow instances of this class to be move-assigned */
+    GCLogits1DShiftExpSumKernel &operator=(GCLogits1DShiftExpSumKernel &&) = default;
+    /** Set the input, max, output and sum tensors.
+     *
+     * @param[in]  input  Source tensor. Data types supported: F16/F32
+     * @param[in]  max    Max values tensor. Data types supported: same as @p input
+     * @param[out] output Destination tensor. Data types supported: same as @p input
+     * @param[out] sum    Sum of 1D logits tensor. Data types supported: same as @p input
+     */
+    void configure(const IGCTensor *input, const IGCTensor *max, IGCTensor *output, IGCTensor *sum);
+
+    // Inherited methods overridden:
+    void run(const Window &window) override;
+
+private:
+    const IGCTensor *_input;  /**< Pointer to the source tensor */
+    const IGCTensor *_max;    /**< Pointer to the max values tensor */
+    IGCTensor       *_output; /**< Pointer to the destination tensor */
+    IGCTensor       *_sum;    /**< Pointer to the sum tensor */
+};
+
+/** Interface for calculating the final step of the Softmax Layer, where each logit value is multiplied by the inverse of the sum of the logits. */
+class GCLogits1DNormKernel : public IGCKernel
+{
+public:
+    /** Default constructor */
+    GCLogits1DNormKernel();
+    /** Prevent instances of this class from being copy-constructed (as this class contains pointers) */
+    GCLogits1DNormKernel(const GCLogits1DNormKernel &) = delete;
+    /** Prevent instances of this class from being copy-assigned (as this class contains pointers) */
+    GCLogits1DNormKernel &operator=(const GCLogits1DNormKernel &) = delete;
+    /** Allow instances of this class to be move-constructed */
+    GCLogits1DNormKernel(GCLogits1DNormKernel &&) = default;
+    /** Allow instances of this class to be move-assigned */
+    GCLogits1DNormKernel &operator=(GCLogits1DNormKernel &&) = default;
+    /** Set the input, sum and output tensors.
+     *
+     * @param[in]  input  Source tensor. Data types supported: F16/F32
+     * @param[in]  sum    Sum tensor. Dimensions should be dim(input)-1. Data types supported: same as @p input
+     * @param[out] output Destination tensor. Data types supported: same as @p input
+     */
+    void configure(const IGCTensor *input, const IGCTensor *sum, IGCTensor *output);
+
+    // Inherited methods overridden:
+    void run(const Window &window) override;
+
+private:
+    const IGCTensor *_input;  /**< Pointer to the source tensor */
+    const IGCTensor *_sum;    /**< Pointer to the sum tensor */
+    IGCTensor       *_output; /**< Pointer to the destination tensor */
+};
+} // namespace arm_compute
+#endif /*__ARM_COMPUTE_GCSOFTMAXLAYERKERNEL_H__ */
diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCTransposeKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCTransposeKernel.h
new file mode 100644
index 0000000000..c628a00585
--- /dev/null
+++ b/arm_compute/core/GLES_COMPUTE/kernels/GCTransposeKernel.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_GCTRANSPOSEKERNEL_H__
+#define __ARM_COMPUTE_GCTRANSPOSEKERNEL_H__
+
+#include "arm_compute/core/GLES_COMPUTE/IGCSimple2DKernel.h"
+
+namespace arm_compute
+{
+class IGCTensor;
+
+/** OpenGL ES kernel which transposes the elements of a matrix.
+ *
+ * [width, height, batch] -> [height, width, batch]
+ *
+ */
+class GCTransposeKernel : public IGCSimple2DKernel
+{
+public:
+    /** Initialise the kernel's input and output.
+     *
+     * @param[in]  input  Input tensor. Data types supported: F16/F32
+     * @param[out] output Output tensor. Data types supported: same as @p input
+     */
+    void configure(const IGCTensor *input, IGCTensor *output);
+
+    // Inherited methods overridden:
+    void run(const Window &window) override;
+};
+} // namespace arm_compute
+#endif /* __ARM_COMPUTE_GCTRANSPOSEKERNEL_H__ */
-- 
cgit v1.2.1