From 7068f9900d136312318ff430aef588b14e0c87ad Mon Sep 17 00:00:00 2001 From: Anthony Barbier Date: Thu, 26 Oct 2017 15:23:08 +0100 Subject: COMPMID-631: Merge branches/gles_compute branch Last commit: commit b25c5f68042b0c81bf611d59a1bb8535e1c42497 Author: Xinghang Zhou Date: Wed Oct 25 18:48:10 2017 +0800 Synced validation's tolerances of GCSoftmax from cl side Change-Id: Ibe72054205c1c8721845d679a31af7ed0a7c5cf6 Reviewed-on: http://mpd-gerrit.cambridge.arm.com/93283 Reviewed-by: Anthony Barbier Tested-by: Kaizen --- SConscript | 20 + SConstruct | 7 +- arm_compute/core/CL/CLHelpers.h | 2 +- .../CL/kernels/CLBatchNormalizationLayerKernel.h | 8 +- arm_compute/core/Error.h | 13 +- arm_compute/core/GLES_COMPUTE/GCHelpers.h | 64 + arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h | 306 ++++ arm_compute/core/GLES_COMPUTE/GCKernels.h | 48 + arm_compute/core/GLES_COMPUTE/IGCKernel.h | 179 ++ arm_compute/core/GLES_COMPUTE/IGCSimple2DKernel.h | 41 + arm_compute/core/GLES_COMPUTE/IGCSimple3DKernel.h | 43 + arm_compute/core/GLES_COMPUTE/IGCSimpleKernel.h | 66 + arm_compute/core/GLES_COMPUTE/IGCTensor.h | 99 ++ arm_compute/core/GLES_COMPUTE/OpenGLES.h | 165 ++ .../kernels/GCAbsoluteDifferenceKernel.h | 71 + .../GLES_COMPUTE/kernels/GCActivationLayerKernel.h | 68 + .../kernels/GCBatchNormalizationLayerKernel.h | 77 + .../core/GLES_COMPUTE/kernels/GCCol2ImKernel.h | 92 + .../kernels/GCDepthConcatenateKernel.h | 76 + .../kernels/GCDirectConvolutionLayerKernel.h | 87 + .../core/GLES_COMPUTE/kernels/GCDropoutKernel.h | 79 + .../core/GLES_COMPUTE/kernels/GCFillBorderKernel.h | 77 + .../kernels/GCGEMMInterleave4x4Kernel.h | 80 + .../kernels/GCGEMMMatrixAccumulateBiasesKernel.h | 63 + .../kernels/GCGEMMMatrixAdditionKernel.h | 70 + .../kernels/GCGEMMMatrixMultiplyKernel.h | 79 + .../kernels/GCGEMMTranspose1xWKernel.h | 67 + .../core/GLES_COMPUTE/kernels/GCIm2ColKernel.h | 109 ++ .../kernels/GCNormalizationLayerKernel.h | 72 + .../kernels/GCPixelWiseMultiplicationKernel.h | 70 + 
.../GLES_COMPUTE/kernels/GCPoolingLayerKernel.h | 70 + .../GLES_COMPUTE/kernels/GCSoftmaxLayerKernel.h | 112 ++ .../core/GLES_COMPUTE/kernels/GCTransposeKernel.h | 52 + arm_compute/core/Log.h | 54 +- .../NEON/kernels/NEBatchNormalizationLayerKernel.h | 4 +- arm_compute/core/Utils.h | 23 +- .../CL/functions/CLBatchNormalizationLayer.h | 6 +- .../runtime/CL/functions/CLNormalizationLayer.h | 4 +- arm_compute/runtime/GLES_COMPUTE/GCFunctions.h | 45 + arm_compute/runtime/GLES_COMPUTE/GCScheduler.h | 73 + arm_compute/runtime/GLES_COMPUTE/GCTensor.h | 100 ++ .../runtime/GLES_COMPUTE/GCTensorAllocator.h | 128 ++ .../runtime/GLES_COMPUTE/IGCSimpleFunction.h | 50 + .../GLES_COMPUTE/functions/GCAbsoluteDifference.h | 52 + .../GLES_COMPUTE/functions/GCActivationLayer.h | 53 + .../functions/GCBatchNormalizationLayer.h | 67 + .../GLES_COMPUTE/functions/GCDepthConcatenate.h | 67 + .../functions/GCDirectConvolutionLayer.h | 59 + .../GLES_COMPUTE/functions/GCDropoutLayer.h | 63 + .../runtime/GLES_COMPUTE/functions/GCFillBorder.h | 52 + .../GLES_COMPUTE/functions/GCFullyConnectedLayer.h | 96 ++ .../runtime/GLES_COMPUTE/functions/GCGEMM.h | 85 + .../GLES_COMPUTE/functions/GCGEMMInterleave4x4.h | 50 + .../GLES_COMPUTE/functions/GCGEMMTranspose1xW.h | 47 + .../GLES_COMPUTE/functions/GCNormalizationLayer.h | 71 + .../functions/GCPixelWiseMultiplication.h | 48 + .../GLES_COMPUTE/functions/GCPoolingLayer.h | 53 + .../GLES_COMPUTE/functions/GCSoftmaxLayer.h | 69 + .../runtime/GLES_COMPUTE/functions/GCTranspose.h | 50 + .../NEON/functions/NEBatchNormalizationLayer.h | 4 +- .../runtime/NEON/functions/NENormalizationLayer.h | 4 +- examples/SConscript | 23 + examples/gc_absdiff.cpp | 112 ++ opengles-3.1/include/EGL/egl.h | 303 ++++ opengles-3.1/include/EGL/eglext.h | 804 +++++++++ opengles-3.1/include/EGL/eglplatform.h | 122 ++ opengles-3.1/include/GLES/gl.h | 770 +++++++++ opengles-3.1/include/GLES/glext.h | 1278 ++++++++++++++ opengles-3.1/include/GLES/glplatform.h | 30 + 
opengles-3.1/include/GLES2/gl2.h | 620 +++++++ opengles-3.1/include/GLES2/gl2ext.h | 1809 ++++++++++++++++++++ opengles-3.1/include/GLES2/gl2platform.h | 30 + opengles-3.1/include/GLES3/gl3.h | 1061 ++++++++++++ opengles-3.1/include/GLES3/gl31.h | 1187 +++++++++++++ opengles-3.1/include/GLES3/gl3ext.h | 24 + opengles-3.1/include/GLES3/gl3platform.h | 30 + opengles-3.1/include/KHR/khrplatform.h | 273 +++ opengles-3.1/mali_include/EGL/fbdev_window.h | 50 + opengles-3.1/stubs/EGL.c | 40 + opengles-3.1/stubs/GLESv2.c | 269 +++ opengles-3.1/stubs/Readme.txt | 2 + opengles-3.1/stubs/SConscript | 11 + scripts/check_bad_style.sh | 2 +- scripts/clang_tidy_rules.py | 3 +- scripts/fix_code_formatting.sh | 2 +- src/core/CL/cl_kernels/direct_convolution1x1.cl | 4 +- src/core/CL/cl_kernels/direct_convolution3x3.cl | 4 +- src/core/CL/cl_kernels/direct_convolution5x5.cl | 4 +- src/core/Error.cpp | 18 +- src/core/GLES_COMPUTE/GCKernelLibrary.cpp | 716 ++++++++ src/core/GLES_COMPUTE/IGCKernel.cpp | 157 ++ src/core/GLES_COMPUTE/IGCSimple2DKernel.cpp | 51 + src/core/GLES_COMPUTE/IGCSimple3DKernel.cpp | 52 + src/core/GLES_COMPUTE/IGCSimpleKernel.cpp | 54 + src/core/GLES_COMPUTE/IGCTensor.cpp | 54 + src/core/GLES_COMPUTE/OpenGLES.cpp | 820 +++++++++ src/core/GLES_COMPUTE/cs_shaders/absdiff.cs | 71 + .../GLES_COMPUTE/cs_shaders/activation_layer.cs | 262 +++ .../cs_shaders/batchnormalization_layer.cs | 222 +++ src/core/GLES_COMPUTE/cs_shaders/concatenate.cs | 106 ++ .../GLES_COMPUTE/cs_shaders/convolution_layer.cs | 302 ++++ .../cs_shaders/direct_convolution1x1.cs | 275 +++ .../cs_shaders/direct_convolution3x3.cs | 1583 +++++++++++++++++ .../cs_shaders/direct_convolution5x5.cs | 313 ++++ src/core/GLES_COMPUTE/cs_shaders/dropout.cs | 204 +++ src/core/GLES_COMPUTE/cs_shaders/fill_border.cs | 553 ++++++ src/core/GLES_COMPUTE/cs_shaders/gemm.cs | 623 +++++++ src/core/GLES_COMPUTE/cs_shaders/helpers.h | 582 +++++++ .../GLES_COMPUTE/cs_shaders/normalization_layer.cs | 157 ++ 
.../GLES_COMPUTE/cs_shaders/pixelwise_mul_float.cs | 75 + src/core/GLES_COMPUTE/cs_shaders/pooling_layer.cs | 1444 ++++++++++++++++ src/core/GLES_COMPUTE/cs_shaders/softmax_layer.cs | 541 ++++++ src/core/GLES_COMPUTE/cs_shaders/transpose.cs | 187 ++ src/core/GLES_COMPUTE/egl_entries.in | 35 + src/core/GLES_COMPUTE/gl_entries.in | 63 + .../kernels/GCAbsoluteDifferenceKernel.cpp | 112 ++ .../kernels/GCActivationLayerKernel.cpp | 128 ++ .../kernels/GCBatchNormalizationLayerKernel.cpp | 129 ++ src/core/GLES_COMPUTE/kernels/GCCol2ImKernel.cpp | 101 ++ .../kernels/GCDepthConcatenateKernel.cpp | 145 ++ .../kernels/GCDirectConvolutionLayerKernel.cpp | 394 +++++ src/core/GLES_COMPUTE/kernels/GCDropoutKernel.cpp | 110 ++ .../GLES_COMPUTE/kernels/GCFillBorderKernel.cpp | 169 ++ .../kernels/GCGEMMInterleave4x4Kernel.cpp | 129 ++ .../kernels/GCGEMMMatrixAccumulateBiasesKernel.cpp | 123 ++ .../kernels/GCGEMMMatrixAdditionKernel.cpp | 104 ++ .../kernels/GCGEMMMatrixMultiplyKernel.cpp | 210 +++ .../kernels/GCGEMMTranspose1xWKernel.cpp | 128 ++ src/core/GLES_COMPUTE/kernels/GCIm2ColKernel.cpp | 230 +++ .../kernels/GCNormalizationLayerKernel.cpp | 124 ++ .../kernels/GCPixelWiseMultiplicationKernel.cpp | 127 ++ .../GLES_COMPUTE/kernels/GCPoolingLayerKernel.cpp | 254 +++ .../GLES_COMPUTE/kernels/GCSoftmaxLayerKernel.cpp | 353 ++++ .../GLES_COMPUTE/kernels/GCTransposeKernel.cpp | 116 ++ src/core/Helpers.cpp | 7 + src/core/Utils.cpp | 3 +- src/runtime/CL/functions/CLNormalizationLayer.cpp | 2 +- src/runtime/GLES_COMPUTE/GCScheduler.cpp | 61 + src/runtime/GLES_COMPUTE/GCTensor.cpp | 77 + src/runtime/GLES_COMPUTE/GCTensorAllocator.cpp | 94 + src/runtime/GLES_COMPUTE/IGCSimpleFunction.cpp | 45 + .../functions/GCAbsoluteDifference.cpp | 40 + .../GLES_COMPUTE/functions/GCActivationLayer.cpp | 37 + .../functions/GCBatchNormalizationLayer.cpp | 48 + .../GLES_COMPUTE/functions/GCDepthConcatenate.cpp | 69 + .../functions/GCDirectConvolutionLayer.cpp | 64 + 
.../GLES_COMPUTE/functions/GCDropoutLayer.cpp | 50 + .../GLES_COMPUTE/functions/GCFillBorder.cpp | 40 + .../functions/GCFullyConnectedLayer.cpp | 177 ++ src/runtime/GLES_COMPUTE/functions/GCGEMM.cpp | 133 ++ .../GLES_COMPUTE/functions/GCGEMMInterleave4x4.cpp | 36 + .../GLES_COMPUTE/functions/GCGEMMTranspose1xW.cpp | 38 + .../functions/GCNormalizationLayer.cpp | 61 + .../functions/GCPixelWiseMultiplication.cpp | 38 + .../GLES_COMPUTE/functions/GCPoolingLayer.cpp | 42 + .../GLES_COMPUTE/functions/GCSoftmaxLayer.cpp | 66 + src/runtime/GLES_COMPUTE/functions/GCTranspose.cpp | 38 + .../NEON/functions/NENormalizationLayer.cpp | 2 +- tests/GLES_COMPUTE/GCAccessor.h | 144 ++ tests/GLES_COMPUTE/Helper.h | 115 ++ tests/SConscript | 29 + .../GLES_COMPUTE/DirectConvolutionLayer.cpp | 107 ++ .../benchmark/GLES_COMPUTE/FullyConnectedLayer.cpp | 106 ++ tests/benchmark/GLES_COMPUTE/GEMM.cpp | 56 + tests/benchmark/GLES_COMPUTE/PoolingLayer.cpp | 128 ++ tests/benchmark/fixtures/ConvolutionLayerFixture.h | 15 + tests/datasets/FullyConnectedLayerDataset.h | 1 + tests/framework/Framework.cpp | 11 + tests/framework/SConscript | 5 + tests/main.cpp | 7 + tests/validation/GLES_COMPUTE/ActivationLayer.cpp | 198 +++ .../GLES_COMPUTE/BatchNormalizationLayer.cpp | 103 ++ .../GLES_COMPUTE/DepthConcatenateLayer.cpp | 86 + .../GLES_COMPUTE/DirectConvolutionLayer.cpp | 103 ++ tests/validation/GLES_COMPUTE/DropoutLayer.cpp | 82 + .../GLES_COMPUTE/FullyConnectedLayer.cpp | 143 ++ tests/validation/GLES_COMPUTE/GEMM.cpp | 105 ++ .../validation/GLES_COMPUTE/GlobalPoolingLayer.cpp | 83 + .../validation/GLES_COMPUTE/NormalizationLayer.cpp | 85 + tests/validation/GLES_COMPUTE/PoolingLayer.cpp | 105 ++ tests/validation/GLES_COMPUTE/SoftmaxLayer.cpp | 122 ++ tests/validation/fixtures/DropoutLayerFixture.h | 106 ++ utils/Utils.h | 38 +- 183 files changed, 28869 insertions(+), 64 deletions(-) create mode 100644 arm_compute/core/GLES_COMPUTE/GCHelpers.h create mode 100644 
arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h create mode 100644 arm_compute/core/GLES_COMPUTE/GCKernels.h create mode 100644 arm_compute/core/GLES_COMPUTE/IGCKernel.h create mode 100644 arm_compute/core/GLES_COMPUTE/IGCSimple2DKernel.h create mode 100644 arm_compute/core/GLES_COMPUTE/IGCSimple3DKernel.h create mode 100644 arm_compute/core/GLES_COMPUTE/IGCSimpleKernel.h create mode 100644 arm_compute/core/GLES_COMPUTE/IGCTensor.h create mode 100644 arm_compute/core/GLES_COMPUTE/OpenGLES.h create mode 100644 arm_compute/core/GLES_COMPUTE/kernels/GCAbsoluteDifferenceKernel.h create mode 100644 arm_compute/core/GLES_COMPUTE/kernels/GCActivationLayerKernel.h create mode 100644 arm_compute/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.h create mode 100644 arm_compute/core/GLES_COMPUTE/kernels/GCCol2ImKernel.h create mode 100644 arm_compute/core/GLES_COMPUTE/kernels/GCDepthConcatenateKernel.h create mode 100644 arm_compute/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.h create mode 100644 arm_compute/core/GLES_COMPUTE/kernels/GCDropoutKernel.h create mode 100644 arm_compute/core/GLES_COMPUTE/kernels/GCFillBorderKernel.h create mode 100644 arm_compute/core/GLES_COMPUTE/kernels/GCGEMMInterleave4x4Kernel.h create mode 100644 arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixAccumulateBiasesKernel.h create mode 100644 arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixAdditionKernel.h create mode 100644 arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixMultiplyKernel.h create mode 100644 arm_compute/core/GLES_COMPUTE/kernels/GCGEMMTranspose1xWKernel.h create mode 100644 arm_compute/core/GLES_COMPUTE/kernels/GCIm2ColKernel.h create mode 100644 arm_compute/core/GLES_COMPUTE/kernels/GCNormalizationLayerKernel.h create mode 100644 arm_compute/core/GLES_COMPUTE/kernels/GCPixelWiseMultiplicationKernel.h create mode 100644 arm_compute/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.h create mode 100644 
arm_compute/core/GLES_COMPUTE/kernels/GCSoftmaxLayerKernel.h create mode 100644 arm_compute/core/GLES_COMPUTE/kernels/GCTransposeKernel.h create mode 100644 arm_compute/runtime/GLES_COMPUTE/GCFunctions.h create mode 100644 arm_compute/runtime/GLES_COMPUTE/GCScheduler.h create mode 100644 arm_compute/runtime/GLES_COMPUTE/GCTensor.h create mode 100644 arm_compute/runtime/GLES_COMPUTE/GCTensorAllocator.h create mode 100644 arm_compute/runtime/GLES_COMPUTE/IGCSimpleFunction.h create mode 100644 arm_compute/runtime/GLES_COMPUTE/functions/GCAbsoluteDifference.h create mode 100644 arm_compute/runtime/GLES_COMPUTE/functions/GCActivationLayer.h create mode 100644 arm_compute/runtime/GLES_COMPUTE/functions/GCBatchNormalizationLayer.h create mode 100644 arm_compute/runtime/GLES_COMPUTE/functions/GCDepthConcatenate.h create mode 100644 arm_compute/runtime/GLES_COMPUTE/functions/GCDirectConvolutionLayer.h create mode 100644 arm_compute/runtime/GLES_COMPUTE/functions/GCDropoutLayer.h create mode 100644 arm_compute/runtime/GLES_COMPUTE/functions/GCFillBorder.h create mode 100644 arm_compute/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.h create mode 100644 arm_compute/runtime/GLES_COMPUTE/functions/GCGEMM.h create mode 100644 arm_compute/runtime/GLES_COMPUTE/functions/GCGEMMInterleave4x4.h create mode 100644 arm_compute/runtime/GLES_COMPUTE/functions/GCGEMMTranspose1xW.h create mode 100644 arm_compute/runtime/GLES_COMPUTE/functions/GCNormalizationLayer.h create mode 100644 arm_compute/runtime/GLES_COMPUTE/functions/GCPixelWiseMultiplication.h create mode 100644 arm_compute/runtime/GLES_COMPUTE/functions/GCPoolingLayer.h create mode 100644 arm_compute/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.h create mode 100644 arm_compute/runtime/GLES_COMPUTE/functions/GCTranspose.h create mode 100644 examples/gc_absdiff.cpp create mode 100644 opengles-3.1/include/EGL/egl.h create mode 100644 opengles-3.1/include/EGL/eglext.h create mode 100644 opengles-3.1/include/EGL/eglplatform.h 
create mode 100644 opengles-3.1/include/GLES/gl.h create mode 100644 opengles-3.1/include/GLES/glext.h create mode 100644 opengles-3.1/include/GLES/glplatform.h create mode 100644 opengles-3.1/include/GLES2/gl2.h create mode 100644 opengles-3.1/include/GLES2/gl2ext.h create mode 100644 opengles-3.1/include/GLES2/gl2platform.h create mode 100644 opengles-3.1/include/GLES3/gl3.h create mode 100644 opengles-3.1/include/GLES3/gl31.h create mode 100644 opengles-3.1/include/GLES3/gl3ext.h create mode 100644 opengles-3.1/include/GLES3/gl3platform.h create mode 100644 opengles-3.1/include/KHR/khrplatform.h create mode 100644 opengles-3.1/mali_include/EGL/fbdev_window.h create mode 100644 opengles-3.1/stubs/EGL.c create mode 100644 opengles-3.1/stubs/GLESv2.c create mode 100644 opengles-3.1/stubs/Readme.txt create mode 100644 opengles-3.1/stubs/SConscript create mode 100644 src/core/GLES_COMPUTE/GCKernelLibrary.cpp create mode 100644 src/core/GLES_COMPUTE/IGCKernel.cpp create mode 100644 src/core/GLES_COMPUTE/IGCSimple2DKernel.cpp create mode 100644 src/core/GLES_COMPUTE/IGCSimple3DKernel.cpp create mode 100644 src/core/GLES_COMPUTE/IGCSimpleKernel.cpp create mode 100644 src/core/GLES_COMPUTE/IGCTensor.cpp create mode 100644 src/core/GLES_COMPUTE/OpenGLES.cpp create mode 100644 src/core/GLES_COMPUTE/cs_shaders/absdiff.cs create mode 100644 src/core/GLES_COMPUTE/cs_shaders/activation_layer.cs create mode 100644 src/core/GLES_COMPUTE/cs_shaders/batchnormalization_layer.cs create mode 100644 src/core/GLES_COMPUTE/cs_shaders/concatenate.cs create mode 100644 src/core/GLES_COMPUTE/cs_shaders/convolution_layer.cs create mode 100644 src/core/GLES_COMPUTE/cs_shaders/direct_convolution1x1.cs create mode 100644 src/core/GLES_COMPUTE/cs_shaders/direct_convolution3x3.cs create mode 100644 src/core/GLES_COMPUTE/cs_shaders/direct_convolution5x5.cs create mode 100644 src/core/GLES_COMPUTE/cs_shaders/dropout.cs create mode 100644 src/core/GLES_COMPUTE/cs_shaders/fill_border.cs create mode 
100755 src/core/GLES_COMPUTE/cs_shaders/gemm.cs create mode 100644 src/core/GLES_COMPUTE/cs_shaders/helpers.h create mode 100755 src/core/GLES_COMPUTE/cs_shaders/normalization_layer.cs create mode 100644 src/core/GLES_COMPUTE/cs_shaders/pixelwise_mul_float.cs create mode 100644 src/core/GLES_COMPUTE/cs_shaders/pooling_layer.cs create mode 100644 src/core/GLES_COMPUTE/cs_shaders/softmax_layer.cs create mode 100755 src/core/GLES_COMPUTE/cs_shaders/transpose.cs create mode 100644 src/core/GLES_COMPUTE/egl_entries.in create mode 100644 src/core/GLES_COMPUTE/gl_entries.in create mode 100644 src/core/GLES_COMPUTE/kernels/GCAbsoluteDifferenceKernel.cpp create mode 100644 src/core/GLES_COMPUTE/kernels/GCActivationLayerKernel.cpp create mode 100644 src/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.cpp create mode 100644 src/core/GLES_COMPUTE/kernels/GCCol2ImKernel.cpp create mode 100644 src/core/GLES_COMPUTE/kernels/GCDepthConcatenateKernel.cpp create mode 100644 src/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.cpp create mode 100644 src/core/GLES_COMPUTE/kernels/GCDropoutKernel.cpp create mode 100644 src/core/GLES_COMPUTE/kernels/GCFillBorderKernel.cpp create mode 100644 src/core/GLES_COMPUTE/kernels/GCGEMMInterleave4x4Kernel.cpp create mode 100644 src/core/GLES_COMPUTE/kernels/GCGEMMMatrixAccumulateBiasesKernel.cpp create mode 100644 src/core/GLES_COMPUTE/kernels/GCGEMMMatrixAdditionKernel.cpp create mode 100644 src/core/GLES_COMPUTE/kernels/GCGEMMMatrixMultiplyKernel.cpp create mode 100644 src/core/GLES_COMPUTE/kernels/GCGEMMTranspose1xWKernel.cpp create mode 100644 src/core/GLES_COMPUTE/kernels/GCIm2ColKernel.cpp create mode 100644 src/core/GLES_COMPUTE/kernels/GCNormalizationLayerKernel.cpp create mode 100644 src/core/GLES_COMPUTE/kernels/GCPixelWiseMultiplicationKernel.cpp create mode 100644 src/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.cpp create mode 100644 src/core/GLES_COMPUTE/kernels/GCSoftmaxLayerKernel.cpp create mode 100644 
src/core/GLES_COMPUTE/kernels/GCTransposeKernel.cpp create mode 100644 src/runtime/GLES_COMPUTE/GCScheduler.cpp create mode 100644 src/runtime/GLES_COMPUTE/GCTensor.cpp create mode 100644 src/runtime/GLES_COMPUTE/GCTensorAllocator.cpp create mode 100644 src/runtime/GLES_COMPUTE/IGCSimpleFunction.cpp create mode 100644 src/runtime/GLES_COMPUTE/functions/GCAbsoluteDifference.cpp create mode 100644 src/runtime/GLES_COMPUTE/functions/GCActivationLayer.cpp create mode 100755 src/runtime/GLES_COMPUTE/functions/GCBatchNormalizationLayer.cpp create mode 100755 src/runtime/GLES_COMPUTE/functions/GCDepthConcatenate.cpp create mode 100644 src/runtime/GLES_COMPUTE/functions/GCDirectConvolutionLayer.cpp create mode 100644 src/runtime/GLES_COMPUTE/functions/GCDropoutLayer.cpp create mode 100644 src/runtime/GLES_COMPUTE/functions/GCFillBorder.cpp create mode 100644 src/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.cpp create mode 100644 src/runtime/GLES_COMPUTE/functions/GCGEMM.cpp create mode 100644 src/runtime/GLES_COMPUTE/functions/GCGEMMInterleave4x4.cpp create mode 100644 src/runtime/GLES_COMPUTE/functions/GCGEMMTranspose1xW.cpp create mode 100644 src/runtime/GLES_COMPUTE/functions/GCNormalizationLayer.cpp create mode 100755 src/runtime/GLES_COMPUTE/functions/GCPixelWiseMultiplication.cpp create mode 100644 src/runtime/GLES_COMPUTE/functions/GCPoolingLayer.cpp create mode 100644 src/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.cpp create mode 100644 src/runtime/GLES_COMPUTE/functions/GCTranspose.cpp create mode 100644 tests/GLES_COMPUTE/GCAccessor.h create mode 100644 tests/GLES_COMPUTE/Helper.h create mode 100644 tests/benchmark/GLES_COMPUTE/DirectConvolutionLayer.cpp create mode 100644 tests/benchmark/GLES_COMPUTE/FullyConnectedLayer.cpp create mode 100644 tests/benchmark/GLES_COMPUTE/GEMM.cpp create mode 100644 tests/benchmark/GLES_COMPUTE/PoolingLayer.cpp create mode 100644 tests/validation/GLES_COMPUTE/ActivationLayer.cpp create mode 100644 
tests/validation/GLES_COMPUTE/BatchNormalizationLayer.cpp create mode 100644 tests/validation/GLES_COMPUTE/DepthConcatenateLayer.cpp create mode 100644 tests/validation/GLES_COMPUTE/DirectConvolutionLayer.cpp create mode 100644 tests/validation/GLES_COMPUTE/DropoutLayer.cpp create mode 100644 tests/validation/GLES_COMPUTE/FullyConnectedLayer.cpp create mode 100644 tests/validation/GLES_COMPUTE/GEMM.cpp create mode 100644 tests/validation/GLES_COMPUTE/GlobalPoolingLayer.cpp create mode 100644 tests/validation/GLES_COMPUTE/NormalizationLayer.cpp create mode 100644 tests/validation/GLES_COMPUTE/PoolingLayer.cpp create mode 100644 tests/validation/GLES_COMPUTE/SoftmaxLayer.cpp create mode 100644 tests/validation/fixtures/DropoutLayerFixture.h diff --git a/SConscript b/SConscript index acaa9a593d..0679c85070 100644 --- a/SConscript +++ b/SConscript @@ -179,6 +179,26 @@ if env['neon']: runtime_files += Glob('src/runtime/NEON/*.cpp') runtime_files += Glob('src/runtime/NEON/functions/*.cpp') +if env['gles_compute']: + if env['os'] != 'android': + arm_compute_env.Append(CPPPATH = ["#opengles-3.1/include", "#opengles-3.1/mali_include"]) + + core_files += Glob('src/core/GLES_COMPUTE/*.cpp') + core_files += Glob('src/core/GLES_COMPUTE/kernels/*.cpp') + + runtime_files += Glob('src/runtime/GLES_COMPUTE/*.cpp') + runtime_files += Glob('src/runtime/GLES_COMPUTE/functions/*.cpp') + + # Generate embed files + if env['embed_kernels']: + cs_files = Glob('src/core/GLES_COMPUTE/cs_shaders/*.cs') + cs_files += Glob('src/core/GLES_COMPUTE/cs_shaders/*.h') + + embed_files = [ f.get_path()+"embed" for f in cs_files ] + arm_compute_env.Append(CPPPATH =[Dir("./src/core/GLES_COMPUTE/").path] ) + + generate_embed.append(arm_compute_env.Command(embed_files, cs_files, action=resolve_includes)) + static_core_objects = [arm_compute_env.StaticObject(f) for f in core_files] shared_core_objects = [arm_compute_env.SharedObject(f) for f in core_files] diff --git a/SConstruct b/SConstruct index 
904a85fe43..264ed9cbe0 100644 --- a/SConstruct +++ b/SConstruct @@ -48,7 +48,8 @@ vars.AddVariables( BoolVariable("standalone", "Builds the tests as standalone executables, links statically with libgcc, libstdc++ and libarm_compute", False), BoolVariable("opencl", "Enable OpenCL support", True), BoolVariable("neon", "Enable Neon support", False), - BoolVariable("embed_kernels", "Embed OpenCL kernels in library binary", False), + BoolVariable("gles_compute", "Enable OpenGL ES Compute Shader support", False), + BoolVariable("embed_kernels", "Embed OpenCL kernels and OpenGL ES compute shaders in library binary", False), BoolVariable("set_soname", "Set the library's soname and shlibversion (requires SCons 2.4 or above)", False), BoolVariable("openmp", "Enable OpenMP backend", False), BoolVariable("cppthreads", "Enable C++11 threads backend", True), @@ -199,6 +200,7 @@ if env['opencl']: print("Cannot link OpenCL statically, which is required on bare metal") Exit(1) +if env['opencl'] or env['gles_compute']: if env['embed_kernels']: env.Append(CPPDEFINES = ['EMBEDDED_KERNELS']) @@ -229,6 +231,9 @@ SConscript('./SConscript', variant_dir='#build/%s' % env['build_dir'], duplicate if env['opencl']: SConscript("./opencl-1.2-stubs/SConscript", variant_dir="build/%s/opencl-1.2-stubs" % env['build_dir'], duplicate=0) +if env['gles_compute'] and env['os'] != 'android': + SConscript("./opengles-3.1/stubs/SConscript", variant_dir="build/%s/opengles-3.1/stubs" % env['build_dir'], duplicate=0) + if env['examples'] and env['os'] != 'bare_metal': SConscript('./examples/SConscript', variant_dir='#build/%s/examples' % env['build_dir'], duplicate=0) diff --git a/arm_compute/core/CL/CLHelpers.h b/arm_compute/core/CL/CLHelpers.h index b93bae8d82..365ecb06c4 100644 --- a/arm_compute/core/CL/CLHelpers.h +++ b/arm_compute/core/CL/CLHelpers.h @@ -43,7 +43,7 @@ struct enable_bitwise_ops }; /** Max vector width of an OpenCL vector */ -static constexpr const unsigned int max_cl_vector_width = 16; 
+static constexpr unsigned int max_cl_vector_width = 16; /** Translates a tensor data type to the appropriate OpenCL type. * diff --git a/arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h b/arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h index a24432145a..a5559bf8aa 100644 --- a/arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h +++ b/arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h @@ -55,24 +55,24 @@ public: * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result. * 3 lower dimensions represent a single input with dimensions [width, height, FM]. * The rest are optional and used for representing batches. Data types supported: QS8/QS16/F16/F32. + * @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data type supported: same as @p input * @param[in] mean Mean values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input * @param[in] var Variance values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input - * @param[in] gamma Gamma values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input * @param[in] beta Beta values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] gamma Gamma values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input * @param[in] epsilon Small value to avoid division with zero. - * @param[out] output Destination tensor. Output will have the same number of dimensions as input. 
Data type supported: same as @p input */ void configure(ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *var, const ICLTensor *beta, const ICLTensor *gamma, float epsilon); /** Static function to check if given info will lead to a valid configuration of @ref CLBatchNormalizationLayerKernel * * @param[in] input Source tensor info. In case of @p output tensor info = nullptr, this tensor will store the result. * 3 lower dimensions represent a single input with dimensions [width, height, FM]. - * @param[in] output Destination tensor info. Output will have the same number of dimensions as input. Data type supported: same as @p input * The rest are optional and used for representing batches. Data types supported: QS8/QS16/F16/F32. + * @param[in] output Destination tensor info. Output will have the same number of dimensions as input. Data type supported: same as @p input * @param[in] mean Mean values tensor info. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input * @param[in] var Variance values tensor info. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input - * @param[in] gamma Gamma values tensor info. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input * @param[in] beta Beta values tensor info. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] gamma Gamma values tensor info. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input * @param[in] epsilon Small value to avoid division with zero. 
* * @return an error status diff --git a/arm_compute/core/Error.h b/arm_compute/core/Error.h index fa3f9c0615..277db9d64d 100644 --- a/arm_compute/core/Error.h +++ b/arm_compute/core/Error.h @@ -24,6 +24,7 @@ #ifndef __ARM_COMPUTE_ERROR_H__ #define __ARM_COMPUTE_ERROR_H__ +#include #include #include @@ -106,6 +107,16 @@ private: std::string _description; }; +/** Creates an error containing the error message from variable argument list + * + * @param[in] error_code Error code + * @param[in] function Function in which the error occurred. + * @param[in] file Name of the file where the error occurred. + * @param[in] line Line on which the error occurred. + * @param[in] msg Message to display before aborting. + * @param[in] args Variable argument list of the message. + */ +Error create_error_va_list(ErrorCode error_code, const char *function, const char *file, const int line, const char *msg, va_list args); /** Creates an error containing the error message * * @param[in] error_code Error code @@ -241,7 +252,7 @@ Error create_error(ErrorCode error_code, const char *function, const char *file, * @param[in] error Error value to check. */ #define ARM_COMPUTE_ERROR_THROW_ON(error) \ - error.throw_if_error(); + error.throw_if_error() /** If the condition is true, the given message is printed and an exception is thrown * diff --git a/arm_compute/core/GLES_COMPUTE/GCHelpers.h b/arm_compute/core/GLES_COMPUTE/GCHelpers.h new file mode 100644 index 0000000000..475554f2be --- /dev/null +++ b/arm_compute/core/GLES_COMPUTE/GCHelpers.h @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_GCHELPERS_H__ +#define __ARM_COMPUTE_GCHELPERS_H__ + +#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h" +#include "arm_compute/core/Helpers.h" +#include "support/ToolchainSupport.h" + +#include + +namespace arm_compute +{ +/** Helper function to create and return a unique_ptr pointed to a GLES kernel object + * It also calls the kernel's configuration. + * + * @param[in] args All the arguments that need pass to kernel's configuration. + * + * @return A unique pointer pointed to a GLES kernel object + */ +template +std::unique_ptr create_configure_kernel(T &&... 
args) +{ + std::unique_ptr k = arm_compute::support::cpp14::make_unique(); + k->configure(std::forward(args)...); + return k; +} + +/** Helper function to create and return a unique_ptr pointed to a GLES kernel object + * + * @return A unique pointer pointed to a GLES kernel object + */ +template +std::unique_ptr create_kernel() +{ + std::unique_ptr k = arm_compute::support::cpp14::make_unique(); + return k; +} + +/** Max vector width of an GLES vector */ +static constexpr unsigned int max_gc_vector_width = 16; +} +#endif /* __ARM_COMPUTE_GCHELPERS_H__ */ diff --git a/arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h b/arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h new file mode 100644 index 0000000000..e601b529ed --- /dev/null +++ b/arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h @@ -0,0 +1,306 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_GCKERNELLIBRARY_H__ +#define __ARM_COMPUTE_GCKERNELLIBRARY_H__ + +#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h" +#include "arm_compute/core/Utils.h" + +#include +#include +#include +#include +#include + +namespace arm_compute +{ +/** GCProgram class */ +class GCProgram +{ +public: + /** Default constructor. */ + GCProgram(); + /** Construct program from source file. + * + * @param[in] name Program name. + * @param[in] source Program source. + */ + GCProgram(std::string name, std::string source); + /** Default Copy Constructor. */ + GCProgram(const GCProgram &) = default; + /** Default Move Constructor. */ + GCProgram(GCProgram &&) = default; + /** Default copy assignment operator. */ + GCProgram &operator=(const GCProgram &) = default; + /** Default move assignment operator. */ + GCProgram &operator=(GCProgram &&) = default; + /** Returns program name. + * + * @return Program's name. + */ + std::string name() const + { + return _name; + } + /** Link program. + * + * @param[in] shader Shader used to link program. + * + * @return linked program id . + */ + GLuint link_program(GLuint shader); + /** Compile shader. + * + * @param[in] build_options Shader build options. + * + * @return GLES shader object. + */ + GLuint compile_shader(const std::string &build_options); + +private: + std::string _name; /**< Program name. */ + std::string _source; /**< Source code for the program. */ +}; + +/** GCKernel class */ +class GCKernel +{ +public: + /** Default Constructor. */ + GCKernel(); + /** Default Copy Constructor. */ + GCKernel(const GCKernel &) = default; + /** Default Move Constructor. */ + GCKernel(GCKernel &&) = default; + /** Default copy assignment operator. */ + GCKernel &operator=(const GCKernel &) = default; + /** Default move assignment operator. */ + GCKernel &operator=(GCKernel &&) = default; + /** Constructor. + * + * @param[in] name Kernel name. + * @param[in] program Built program. 
+ */ + GCKernel(std::string name, GLuint program); + /** Returns kernel name. + * + * @return Kernel's name. + */ + std::string name() const + { + return _name; + } + /** Get program id. + * + * @return program id. + */ + GLuint get_program() const + { + return _program; + } + /** Use current program. + * + * @return program id. + */ + void use(); + /** Unuse current program. + * + * @return program id. + */ + void unuse(); + /** Set value at uniform idx. + * + * @param[in] idx Index in vector. + * @param[in] value Set value. + */ + template + void set_params(unsigned int idx, T value) + { + if(idx >= _params.size()) + { + _params.resize(idx + 1, 0); + } + + unsigned int *p = reinterpret_cast(&value); + _params[idx] = *p; + } + /** Clear params. + * + */ + void clear_params() + { + _params.clear(); + } + /** Set shader params binding point. + * + * @param[in] binding Shader params binding point. + */ + void set_shader_params_binding_point(unsigned int binding) + { + _shader_params_binding_point = binding; + } + /** Update shader params. + * + */ + void update_shader_params(); + /** Clean up program and ubo. + * + */ + void cleanup(); + +private: + std::string _name; /**< Kernel name */ + GLuint _program; /**< Linked program id */ + std::vector _params; /**< Store all the values of the shader parameters */ + GLuint _shader_params; /**< Uniform buffer object name for shader parameters */ + GLuint _shader_params_binding_point; /**< The binding point of the uniform block for shader parameters */ + GLuint _shader_params_index; /**< The index of the uniform block */ + GLint _shader_params_size; /**< The uniform block data size in the shader */ + static constexpr const char *_shader_params_name = "shader_params"; /**< The uniform block name in the shader */ +}; + +/** GCKernelLibrary class */ +class GCKernelLibrary +{ + using StringSet = std::set; + +private: + /** Default Constructor. 
*/ + GCKernelLibrary(); + +public: + /** Prevent instances of this class from being copied. */ + GCKernelLibrary(const GCKernelLibrary &) = delete; + /** Prevent instances of this class from being copied. */ + const GCKernelLibrary &operator=(const GCKernelLibrary &) = delete; + /** Default Destructor. */ + ~GCKernelLibrary(); + + static GCKernelLibrary &get(); + /** Initialises the kernel library. + * + * @param[in] shader_path (Optional) Path of the directory from which shader sources are loaded. + * @param[in] dpy (Optional) EGLdisplay set by external application. + * @param[in] ctx (Optional) EGLContext set by external application. + */ + void init(std::string shader_path = "./", EGLDisplay dpy = EGL_NO_DISPLAY, EGLContext ctx = EGL_NO_CONTEXT) + { + //TODO: deal with old display and context. + _shader_path = std::move(shader_path); + + _display = dpy; + _context = ctx; + + if(_display == EGL_NO_DISPLAY || _context == EGL_NO_CONTEXT) + { + setup_context(); + + _own_context = true; + } + + eglMakeCurrent(_display, EGL_NO_SURFACE, EGL_NO_SURFACE, _context); + setup_dummy_fbo(); + } + + /** Sets the path that the shaders reside in. + * + * @param[in] shader_path Path of the shader. + */ + void set_shader_path(const std::string &shader_path) + { + _shader_path = shader_path; + }; + /** Sets display and context to create kernel. + * + * @param[in] dpy EGLdisplay set by external application. + * @param[in] ctx EGLContext set by external application. + */ + void set_context(EGLDisplay dpy, EGLContext ctx) + { + //TODO: deal with old display and context. + _display = dpy; + _context = ctx; + + eglMakeCurrent(dpy, EGL_NO_SURFACE, EGL_NO_SURFACE, ctx); + setup_dummy_fbo(); + }; + /** Creates a kernel from the kernel library. + * + * @param[in] shader_name Shader name. + * @param[in] build_options_set Shader build options as a set. + * + * @return The created kernel. 
+ */ + GCKernel create_kernel(const std::string &shader_name, const StringSet &build_options_set = {}) const; + /** Serializes and saves programs to a binary. + * + */ + void save_binary(); + /** Load serialized binary with all the programs. + * + */ + void load_binary(); + /** Setup a dummy fbo to workaround an issue on Galaxy S8. + * + */ + void setup_dummy_fbo(); + +private: + /** Preprocess GLES shader + * + * @param[in] shader_source Source code of the shader to preprocess. + * + * @return Preprocessed GLES shader object. + */ + const std::string preprocess_shader(const std::string &shader_source) const; + /** Load program and its dependencies. + * + * @param[in] program_name Name of the program to load. + */ + const GCProgram &load_program(const std::string &program_name) const; + /** Concatenates contents of a set into a single string. + * + * @param[in] s Input set to concatenate. + * + * @return Concatenated string. + */ + std::string stringify_set(const StringSet &s) const; + /** Set up EGL context. + */ + void setup_context(); + + EGLDisplay _display; /**< Underlying EGL Display. */ + EGLContext _context; /**< Underlying EGL Context. */ + GLuint _frame_buffer; /**< Dummy fbo */ + GLuint _tex_rt; /**< Dummy texture for render target */ + bool _own_context; /**< Self created context or not. */ + std::string _shader_path; /**< Path to the shaders folder. */ + mutable std::map _programs_map; /**< Map with all already loaded program data. */ + mutable std::map _built_programs_map; /**< Map with all already built program data. */ + static const std::map _shader_program_map; /**< Map that associates kernel names with programs. */ + static const std::map _program_source_map; /**< Contains sources for all programs. + Used for compile-time shader inclusion. 
*/ +}; +} +#endif /* __ARM_COMPUTE_GCKERNELLIBRARY_H__ */ diff --git a/arm_compute/core/GLES_COMPUTE/GCKernels.h b/arm_compute/core/GLES_COMPUTE/GCKernels.h new file mode 100644 index 0000000000..57d11d5f18 --- /dev/null +++ b/arm_compute/core/GLES_COMPUTE/GCKernels.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_GCKERNELS_H__ +#define __ARM_COMPUTE_GCKERNELS_H__ + +/* Header regrouping all the GLES compute kernels */ +#include "arm_compute/core/GLES_COMPUTE/kernels/GCAbsoluteDifferenceKernel.h" +#include "arm_compute/core/GLES_COMPUTE/kernels/GCActivationLayerKernel.h" +#include "arm_compute/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.h" +#include "arm_compute/core/GLES_COMPUTE/kernels/GCCol2ImKernel.h" +#include "arm_compute/core/GLES_COMPUTE/kernels/GCDepthConcatenateKernel.h" +#include "arm_compute/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.h" +#include "arm_compute/core/GLES_COMPUTE/kernels/GCDropoutKernel.h" +#include "arm_compute/core/GLES_COMPUTE/kernels/GCFillBorderKernel.h" +#include "arm_compute/core/GLES_COMPUTE/kernels/GCGEMMInterleave4x4Kernel.h" +#include "arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixAccumulateBiasesKernel.h" +#include "arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixAdditionKernel.h" +#include "arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixMultiplyKernel.h" +#include "arm_compute/core/GLES_COMPUTE/kernels/GCGEMMTranspose1xWKernel.h" +#include "arm_compute/core/GLES_COMPUTE/kernels/GCIm2ColKernel.h" +#include "arm_compute/core/GLES_COMPUTE/kernels/GCNormalizationLayerKernel.h" +#include "arm_compute/core/GLES_COMPUTE/kernels/GCPixelWiseMultiplicationKernel.h" +#include "arm_compute/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.h" +#include "arm_compute/core/GLES_COMPUTE/kernels/GCSoftmaxLayerKernel.h" +#include "arm_compute/core/GLES_COMPUTE/kernels/GCTransposeKernel.h" + +#endif /* __ARM_COMPUTE_GCKERNELS_H__ */ diff --git a/arm_compute/core/GLES_COMPUTE/IGCKernel.h b/arm_compute/core/GLES_COMPUTE/IGCKernel.h new file mode 100644 index 0000000000..0d3bfb30fd --- /dev/null +++ b/arm_compute/core/GLES_COMPUTE/IGCKernel.h @@ -0,0 +1,179 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_IGCKERNEL_H__ +#define __ARM_COMPUTE_IGCKERNEL_H__ + +#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h" +#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h" + +#include "arm_compute/core/IKernel.h" + +namespace arm_compute +{ +class IGCTensor; +class Window; + +/** Common interface for all the GLES kernels */ +class IGCKernel : public IKernel +{ +public: + /** Constructor */ + IGCKernel(); + /** Returns a reference to the GLES kernel of this object. + * + * @return A reference to the GLES kernel of this object. + */ + GCKernel &kernel(); + + class BufferParam + { + public: + /** Tensor's binding point in this kernel. 
*/ + unsigned int binding_point = 0; + /** The base 2 logarithm of SSBO buffer data type size (Number of bits to be shift for offset calculation) */ + unsigned int buffer_data_type_shift = 0; + + /** Constructor + * + * @param[in] binding Tensor's binding point. + * @param[in] shift Number of bits to be shift for offset calculation + */ + BufferParam(const unsigned int binding, const unsigned int shift) + : binding_point(binding), buffer_data_type_shift(shift) + { + } + }; + + /** Add the passed 1D tensor's parameters to the object's kernel's arguments starting from the index idx. + * + * @param[in] idx Index at which to start adding the tensor's arguments.Input and output tensor will have sperated index, multiple indices start from 1, single index have to be set to 0. + * @param[in] tensor Tensor to set as an argument of the object's kernel. + * @param[in] binding_point Tensor's binding point in this kernel. + * @param[in] window Window the kernel will be executed on. + */ + void add_1D_tensor_argument(unsigned int &idx, const IGCTensor *tensor, const unsigned int binding_point, const Window &window); + + /** Add the passed 1D tensor's parameters to the object's kernel's arguments starting from the index idx. + * + * @param[in] idx Index at which to start adding the tensor's arguments.Input and output tensor will have sperated index, multiple indices start from 1, single index have to be set to 0. + * @param[in] tensor Tensor to set as an argument of the object's kernel. + * @param[in] param Additional parameter for GLES SSBO buffer. + * @param[in] window Window the kernel will be executed on. + */ + void add_1D_tensor_argument(unsigned int &idx, const IGCTensor *tensor, const BufferParam ¶m, const Window &window); + + /** Add the passed 2D tensor's parameters to the object's kernel's arguments starting from the index idx. 
+ * + * @param[in] idx Index at which to start adding the tensor's arguments.Input and output tensor will have sperated index, multiple indices start from 1, single index have to be set to 0. + * @param[in] tensor Tensor to set as an argument of the object's kernel. + * @param[in] binding_point Tensor's binding point in this kernel. + * @param[in] window Window the kernel will be executed on. + */ + void add_2D_tensor_argument(unsigned int &idx, const IGCTensor *tensor, const unsigned int binding_point, const Window &window); + + /** Add the passed 2D tensor's parameters to the object's kernel's arguments starting from the index idx. + * + * @param[in] idx Index at which to start adding the tensor's arguments.Input and output tensor will have sperated index, multiple indices start from 1, single index have to be set to 0. + * @param[in] tensor Tensor to set as an argument of the object's kernel. + * @param[in] param Additional parameter for GLES SSBO buffer. + * @param[in] window Window the kernel will be executed on. + */ + void add_2D_tensor_argument(unsigned int &idx, const IGCTensor *tensor, const BufferParam ¶m, const Window &window); + + /** Add the passed 3D tensor's parameters to the object's kernel's arguments starting from the index idx. + * + * @param[in] idx Index at which to start adding the tensor's arguments.Input and output tensor will have sperated index, multiple indices start from 1, single index have to be set to 0. + * @param[in] tensor Tensor to set as an argument of the object's kernel. + * @param[in] binding_point Tensor's binding point in this kernel. + * @param[in] window Window the kernel will be executed on. + */ + void add_3D_tensor_argument(unsigned int &idx, const IGCTensor *tensor, const unsigned int binding_point, const Window &window); + + /** Add the passed 3D tensor's parameters to the object's kernel's arguments starting from the index idx. 
+ * + * @param[in] idx Index at which to start adding the tensor's arguments.Input and output tensor will have sperated index, multiple indices start from 1, single index have to be set to 0. + * @param[in] tensor Tensor to set as an argument of the object's kernel. + * @param[in] param Additional parameter for GLES SSBO buffer. + * @param[in] window Window the kernel will be executed on. + */ + void add_3D_tensor_argument(unsigned int &idx, const IGCTensor *tensor, const BufferParam ¶m, const Window &window); + + /** Returns the number of arguments enqueued per 1D tensor object. + * + * @return The number of arguments enqueues per 1D tensor object. + */ + unsigned int num_arguments_per_1D_tensor() const; + /** Returns the number of arguments enqueued per 2D tensor object. + * + * @return The number of arguments enqueues per 2D tensor object. + */ + unsigned int num_arguments_per_2D_tensor() const; + /** Returns the number of arguments enqueued per 3D tensor object. + * + * @return The number of arguments enqueues per 3D tensor object. + */ + unsigned int num_arguments_per_3D_tensor() const; + /** Enqueue the OpenGL ES shader to process the given window + * + * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). + */ + virtual void run(const Window &window) = 0; + +private: + /** Add the passed tensor's parameters to the object's kernel's arguments starting from the index idx. + * + * @param[in] idx Index at which to start adding the tensor's arguments.Input and output tensor will have sperated index, multiple indices start from 1, single index have to be set to 0. + * @param[in] tensor Tensor to set as an argument of the object's kernel. + * @param[in] param Additional parameter for GLES SSBO buffer. + * @param[in] window Window the kernel will be executed on. 
+ */ + template + void add_tensor_argument(unsigned int &idx, const IGCTensor *tensor, const BufferParam ¶m, const Window &window); + + /** Returns the number of arguments enqueued per tensor object. + * + * @return The number of arguments enqueued per tensor object. + */ + template + unsigned int num_arguments_per_tensor() const; + +protected: + GCKernel _kernel; /**< GLES kernel to run */ +}; + +/** Add the kernel to the command queue with the given window. + * + * @note Depending on the size of the window, this might translate into several jobs being enqueued. + * + * @note If kernel->kernel() is empty then the function will return without adding anything to the queue. + * + * @param[in] kernel Kernel to enqueue + * @param[in] window Window the kernel has to process. + * @param[in] lws Local workgroup size requested, by default (1, 1, 1) + * + * @note If any dimension of the lws is greater than the global workgroup size then no lws will be passed. + */ +void enqueue(IGCKernel &kernel, const Window &window, const gles::NDRange &lws = gles::NDRange(1U, 1U, 1U)); +} +#endif /*__ARM_COMPUTE_IGCKERNEL_H__ */ diff --git a/arm_compute/core/GLES_COMPUTE/IGCSimple2DKernel.h b/arm_compute/core/GLES_COMPUTE/IGCSimple2DKernel.h new file mode 100644 index 0000000000..413e86a2b7 --- /dev/null +++ b/arm_compute/core/GLES_COMPUTE/IGCSimple2DKernel.h @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_IGCSIMPLE2DKERNEL_H__ +#define __ARM_COMPUTE_IGCSIMPLE2DKERNEL_H__ + +#include "arm_compute/core/GLES_COMPUTE/IGCSimpleKernel.h" + +namespace arm_compute +{ +class IGCTensor; + +/** Interface for simple OpenGL ES kernels having 1 tensor input and 1 tensor output. This interface can be used when the work-item processes a 2D tile */ +class IGCSimple2DKernel : public IGCSimpleKernel +{ +public: + // Inherited methods overridden: + void run(const Window &window) override; +}; +} +#endif /*__ARM_COMPUTE_IGCSIMPLE2DKERNEL_H__ */ diff --git a/arm_compute/core/GLES_COMPUTE/IGCSimple3DKernel.h b/arm_compute/core/GLES_COMPUTE/IGCSimple3DKernel.h new file mode 100644 index 0000000000..622e53c38b --- /dev/null +++ b/arm_compute/core/GLES_COMPUTE/IGCSimple3DKernel.h @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_IGCSIMPLE3DKERNEL_H__ +#define __ARM_COMPUTE_IGCSIMPLE3DKERNEL_H__ + +#include "arm_compute/core/GLES_COMPUTE/IGCSimple2DKernel.h" + +namespace arm_compute +{ +class IGCTensor; + +/** Interface for simple GLES kernels having 1 tensor input and 1 tensor output. + * Both input tensor and output tensor must have at least 3 dimensions. + */ +class IGCSimple3DKernel : public IGCSimple2DKernel +{ +public: + // Inherited methods overridden: + void run(const Window &window) override; +}; +} +#endif /*__ARM_COMPUTE_IGCSIMPLE3DKERNEL_H__ */ diff --git a/arm_compute/core/GLES_COMPUTE/IGCSimpleKernel.h b/arm_compute/core/GLES_COMPUTE/IGCSimpleKernel.h new file mode 100644 index 0000000000..a23c4e774e --- /dev/null +++ b/arm_compute/core/GLES_COMPUTE/IGCSimpleKernel.h @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_IGCSIMPLEKERNEL_H__ +#define __ARM_COMPUTE_IGCSIMPLEKERNEL_H__ + +#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h" +#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h" +#include "arm_compute/core/Helpers.h" + +namespace arm_compute +{ +/** Interface for simple OpenGL ES kernels having 1 tensor input and 1 tensor output */ +class IGCSimpleKernel : public IGCKernel +{ +public: + /** Constructor. */ + IGCSimpleKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers). */ + IGCSimpleKernel(const IGCSimpleKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers). */ + IGCSimpleKernel &operator=(const IGCSimpleKernel &) = delete; + /** Allow instances of this class to be moved. 
*/ + IGCSimpleKernel(IGCSimpleKernel &&) = default; + /** Allow instances of this class to be moved. */ + IGCSimpleKernel &operator=(IGCSimpleKernel &&) = default; + /** Default destructor */ + ~IGCSimpleKernel() = default; + + /** Configure the kernel + * + * @param[in] input Source tensor. + * @param[out] output Destination tensor. + * @param[in] num_elems_processed_per_iteration Number of processed elements per iteration. + * @param[in] border_undefined (Optional) True if the border mode is undefined. False if it's replicate or constant. + * @param[in] border_size (Optional) Size of the border. + */ + void configure(const IGCTensor *input, IGCTensor *output, unsigned int num_elems_processed_per_iteration, bool border_undefined = false, const BorderSize &border_size = BorderSize()); + +protected: + const IGCTensor *_input; + IGCTensor *_output; +}; +} + +#endif /*__ARM_COMPUTE_IGCSIMPLEKERNEL_H__ */ diff --git a/arm_compute/core/GLES_COMPUTE/IGCTensor.h b/arm_compute/core/GLES_COMPUTE/IGCTensor.h new file mode 100644 index 0000000000..ab4e57e0ce --- /dev/null +++ b/arm_compute/core/GLES_COMPUTE/IGCTensor.h @@ -0,0 +1,99 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_IGCTENSOR_H__ +#define __ARM_COMPUTE_IGCTENSOR_H__ + +#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h" +#include "arm_compute/core/ITensor.h" + +#include + +namespace arm_compute +{ +/** Interface for GLES Compute tensor */ +class IGCTensor : public ITensor +{ +public: + /** Default constructor. */ + IGCTensor(); + + /** Prevent instances of this class from being copied (As this class contains pointers). */ + IGCTensor(const IGCTensor &) = delete; + + /** Prevent instances of this class from being copy assigned (As this class contains pointers). */ + IGCTensor &operator=(const IGCTensor &) = delete; + + /** Allow instances of this class to be moved */ + IGCTensor(IGCTensor &&) = default; + + /** Allow instances of this class to be moved */ + IGCTensor &operator=(IGCTensor &&) = default; + + /** Virtual destructor */ + virtual ~IGCTensor() = default; + + /** Map on an allocated buffer. + * + * @param[in] blocking (Optional) If true, then the mapping will be ready to use by the time + * this method returns, else it is the caller's responsibility + * to flush the queue and wait for the mapping operation to have completed before using the returned mapping pointer. + */ + void map(bool blocking = true); + /** Unmap an allocated and mapped buffer. + */ + void unmap(); + /** Clear the contents of the tensor synchronously. 
+ */ + void clear(); + + // Inherited methods overridden: + uint8_t *buffer() const override; + /** Interface to be implemented by the child class to return the tensor's gles compute buffer id. + * + * @return A SSBO buffer id. + */ + virtual GLuint gc_buffer() const = 0; + +protected: + /** Method to be implemented by the child class to map the SSBO. + * + * @param[in] blocking If true, then the mapping will be ready to use by the time + * this method returns, else it is the caller's responsibility + * to flush the queue and wait for the mapping operation to have completed before using the returned mapping pointer. + */ + virtual uint8_t *do_map(bool blocking) = 0; + /** Method to be implemented by the child class to unmap the SSBO. + * + * @note This method simply enqueues the unmap operation, it is the caller's responsibility to flush the queue and make sure the unmap is finished before + * the memory is accessed by the device. + */ + virtual void do_unmap() = 0; + +private: + uint8_t *_mapping; +}; + +using IGCImage = IGCTensor; +} +#endif /*__ARM_COMPUTE_IGCTENSOR_H__ */ diff --git a/arm_compute/core/GLES_COMPUTE/OpenGLES.h b/arm_compute/core/GLES_COMPUTE/OpenGLES.h new file mode 100644 index 0000000000..e123982945 --- /dev/null +++ b/arm_compute/core/GLES_COMPUTE/OpenGLES.h @@ -0,0 +1,165 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_OPENGLES_H__ +#define __ARM_COMPUTE_OPENGLES_H__ + +#include "arm_compute/core/Log.h" + +#include +#include +#include +#include +#include +#include + +#ifdef ARM_COMPUTE_DEBUG_ENABLED +#define ARM_COMPUTE_GL_CHECK(x) \ + x; \ + { \ + GLenum error = glGetError(); \ + if(error != GL_NO_ERROR) \ + ARM_COMPUTE_LOG_INFO_MSG_WITH_FORMAT_CORE("glGetError() = %i (0x%.8x)\n", error, error); \ + } +#else /* ARM_COMPUTE_DEBUG_ENABLED */ +#define ARM_COMPUTE_GL_CHECK(x) x +#endif /* ARM_COMPUTE_DEBUG_ENABLED */ + +namespace arm_compute +{ +namespace gles +{ +/** Class interface for specifying NDRange values. */ +class NDRange +{ +private: + size_t _sizes[3]; + size_t _dimensions; + +public: + /** Default constructor - resulting range has zero dimensions. */ + NDRange() + : _dimensions(0) + { + _sizes[0] = 0; + _sizes[1] = 0; + _sizes[2] = 0; + } + + /** Constructs one-dimensional range. + * + * @param[in] size0 Size of the first dimension. + */ + NDRange(size_t size0) + : _dimensions(1) + { + _sizes[0] = size0; + _sizes[1] = 1; + _sizes[2] = 1; + } + + /** Constructs two-dimensional range. + * + * @param[in] size0 Size of the first dimension. + * @param[in] size1 Size of the second dimension. + */ + NDRange(size_t size0, size_t size1) + : _dimensions(2) + { + _sizes[0] = size0; + _sizes[1] = size1; + _sizes[2] = 1; + } + + /** Constructs three-dimensional range. + * + * @param[in] size0 Size of the first dimension. + * @param[in] size1 Size of the second dimension. 
+ * @param[in] size2 Size of the third dimension. + */ + NDRange(size_t size0, size_t size1, size_t size2) + : _dimensions(3) + { + _sizes[0] = size0; + _sizes[1] = size1; + _sizes[2] = size2; + } + + /** Conversion operator to const size_t *. + * + * @returns A pointer to the size of the first dimension. + */ + operator const size_t *() const + { + return _sizes; + } + + /** Queries the number of dimensions in the range. + * + * @returns The number of dimensions. + */ + size_t dimensions() const + { + return _dimensions; + } + + /** Returns the size of the object in bytes based on the runtime number of dimensions + * + * @returns The size of the object in bytes. + */ + size_t size() const + { + return _dimensions * sizeof(size_t); + } + + /** Returns the sizes array for each dimensions. + * + * @returns The sizes array + */ + size_t *get() + { + return _sizes; + } + + /** Returns the sizes array for each dimensions. + * + * @returns The sizes array + */ + const size_t *get() const + { + return _sizes; + } +}; + +static const NDRange NullRange; +static const NDRange Range_128_1 = NDRange(128, 1); +} // namespace gles + +/** Check if the OpenGL ES 3.1 API is available at runtime. + * + * @returns true if the OpenGL ES 3.1 API is available. + */ +bool opengles31_is_available(); +} // namespace arm_compute + +#endif /* __ARM_COMPUTE_OPENGLES_H__ */ diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCAbsoluteDifferenceKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCAbsoluteDifferenceKernel.h new file mode 100644 index 0000000000..71f7b37700 --- /dev/null +++ b/arm_compute/core/GLES_COMPUTE/kernels/GCAbsoluteDifferenceKernel.h @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_GCABSOLUTEDIFFERENCEKERNEL_H__ +#define __ARM_COMPUTE_GCABSOLUTEDIFFERENCEKERNEL_H__ + +#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h" + +namespace arm_compute +{ +class IGCTensor; + +/** Interface for the absolute difference kernel. + * + * Absolute difference is computed by: + * @f[ output(x,y) = | input1(x,y) - input2(x,y) | @f] + */ +class GCAbsoluteDifferenceKernel : public IGCKernel +{ +public: + /** Default constructor. */ + GCAbsoluteDifferenceKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers). */ + GCAbsoluteDifferenceKernel(const GCAbsoluteDifferenceKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers). 
*/ + GCAbsoluteDifferenceKernel &operator=(const GCAbsoluteDifferenceKernel &) = delete; + /** Allow instances of this class to be moved. */ + GCAbsoluteDifferenceKernel(GCAbsoluteDifferenceKernel &&) = default; + /** Allow instances of this class to be moved. */ + GCAbsoluteDifferenceKernel &operator=(GCAbsoluteDifferenceKernel &&) = default; + /** Default destructor */ + ~GCAbsoluteDifferenceKernel() = default; + + /** Set the inputs and output images. + * + * @param[in] input1 Source tensor. Data types supported: U8 + * @param[in] input2 Source tensor. Data types supported: U8 + * @param[out] output Destination tensor. Data types supported: U8 + */ + void configure(const IGCTensor *input1, const IGCTensor *input2, IGCTensor *output); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + const IGCTensor *_input1; /**< Source tensor 1. */ + const IGCTensor *_input2; /**< Source tensor 2. */ + IGCTensor *_output; /**< Destination tensor. */ +}; +} +#endif /* __ARM_COMPUTE_GCABSOLUTEDIFFERENCEKERNEL_H__ */ diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCActivationLayerKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCActivationLayerKernel.h new file mode 100644 index 0000000000..fc1d52f455 --- /dev/null +++ b/arm_compute/core/GLES_COMPUTE/kernels/GCActivationLayerKernel.h @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_GCACTIVATIONLAYERKERNEL_H__ +#define __ARM_COMPUTE_GCACTIVATIONLAYERKERNEL_H__ + +#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h" + +namespace arm_compute +{ +class IGCTensor; + +/** Interface for the activation layer kernel. 
*/ +class GCActivationLayerKernel : public IGCKernel +{ +public: + /** Default constructor */ + GCActivationLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GCActivationLayerKernel(const GCActivationLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GCActivationLayerKernel &operator=(const GCActivationLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + GCActivationLayerKernel(GCActivationLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + GCActivationLayerKernel &operator=(GCActivationLayerKernel &&) = default; + /** Default destructor */ + ~GCActivationLayerKernel() = default; + /** Set the input and output tensor. + * + * @note If the output tensor is a nullptr, the activation function will be performed in-place + * + * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result + * of the activation function. Data types supported: F16/F32. + * @param[out] output Destination tensor. Data type should match the input data type. + * @param[in] act_info Activation layer information. + */ + void configure(IGCTensor *input, IGCTensor *output, ActivationLayerInfo act_info); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + IGCTensor *_input; + IGCTensor *_output; +}; +} +#endif /*__ARM_COMPUTE_GCACTIVATIONLAYERKERNEL_H__ */ diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.h new file mode 100644 index 0000000000..2bbd6a83fe --- /dev/null +++ b/arm_compute/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_GCBATCHNORMALIZATIONLAYERKERNEL_H__ +#define __ARM_COMPUTE_GCBATCHNORMALIZATIONLAYERKERNEL_H__ + +#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h" + +namespace arm_compute +{ +class IGCTensor; + +/** Interface for the BatchNormalization layer kernel. + */ +class GCBatchNormalizationLayerKernel : public IGCKernel +{ +public: + /** Constructor */ + GCBatchNormalizationLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GCBatchNormalizationLayerKernel(const GCBatchNormalizationLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GCBatchNormalizationLayerKernel &operator=(const GCBatchNormalizationLayerKernel &) = delete; + /** Default Move Constructor. 
*/ + GCBatchNormalizationLayerKernel(GCBatchNormalizationLayerKernel &&) = default; + /** Default move assignment operator. */ + GCBatchNormalizationLayerKernel &operator=(GCBatchNormalizationLayerKernel &&) = default; + /** Default destructor */ + ~GCBatchNormalizationLayerKernel() = default; + + /** Set the input and output tensors. + * + * @param[in] input Source tensor. 3 lower dimensions represent a single input with dimensions [width, height, FM]. + * The rest are optional and used for representing batches. Data types supported: F16/F32. + * @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data type supported: same as @p input + * @param[in] mean Mean values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] var Variance values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] beta Beta values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] gamma Gamma values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] epsilon Small value to avoid division with zero. 
+ */ + void configure(const IGCTensor *input, IGCTensor *output, const IGCTensor *mean, const IGCTensor *var, const IGCTensor *beta, const IGCTensor *gamma, float epsilon); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + const IGCTensor *_input; + IGCTensor *_output; + const IGCTensor *_mean; + const IGCTensor *_var; + const IGCTensor *_beta; + const IGCTensor *_gamma; + float _epsilon; +}; +} +#endif /*__ARM_COMPUTE_GCBATCHNORMALIZATIONLAYERKERNEL_H__ */ diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCCol2ImKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCCol2ImKernel.h new file mode 100644 index 0000000000..257ab0eca0 --- /dev/null +++ b/arm_compute/core/GLES_COMPUTE/kernels/GCCol2ImKernel.h @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef __ARM_COMPUTE_GCCOL2IMKERNEL_H__ +#define __ARM_COMPUTE_GCCOL2IMKERNEL_H__ + +#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h" + +namespace arm_compute +{ +class IGCTensor; + +/** Interface for the col2im reshaping kernel. + * + * Rearranges each matrix column into image blocks. It's the inverse operation of @ref GCIm2ColKernel. + * + * For example, a vector of 9 elements can be reshaped to a block(image) of 3x3: + * + * @f[ + * \left( \begin{array}{ccccccccc} + * a0 & a1 & a2 & a3 & a4 & a5 & a6 & a7 & a8 \\ + * \end{array} \right) + * \rightarrow + * \left( \begin{array}{ccc} + * a0 & a1 & a2 \\ + * a3 & a4 & a5 \\ + * a6 & a7 & a8 \\ + * \end{array} \right) + * @f] + */ +class GCCol2ImKernel : public IGCKernel +{ +public: + /** Default constructor */ + GCCol2ImKernel(); + + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GCCol2ImKernel(const GCCol2ImKernel &) = delete; + + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GCCol2ImKernel &operator=(const GCCol2ImKernel &) = delete; + + /** Allow instances of this class to be moved */ + GCCol2ImKernel(GCCol2ImKernel &&) = default; + + /** Allow instances of this class to be moved */ + GCCol2ImKernel &operator=(GCCol2ImKernel &&) = default; + + /** Default destructor */ + ~GCCol2ImKernel() = default; + + /** Set the input and output of the kernel. + * + * @param[in] input The input tensor to convert. Data types supported: F32 + * @param[out] output The output tensor. 3 lower dimensions represent a single output [width, height, OFM], + * while the rest represent batch of outputs. Data types supported: Same as @p input + * @param[in] convolved_dims Output convolved dimensions. 
+ */ + void configure(const IGCTensor *input, IGCTensor *output, std::pair convolved_dims); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + const IGCTensor *_input; + IGCTensor *_output; + std::pair _convolved_dims; +}; +} + +#endif /*__ARM_COMPUTE_GCCOL2IMKERNEL_H__ */ diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCDepthConcatenateKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCDepthConcatenateKernel.h new file mode 100644 index 0000000000..9a34a9a9c5 --- /dev/null +++ b/arm_compute/core/GLES_COMPUTE/kernels/GCDepthConcatenateKernel.h @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef __ARM_COMPUTE_GCDEPTHCONCATENATEKERNEL_H__ +#define __ARM_COMPUTE_GCDEPTHCONCATENATEKERNEL_H__ + +#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h" +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +class IGCTensor; + +/** Interface for the depth concatenate kernel. + * The input tensor will be concatenated into the output tensor. + */ +class GCDepthConcatenateKernel : public IGCKernel +{ +public: + /** Default constructor */ + GCDepthConcatenateKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GCDepthConcatenateKernel(const GCDepthConcatenateKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GCDepthConcatenateKernel &operator=(const GCDepthConcatenateKernel &) = delete; + /** Allow instances of this class to be moved */ + GCDepthConcatenateKernel(GCDepthConcatenateKernel &&) = default; + /** Allow instances of this class to be moved */ + GCDepthConcatenateKernel &operator=(GCDepthConcatenateKernel &&) = default; + /** Default destructor */ + ~GCDepthConcatenateKernel() = default; + /** Initialise the kernel's inputs and output + * + * @param[in] input Input tensor. Data types supported: F16/F32. + * @param[in] depth_offset The offset on the Z axis. + * @param[in,out] output Output tensor. Data types supported: Same as @p input. + * + * @note: The output tensor's low two dimensions can't be smaller than the input one's. + * @note: The gaps between the two lowest dimensions of input and output need to be divisible by 2. 
+ * + */ + void configure(const IGCTensor *input, unsigned int depth_offset, IGCTensor *output); + + // Inherited methods overridden: + void run(const Window &window) override; + BorderSize border_size() const override; + +private: + const IGCTensor *_input; + IGCTensor *_output; + int _top_bottom; + int _left_right; +}; +} +#endif /* __ARM_COMPUTE_GCDEPTHCONCATENATEKERNEL_H__ */ diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.h new file mode 100644 index 0000000000..415b781bc6 --- /dev/null +++ b/arm_compute/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.h @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_GCDIRECTCONVOLUTIONLAYERKERNEL_H__ +#define __ARM_COMPUTE_GCDIRECTCONVOLUTIONLAYERKERNEL_H__ + +#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h" +#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h" + +namespace arm_compute +{ +class IGCTensor; + +/** Interface for the direct convolution kernel. + */ +template +class GCDirectConvolutionLayerKernel : public IGCKernel +{ +public: + /** Default constructor */ + GCDirectConvolutionLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GCDirectConvolutionLayerKernel(const GCDirectConvolutionLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GCDirectConvolutionLayerKernel &operator=(const GCDirectConvolutionLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + GCDirectConvolutionLayerKernel(GCDirectConvolutionLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + GCDirectConvolutionLayerKernel &operator=(GCDirectConvolutionLayerKernel &&) = default; + /** Default destructor */ + ~GCDirectConvolutionLayerKernel() = default; + /** Set the input and output of the kernel. + * + * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM], + * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: F16/F32 + * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported:Same as @p input. + * @param[in] bias Biases tensor. Shared bias supported. Biases are 1D tensor with dimensions [OFM]. Data type supported:Same as @p input. + * @param[out] output The output tensor. First 2 lower dimensions represent a transform of each 3D input, + * while every dimension above represents a batch. 
Data types supported: Same as @p input + * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. + */ + void configure(const IGCTensor *input, const IGCTensor *weights, const IGCTensor *bias, IGCTensor *output, const PadStrideInfo &conv_info); + + // Inherited methods overridden: + BorderSize border_size() const override; + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + const IGCTensor *_input; + const IGCTensor *_bias; + const IGCTensor *_weights; + IGCTensor *_output; + BorderSize _border_size; + int _conv_stride_x; + int _conv_stride_y; + int _conv_pad_x; + int _conv_pad_y; + gles::NDRange _lws; +}; + +using GCDirectConvolutionLayer1x1Kernel = GCDirectConvolutionLayerKernel<1>; +using GCDirectConvolutionLayer3x3Kernel = GCDirectConvolutionLayerKernel<3>; +using GCDirectConvolutionLayer5x5Kernel = GCDirectConvolutionLayerKernel<5>; +} +#endif /*__ARM_COMPUTE_GCDIRECTCONVOLUTIONLAYERKERNEL_H__ */ diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCDropoutKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCDropoutKernel.h new file mode 100644 index 0000000000..6159a7af26 --- /dev/null +++ b/arm_compute/core/GLES_COMPUTE/kernels/GCDropoutKernel.h @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __ARM_COMPUTE_GCDROPOUTKERNEL_H__ +#define __ARM_COMPUTE_GCDROPOUTKERNEL_H__ + +#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h" + +namespace arm_compute +{ +class IGCTensor; + +/** Interface for the dropout kernel. + * + * Dropout is used to improve over-fit on neural networks. + * + */ +class GCDropoutKernel : public IGCKernel +{ +public: + /** Default constructor */ + GCDropoutKernel(); + + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GCDropoutKernel(const GCDropoutKernel &) = delete; + + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GCDropoutKernel &operator=(const GCDropoutKernel &) = delete; + + /** Allow instances of this class to be moved */ + GCDropoutKernel(GCDropoutKernel &&) = default; + + /** Allow instances of this class to be moved */ + GCDropoutKernel &operator=(GCDropoutKernel &&) = default; + + /** Set the input and output of the kernel. + * + * @param[in] input The input tensor for this op. Data types supported: F16/F32 + * @param[out] mask The mask tensor. Data types supported: Same as @p input + * @param[out] output The output tensor. 
Data types supported: Same as @p input + * @param[in] ratio Dropout ratio + * @param[in] forward Forward or backward propagation + * + */ + void configure(const IGCTensor *input, IGCTensor *mask, IGCTensor *output, float ratio, bool forward); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + const IGCTensor *_input; + IGCTensor *_mask; + IGCTensor *_output; + unsigned int _num_elems_processed_per_iteration; +}; +} + +#endif /*__ARM_COMPUTE_GCDROPOUTKERNEL_H__ */ diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCFillBorderKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCFillBorderKernel.h new file mode 100644 index 0000000000..acb8aa67d3 --- /dev/null +++ b/arm_compute/core/GLES_COMPUTE/kernels/GCFillBorderKernel.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_GCFILLBORDERKERNEL_H__ +#define __ARM_COMPUTE_GCFILLBORDERKERNEL_H__ + +#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h" +#include "arm_compute/core/PixelValue.h" +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +class IGCTensor; + +/** Interface for filling the border of a kernel */ +class GCFillBorderKernel : public IGCKernel +{ +public: + /** Default constructor */ + GCFillBorderKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GCFillBorderKernel(const GCFillBorderKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GCFillBorderKernel &operator=(const GCFillBorderKernel &) = delete; + /** Allow instances of this class to be moved */ + GCFillBorderKernel(GCFillBorderKernel &&) = default; + /** Allow instances of this class to be moved */ + GCFillBorderKernel &operator=(GCFillBorderKernel &&) = default; + /** Default destructor */ + ~GCFillBorderKernel() = default; + + /** Initialise the kernel's input, output and border mode. + * + * @param[in,out] tensor Tensor to process Data types supported: F16/F32. + * @param[in] border_size Size of the border to fill in elements. + * @param[in] border_mode Border mode to use for the convolution. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + */ + void configure(const IGCTensor *tensor, BorderSize border_size, BorderMode border_mode, const PixelValue &constant_border_value = PixelValue()); + + /** Function to set the constant value on fill border kernel depending on type. + * + * @param[in] idx Index of the kernel argument to set. + * @param[in] constant_border_value Constant value to use for borders if border_mode is set to CONSTANT. 
+ */ + template + void set_constant_border(unsigned int idx, const PixelValue &constant_border_value); + + // Inherited methods overridden: + void run(const Window &window) override; + bool is_parallelisable() const override; + +private: + const IGCTensor *_tensor; +}; +} +#endif /*__ARM_COMPUTE_GCFILLBORDERKERNEL_H__ */ diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMInterleave4x4Kernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMInterleave4x4Kernel.h new file mode 100644 index 0000000000..b2369a6ad1 --- /dev/null +++ b/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMInterleave4x4Kernel.h @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_GCGEMMINTERLEAVE4X4KERNEL_H__ +#define __ARM_COMPUTE_GCGEMMINTERLEAVE4X4KERNEL_H__ + +#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h" + +namespace arm_compute +{ +class IGCTensor; + +/** OpenGL ES kernel which interleaves the elements of a matrix A in chunk of 4x4 + * + * This function puts the values in a 4x4 block of Matrix A on the same row (Interleaved values) + * + * @f[ + * \left( \begin{array}{cccc} + * a00 & a01 & a02 & a03 \\ + * a10 & a11 & a12 & a13 \\ + * a20 & a21 & a22 & a23 \\ + * a30 & a31 & a32 & a33 \\ + * \end{array} \right) + * \rightarrow + * \left( \begin{array}{ccccccccccccccccc} + * a00 & a10 & a20 & a30 & a01 & a11 & a21 & a31 & a02 & a12 & a22 & a32 & a03 & a13 & a23 & a33 \\ + * \end{array} \right) + * @f] + * + * After this operation, the output matrix will have the following shape: [ height * 4, ceil(width / 4.0f) ] + */ +class GCGEMMInterleave4x4Kernel : public IGCKernel +{ +public: + /** Default constructor */ + GCGEMMInterleave4x4Kernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GCGEMMInterleave4x4Kernel(const GCGEMMInterleave4x4Kernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GCGEMMInterleave4x4Kernel &operator=(const GCGEMMInterleave4x4Kernel &) = delete; + /** Allow instances of this class to be moved */ + GCGEMMInterleave4x4Kernel(GCGEMMInterleave4x4Kernel &&) = default; + /** Allow instances of this class to be moved */ + GCGEMMInterleave4x4Kernel &operator=(GCGEMMInterleave4x4Kernel &&) = default; + /** Initialise the kernel's input and output. + * + * @param[in] input Input tensor. Data types supported: F32 + * @param[out] output Output tensor. 
Data type supported: same as @p input + */ + void configure(const IGCTensor *input, IGCTensor *output); + + // Inherited methods overridden + void run(const Window &window) override; + +private: + const IGCTensor *_input; + IGCTensor *_output; +}; +} +#endif /* __ARM_COMPUTE_GCGEMMINTERLEAVE4X4KERNEL_H__ */ diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixAccumulateBiasesKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixAccumulateBiasesKernel.h new file mode 100644 index 0000000000..20f28cbb65 --- /dev/null +++ b/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixAccumulateBiasesKernel.h @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
 + */ +#ifndef __ARM_COMPUTE_GCGEMMMATRIXACCUMULATEBIASESKERNEL_H__ +#define __ARM_COMPUTE_GCGEMMMATRIXACCUMULATEBIASESKERNEL_H__ + +#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h" + +namespace arm_compute +{ +/** Interface to add a bias to each row of the input tensor + * + */ +class GCGEMMMatrixAccumulateBiasesKernel : public IGCKernel +{ +public: + /** Default constructor */ + GCGEMMMatrixAccumulateBiasesKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GCGEMMMatrixAccumulateBiasesKernel(const GCGEMMMatrixAccumulateBiasesKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GCGEMMMatrixAccumulateBiasesKernel &operator=(const GCGEMMMatrixAccumulateBiasesKernel &) = delete; + /** Allow instances of this class to be moved */ + GCGEMMMatrixAccumulateBiasesKernel(GCGEMMMatrixAccumulateBiasesKernel &&) = default; + /** Allow instances of this class to be moved */ + GCGEMMMatrixAccumulateBiasesKernel &operator=(GCGEMMMatrixAccumulateBiasesKernel &&) = default; + /** Set the accumulate buffer and the biases of the kernel. + * + * @param[in, out] accum The accumulate tensor to convert. Data types supported: F16/F32 + * @param[in] biases The shared biases tensor to append. It must be 1D tensor. Data types supported: Same as @p accum + */ + void configure(IGCTensor *accum, const IGCTensor *biases); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + IGCTensor *_accum; + const IGCTensor *_biases; +}; +} + +#endif /*__ARM_COMPUTE_GCGEMMMATRIXACCUMULATEBIASESKERNEL_H__ */ diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixAdditionKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixAdditionKernel.h new file mode 100644 index 0000000000..02abb8da76 --- /dev/null +++ b/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixAdditionKernel.h @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_GCGEMMMATRIXADDITIONKERNEL_H__ +#define __ARM_COMPUTE_GCGEMMMATRIXADDITIONKERNEL_H__ + +#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h" + +namespace arm_compute +{ +class IGCTensor; + +/** OpenGL ES kernel to perform the in-place matrix addition between 2 matrices, taking into account that the second matrix might be weighted by a scalar value beta. + * The matrices must have the same dimensions + * + * @note This kernel is computed if and only if beta != 0.0. 
+ */ +class GCGEMMMatrixAdditionKernel : public IGCKernel +{ +public: + /** Default constructor */ + GCGEMMMatrixAdditionKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GCGEMMMatrixAdditionKernel(const GCGEMMMatrixAdditionKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GCGEMMMatrixAdditionKernel &operator=(const GCGEMMMatrixAdditionKernel &) = delete; + /** Allow instances of this class to be moved */ + GCGEMMMatrixAdditionKernel(GCGEMMMatrixAdditionKernel &&) = default; + /** Allow instances of this class to be moved */ + GCGEMMMatrixAdditionKernel &operator=(GCGEMMMatrixAdditionKernel &&) = default; + /** Initialise the kernel's input, output and beta value + * + * @note The input and output tensors must have the same dimensions + * + * @param[in] input Input tensor (Matrix C). Data types supported: F32 + * @param[in, out] output Output tensor. If this kernel is used to finalize the GEMM result (alpha * AB + beta * C), output must contain the result obtained by @ref GCGEMMMatrixMultiplyKernel. Data type supported: same as @p input + * @param[in] beta Weight of matrix C + */ + void configure(const IGCTensor *input, IGCTensor *output, float beta); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + const IGCTensor *_input; + IGCTensor *_output; +}; +} + +#endif /* __ARM_COMPUTE_GCGEMMMATRIXADDITIONKERNEL_H__ */ diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixMultiplyKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixMultiplyKernel.h new file mode 100644 index 0000000000..3a0b22f148 --- /dev/null +++ b/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixMultiplyKernel.h @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_GCGEMMMATRIXMULTIPLYKERNEL_H__ +#define __ARM_COMPUTE_GCGEMMMATRIXMULTIPLYKERNEL_H__ + +#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h" + +namespace arm_compute +{ +class IGCTensor; + +/** GLES Compute kernel to multiply two input matrices "A" and "B" or to multiply a vector "A" by a matrix "B". All elements of the output matrix/vector will be multiplied by alpha + * + * @note If the output tensor is a matrix, the implementation assumes that the input tensors @p input0 and @p input1 are both matrices and reshaped respectively with @ref GCGEMMInterleave4x4Kernel" and @ref GCGEMMTranspose1xWKernel + * @note If the output tensor is a vector and the data type is F32, the implementation assumes that the first input tensor @p input0 is a vector and the second input tensor @p input1 a matrix. 
The implementation also assumes that both tensors have not been reshaped + * + * @attention The second input tensor must have at least 2 dimensions (matrix) + * + */ +class GCGEMMMatrixMultiplyKernel : public IGCKernel +{ +public: + /** Default constructor */ + GCGEMMMatrixMultiplyKernel(); + + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GCGEMMMatrixMultiplyKernel(const GCGEMMMatrixMultiplyKernel &) = delete; + + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GCGEMMMatrixMultiplyKernel &operator=(const GCGEMMMatrixMultiplyKernel &) = delete; + + /** Allow instances of this class to be moved */ + GCGEMMMatrixMultiplyKernel(GCGEMMMatrixMultiplyKernel &&) = default; + + /** Allow instances of this class to be moved */ + GCGEMMMatrixMultiplyKernel &operator=(GCGEMMMatrixMultiplyKernel &&) = default; + + /** Initialise the kernel's input, output and alpha + * + * @param[in] input0 Input tensor containing the interleaved Matrix A or the vector A. Data types supported: F16/F32 + * @param[in] input1 Input tensor containing the transposed Matrix B if the first input tensor A is not a vector. + * If the output tensor is a vector, input1 must contain the matrix B not reshaped. Data type supported: same as @p input0 + * @param[out] output Output tensor to store the result of matrix multiplication. 
Data type supported: same as @p input0 + * @param[in] alpha Weight of the matrix product + * @param[in] is_interleaved_transposed (Optional) True if input0 and input1 have been reshaped respectively using @ref GCGEMMInterleave4x4Kernel and @ref GCGEMMTranspose1xWKernel + */ + void configure(const IGCTensor *input0, const IGCTensor *input1, IGCTensor *output, float alpha, bool is_interleaved_transposed = true); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + const IGCTensor *_input0; + const IGCTensor *_input1; + IGCTensor *_output; +}; +} +#endif /* __ARM_COMPUTE_GCGEMMMATRIXMULTIPLYKERNEL_H__ */ diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMTranspose1xWKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMTranspose1xWKernel.h new file mode 100644 index 0000000000..4223556ac4 --- /dev/null +++ b/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMTranspose1xWKernel.h @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_GCGEMMTRANSPOSE1XWKERNEL_H__ +#define __ARM_COMPUTE_GCGEMMTRANSPOSE1XWKERNEL_H__ + +#include "arm_compute/core/GLES_COMPUTE/IGCSimple2DKernel.h" + +namespace arm_compute +{ +class IGCTensor; + +/** OpenGLES kernel which transposes the elements of a matrix in chunks of 1xW, where W is equal to (16 / element size of the tensor) + * + * Following an example of how the transposition1xW works when the input data type is F32 + * + * @f[ + * \left( \begin{array}{cccc} + * a00 & a01 & a02 & a03 \\ + * a10 & a11 & a12 & a13 \\ + * a20 & a21 & a22 & a23 \\ + * a30 & a31 & a32 & a33 \\ + * \end{array} \right) + * \rightarrow + * \left( \begin{array}{ccccccccccccccccc} + * a00 & a01 & a02 & a03 & a10 & a11 & a12 & a13 & a20 & a21 & a22 & a23 & a30 & a31 & a32 & a33 \\ + * \end{array} \right) + * @f] + * + * @note The output matrix will have the following shape: [ height * W, ceil(width / W) ], where W = (16 / element size of the tensor) + * + */ +class GCGEMMTranspose1xWKernel : public IGCSimple2DKernel +{ +public: + /** Initialise the kernel's input and output. + * + * @param[in] input Input tensor. Data types supported: F32 + * @param[out] output Output tensor. 
Data type supported: same as @p input + */ + void configure(const IGCTensor *input, IGCTensor *output); + + // Inherited methods overridden: + void run(const Window &window) override; +}; +} +#endif /* __ARM_COMPUTE_GCGEMMTRANSPOSE1XWKERNEL_H__ */ diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCIm2ColKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCIm2ColKernel.h new file mode 100644 index 0000000000..e1b35607ff --- /dev/null +++ b/arm_compute/core/GLES_COMPUTE/kernels/GCIm2ColKernel.h @@ -0,0 +1,109 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_GCIM2COLKERNEL_H__ +#define __ARM_COMPUTE_GCIM2COLKERNEL_H__ + +#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h" + +namespace arm_compute +{ +class IGCTensor; + +/** Interface for the im2col reshape kernel. + * + * Rearranges image blocks into columns. 
It is used to strip out each convolution block to a single column. + * It is used to transform a convolution to a plain matrix multiplication. + * + * For example taking into account the image below and assuming 3x3 image blocks with stride of 1 we have: + * @f[ + * \left( \begin{array}{cccc} + * a00 & a01 & a02 & a03 \\ + * a10 & a11 & a12 & a13 \\ + * a20 & a21 & a22 & a23 \\ + * a30 & a31 & a32 & a33 \\ + * \end{array} \right) + * = + * \left( \begin{array}{ccccccccc} + * a00 & a01 & a02 & a10 & a11 & a12 & a20 & a21 & a22 \\ + * a01 & a02 & a03 & a11 & a12 & a13 & a21 & a22 & a23 \\ + * a10 & a11 & a12 & a20 & a21 & a22 & a30 & a31 & a32 \\ + * a11 & a12 & a13 & a21 & a22 & a23 & a31 & a32 & a33 \\ + * \end{array} \right) + * @f] + */ +class GCIm2ColKernel : public IGCKernel +{ +public: + /** Default constructor */ + GCIm2ColKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GCIm2ColKernel(const GCIm2ColKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GCIm2ColKernel &operator=(const GCIm2ColKernel &) = delete; + /** Allow instances of this class to be moved */ + GCIm2ColKernel(GCIm2ColKernel &&) = default; + /** Allow instances of this class to be moved */ + GCIm2ColKernel &operator=(GCIm2ColKernel &&) = default; + /** Set the input and output of the kernel. + * + * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM], + * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: F32 + * @param[out] output The output tensor. First 2 lower dimensions represent a transform of each 3D input, + * while every dimension above represents a batch. Data types supported: Same as @p input + * @param[in] kernel_dims The kernel dimensions (width and height). + * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. 
 + * @param[in] has_bias In case biases are provided expands the matrix with 1. + */ + void configure(const IGCTensor *input, IGCTensor *output, std::pair<unsigned int, unsigned int> kernel_dims, const PadStrideInfo &conv_info, bool has_bias); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + /** Run the reshape kernel optimised for the special case (stride is 1, padding is 0 and kernel's low 3 dimensions are same as input) + * + * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). + * + */ + void run_reduced(const Window &window); + /** Run the generic convolution layer input reshape kernel + * + * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). + * + */ + void run_generic(const Window &window); + + /** Common signature for the kernel to run */ + using Im2ColFunction = void (GCIm2ColKernel::*)(const Window &); + +private: + const IGCTensor *_input; + IGCTensor *_output; + std::pair<unsigned int, unsigned int> _convolved_dims; + unsigned int _num_elems_processed_per_iteration; + Im2ColFunction _run_func; +}; +} + +#endif /*__ARM_COMPUTE_GCIM2COLKERNEL_H__ */ diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCNormalizationLayerKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCNormalizationLayerKernel.h new file mode 100644 index 0000000000..e8bc7ad2b2 --- /dev/null +++ b/arm_compute/core/GLES_COMPUTE/kernels/GCNormalizationLayerKernel.h @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_GCNORMALIZATIONLAYERKERNEL_H__ +#define __ARM_COMPUTE_GCNORMALIZATIONLAYERKERNEL_H__ + +#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h" + +namespace arm_compute +{ +class IGCTensor; + +/** Interface for the normalization layer kernel. + */ +class GCNormalizationLayerKernel : public IGCKernel +{ +public: + /** Constructor */ + GCNormalizationLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GCNormalizationLayerKernel(const GCNormalizationLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GCNormalizationLayerKernel &operator=(const GCNormalizationLayerKernel &) = delete; + /** Default Move Constructor. 
 */ + GCNormalizationLayerKernel(GCNormalizationLayerKernel &&) = default; + /** Default move assignment operator. */ + GCNormalizationLayerKernel &operator=(GCNormalizationLayerKernel &&) = default; + /** Default destructor */ + ~GCNormalizationLayerKernel() = default; + /** Set the input and output tensors. + * + * @param[in] input Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM], + * and an optional 4th dimension for batch of inputs. Data types supported: F32. + * @param[in] squared_input Source in which each element has been squared. 3 lower dims represent a single input with dimensions [width, height, IFM], + * Data types should match the input type. + * @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data types should match the input type. + * @param[in] norm_info Normalization layer information like the normalization type, normalization size and other parameters. + */ + void configure(const IGCTensor *input, const IGCTensor *squared_input, IGCTensor *output, NormalizationLayerInfo norm_info); + + // Inherited methods overridden: + void run(const Window &window) override; + BorderSize border_size() const override; + +private: + const IGCTensor *_input; + const IGCTensor *_squared_input; + IGCTensor *_output; + BorderSize _border_size; +}; +} +#endif /*__ARM_COMPUTE_GCNORMALIZATIONLAYERKERNEL_H__ */ diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCPixelWiseMultiplicationKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCPixelWiseMultiplicationKernel.h new file mode 100644 index 0000000000..3b01b4ad4d --- /dev/null +++ b/arm_compute/core/GLES_COMPUTE/kernels/GCPixelWiseMultiplicationKernel.h @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_GCPIXELWISEMULTIPLICATIONKERNEL_H__ +#define __ARM_COMPUTE_GCPIXELWISEMULTIPLICATIONKERNEL_H__ + +#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h" +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +class IGCTensor; + +/** Interface for the pixelwise multiplication kernel. + * + */ +class GCPixelWiseMultiplicationKernel : public IGCKernel +{ +public: + /** Default constructor.*/ + GCPixelWiseMultiplicationKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers). */ + GCPixelWiseMultiplicationKernel(const GCPixelWiseMultiplicationKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers). 
*/ + GCPixelWiseMultiplicationKernel &operator=(const GCPixelWiseMultiplicationKernel &) = delete; + /** Allow instances of this class to be moved */ + GCPixelWiseMultiplicationKernel(GCPixelWiseMultiplicationKernel &&) = default; + /** Allow instances of this class to be moved */ + GCPixelWiseMultiplicationKernel &operator=(GCPixelWiseMultiplicationKernel &&) = default; + /** Initialise the kernel's input, output and border mode. + * + * @param[in] input1 An input tensor. Data types supported: F32. + * @param[in] input2 An input tensor. Data types supported: same as @p input1. + * @param[out] output The output tensor, Data types supported: same as @p input1. + * @param[in] scale Scale to apply after multiplication. + * Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15. + */ + void configure(const IGCTensor *input1, const IGCTensor *input2, IGCTensor *output, float scale); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + const IGCTensor *_input1; + const IGCTensor *_input2; + IGCTensor *_output; +}; +} + +#endif /*__ARM_COMPUTE_GCPIXELWISEMULTIPLICATIONKERNEL_H__ */ diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.h new file mode 100644 index 0000000000..d4921c2092 --- /dev/null +++ b/arm_compute/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.h @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_GCPOOLINGLAYERKERNEL_H__ +#define __ARM_COMPUTE_GCPOOLINGLAYERKERNEL_H__ + +#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h" + +namespace arm_compute +{ +class IGCTensor; + +/** Interface for the pooling layer kernel */ +class GCPoolingLayerKernel : public IGCKernel +{ +public: + /** Default constructor */ + GCPoolingLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GCPoolingLayerKernel(const GCPoolingLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GCPoolingLayerKernel &operator=(const GCPoolingLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + GCPoolingLayerKernel(GCPoolingLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + GCPoolingLayerKernel &operator=(GCPoolingLayerKernel &&) = default; + /** Default destructor */ + ~GCPoolingLayerKernel() = default; + + /** Set the input and output tensors. + * + * @param[in] input Source tensor. Data types supported: F16/F32. + * @param[out] output Destination tensor. Data types supported: Same as @p input. + * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo. 
+ */ + void configure(const IGCTensor *input, IGCTensor *output, const PoolingLayerInfo &pool_info); + + // Inherited methods overridden: + void run(const Window &window) override; + BorderSize border_size() const override; + +private: + const IGCTensor *_input; + IGCTensor *_output; + PoolingLayerInfo _pool_info; + BorderSize _border_size; + unsigned int _num_elems_processed_per_iteration; +}; +} // namespace arm_compute +#endif /*__ARM_COMPUTE_GCPOOLINGLAYERKERNEL_H__ */ diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCSoftmaxLayerKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCSoftmaxLayerKernel.h new file mode 100644 index 0000000000..b9eb305bab --- /dev/null +++ b/arm_compute/core/GLES_COMPUTE/kernels/GCSoftmaxLayerKernel.h @@ -0,0 +1,112 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_GCSOFTMAXLAYERKERNEL_H__ +#define __ARM_COMPUTE_GCSOFTMAXLAYERKERNEL_H__ + +#include "arm_compute/core/GLES_COMPUTE/IGCSimple3DKernel.h" + +namespace arm_compute +{ +class IGCTensor; + +/** Interface for the identifying the max value of 1D Logits */ +class GCLogits1DMaxKernel : public IGCSimple3DKernel +{ +public: + /** Set the input and output tensors. + * + * @param[in] input Source tensor. Data types supported: F16/F32 + * @param[out] output Destination tensor. Data types supported: same as @p input + */ + void configure(const IGCTensor *input, IGCTensor *output); + + // Inherited methods overridden: + void run(const Window &window) override; +}; + +/** Interface for shifting the logits values around the max value and exponentiating the result */ +class GCLogits1DShiftExpSumKernel : public IGCKernel +{ +public: + /** Default constructor */ + GCLogits1DShiftExpSumKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GCLogits1DShiftExpSumKernel(const GCLogits1DShiftExpSumKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GCLogits1DShiftExpSumKernel &operator=(const GCLogits1DShiftExpSumKernel &) = delete; + /** Allow instances of this class to be moved */ + GCLogits1DShiftExpSumKernel(GCLogits1DShiftExpSumKernel &&) = default; + /** Allow instances of this class to be moved */ + GCLogits1DShiftExpSumKernel &operator=(GCLogits1DShiftExpSumKernel &&) = default; + /** Set the input and output tensors. + * + * @param[in] input Source tensor. Data types supported: F16/F32 + * @param[in] max Max values tensor. Data types supported: same as @p input + * @param[out] output Destination tensor. Data types supported: same as @p input + * @param[out] sum Sum of 1D logits tensor. 
Data types supported: same as @p input + */ + void configure(const IGCTensor *input, const IGCTensor *max, IGCTensor *output, IGCTensor *sum); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + const IGCTensor *_input; + const IGCTensor *_max; + IGCTensor *_output; + IGCTensor *_sum; +}; + +/** Interface for calculating the final step of the Softmax Layer where each logit value is multiplied by the inverse of the sum of the logits. */ +class GCLogits1DNormKernel : public IGCKernel +{ +public: + /** Default constructor */ + GCLogits1DNormKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GCLogits1DNormKernel(const GCLogits1DNormKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GCLogits1DNormKernel &operator=(const GCLogits1DNormKernel &) = delete; + /** Allow instances of this class to be moved */ + GCLogits1DNormKernel(GCLogits1DNormKernel &&) = default; + /** Allow instances of this class to be moved */ + GCLogits1DNormKernel &operator=(GCLogits1DNormKernel &&) = default; + /** Set the input and output tensors. + * + * @param[in] input Source tensor. Data types supported: F16/F32 + * @param[in] sum Sum tensor. Dimensions should be dim(input)-1. Data types supported: same as @p input + * @param[out] output Destination tensor. 
Data types supported: same as @p input + */ + void configure(const IGCTensor *input, const IGCTensor *sum, IGCTensor *output); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + const IGCTensor *_input; + const IGCTensor *_sum; + IGCTensor *_output; +}; +} // namespace arm_compute +#endif /*__ARM_COMPUTE_GCSOFTMAXLAYERKERNEL_H__ */ diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCTransposeKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCTransposeKernel.h new file mode 100644 index 0000000000..c628a00585 --- /dev/null +++ b/arm_compute/core/GLES_COMPUTE/kernels/GCTransposeKernel.h @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_GCTRANSPOSEKERNEL_H__ +#define __ARM_COMPUTE_GCTRANSPOSEKERNEL_H__ + +#include "arm_compute/core/GLES_COMPUTE/IGCSimple2DKernel.h" + +namespace arm_compute +{ +class IGCTensor; + +/** OpenGL ES kernel which transposes the elements of a matrix. + * + * [width, height, batch] -> [height, width, batch] + * + */ +class GCTransposeKernel : public IGCSimple2DKernel +{ +public: + /** Initialise the kernel's input and output. + * + * @param[in] input Input tensor. Data types supported: F16/F32 + * @param[out] output Output tensor. Data type supported: Same as @p input + */ + void configure(const IGCTensor *input, IGCTensor *output); + + // Inherited methods overridden: + void run(const Window &window) override; +}; +} +#endif /* __ARM_COMPUTE_GCTRANSPOSEKERNEL_H__ */ diff --git a/arm_compute/core/Log.h b/arm_compute/core/Log.h index 488c8e6190..70e7c51110 100644 --- a/arm_compute/core/Log.h +++ b/arm_compute/core/Log.h @@ -44,9 +44,12 @@ * @param[in] log_level Logging level * @param[in] msg Message to log */ -#define ARM_COMPUTE_LOG_MSG_CORE(log_level, msg) \ - ARM_COMPUTE_CREATE_DEFAULT_CORE_LOGGER(); \ - ARM_COMPUTE_LOG_MSG("CORE", log_level, msg) +#define ARM_COMPUTE_LOG_MSG_CORE(log_level, msg) \ + do \ + { \ + ARM_COMPUTE_CREATE_DEFAULT_CORE_LOGGER(); \ + ARM_COMPUTE_LOG_MSG("CORE", log_level, msg); \ + } while(false) /** Log a message with format to the core system logger * @@ -54,42 +57,57 @@ * @param[in] fmt String format (printf style) * @param[in] ... Message arguments */ -#define ARM_COMPUTE_LOG_MSG_WITH_FORMAT_CORE(log_level, fmt, ...) \ - ARM_COMPUTE_CREATE_DEFAULT_CORE_LOGGER(); \ - ARM_COMPUTE_LOG_MSG_WITH_FORMAT("CORE", log_level, fmt, __VA_ARGS__) +#define ARM_COMPUTE_LOG_MSG_WITH_FORMAT_CORE(log_level, fmt, ...) 
\ + do \ + { \ + ARM_COMPUTE_CREATE_DEFAULT_CORE_LOGGER(); \ + ARM_COMPUTE_LOG_MSG_WITH_FORMAT("CORE", log_level, fmt, __VA_ARGS__); \ + } while(false) /** Log a stream to the core system logger * * @param[in] log_level Logging level * @param[in] ss Stream to log */ -#define ARM_COMPUTE_LOG_STREAM_CORE(log_level, ss) \ - ARM_COMPUTE_CREATE_DEFAULT_CORE_LOGGER(); \ - ARM_COMPUTE_LOG_STREAM("CORE", log_level, ss) +#define ARM_COMPUTE_LOG_STREAM_CORE(log_level, ss) \ + do \ + { \ + ARM_COMPUTE_CREATE_DEFAULT_CORE_LOGGER(); \ + ARM_COMPUTE_LOG_STREAM("CORE", log_level, ss); \ + } while(false) /** Log information level message to the core system logger * * @param[in] msg Stream to log */ -#define ARM_COMPUTE_LOG_INFO_MSG_CORE(msg) \ - ARM_COMPUTE_CREATE_DEFAULT_CORE_LOGGER(); \ - ARM_COMPUTE_LOG_MSG_CORE(arm_compute::logging::LogLevel::INFO, msg) +#define ARM_COMPUTE_LOG_INFO_MSG_CORE(msg) \ + do \ + { \ + ARM_COMPUTE_CREATE_DEFAULT_CORE_LOGGER(); \ + ARM_COMPUTE_LOG_MSG_CORE(arm_compute::logging::LogLevel::INFO, msg); \ + } while(false) /** Log information level formatted message to the core system logger * * @param[in] fmt String format (printf style) * @param[in] ... Message arguments */ -#define ARM_COMPUTE_LOG_INFO_MSG_WITH_FORMAT_CORE(fmt, ...) \ - ARM_COMPUTE_CREATE_DEFAULT_CORE_LOGGER(); \ - ARM_COMPUTE_LOG_MSG_WITH_FORMAT_CORE(arm_compute::logging::LogLevel::INFO, msg) +#define ARM_COMPUTE_LOG_INFO_MSG_WITH_FORMAT_CORE(fmt, ...) 
\ + do \ + { \ + ARM_COMPUTE_CREATE_DEFAULT_CORE_LOGGER(); \ + ARM_COMPUTE_LOG_MSG_WITH_FORMAT_CORE(arm_compute::logging::LogLevel::INFO, fmt, __VA_ARGS__); \ + } while(false) /** Log information level stream to the core system logger * * @param[in] ss Message to log */ -#define ARM_COMPUTE_LOG_INFO_STREAM_CORE(ss) \ - ARM_COMPUTE_CREATE_DEFAULT_CORE_LOGGER(); \ - ARM_COMPUTE_LOG_STREAM_CORE(arm_compute::logging::LogLevel::INFO, ss) +#define ARM_COMPUTE_LOG_INFO_STREAM_CORE(ss) \ + do \ + { \ + ARM_COMPUTE_CREATE_DEFAULT_CORE_LOGGER(); \ + ARM_COMPUTE_LOG_STREAM_CORE(arm_compute::logging::LogLevel::INFO, ss); \ + } while(false) #endif /* __ARM_COMPUTE_LOGGING_MACROS_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEBatchNormalizationLayerKernel.h b/arm_compute/core/NEON/kernels/NEBatchNormalizationLayerKernel.h index 04c4c9ebba..1dfe075310 100644 --- a/arm_compute/core/NEON/kernels/NEBatchNormalizationLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEBatchNormalizationLayerKernel.h @@ -54,12 +54,12 @@ public: * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result. * 3 lower dimensions represent a single input with dimensions [width, height, FM]. * The rest are optional and used for representing batches. Data types supported: QS8/QS16/F16/F32. + * @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data type supported: same as @p input * @param[in] mean Mean values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input * @param[in] var Variance values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input - * @param[in] gamma Gamma values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input * @param[in] beta Beta values tensor. 1 dimension with size equal to the feature maps [FM]. 
Data types supported: Same as @p input + * @param[in] gamma Gamma values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input * @param[in] epsilon Small value to avoid division with zero. - * @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data type supported: same as @p input */ void configure(ITensor *input, ITensor *output, const ITensor *mean, const ITensor *var, const ITensor *beta, const ITensor *gamma, float epsilon); diff --git a/arm_compute/core/Utils.h b/arm_compute/core/Utils.h index 96e99e6874..b8c5b34e5a 100644 --- a/arm_compute/core/Utils.h +++ b/arm_compute/core/Utils.h @@ -837,7 +837,16 @@ void print_consecutive_elements_impl(std::ostream &s, const T *ptr, unsigned int { s.width(stream_width); } - s << std::right << static_cast<print_type>(ptr[i]) << element_delim; + + if(std::is_same<typename std::decay<T>::type, half>::value) + { + // We use T instead of print_type here because std::is_floating_point returns false and then the print_type becomes int. + s << std::right << static_cast<T>(ptr[i]) << element_delim; + } + else + { + s << std::right << static_cast<print_type>(ptr[i]) << element_delim; + } } } @@ -859,7 +868,17 @@ int max_consecutive_elements_display_width_impl(std::ostream &s, const T *ptr, u { std::stringstream ss; ss.copyfmt(s); - ss << static_cast<print_type>(ptr[i]); + + if(std::is_same<typename std::decay<T>::type, half>::value) + { + // We use T instead of print_type here because std::is_floating_point returns false and then the print_type becomes int.
+ ss << static_cast<T>(ptr[i]); + } + else + { + ss << static_cast<print_type>(ptr[i]); + } + max_width = std::max(max_width, ss.str().size()); } return max_width; diff --git a/arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h b/arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h index 70a201a1f8..d84ba69da2 100644 --- a/arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h +++ b/arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h @@ -54,8 +54,8 @@ public: * @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data type supported: same as @p input * @param[in] mean Mean values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input * @param[in] var Variance values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input - * @param[in] gamma Gamma values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input * @param[in] beta Beta values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] gamma Gamma values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input * @param[in] epsilon Small value to avoid division with zero. */ void configure(ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *var, const ICLTensor *beta, const ICLTensor *gamma, float epsilon); @@ -63,12 +63,12 @@ public: * * @param[in] input Source tensor info. In case of @p output tensor info = nullptr, this tensor will store the result. * 3 lower dimensions represent a single input with dimensions [width, height, FM]. - * @param[in] output Destination tensor info. Output will have the same number of dimensions as input. Data type supported: same as @p input * The rest are optional and used for representing batches. Data types supported: QS8/QS16/F16/F32.
+ * @param[in] output Destination tensor info. Output will have the same number of dimensions as input. Data type supported: same as @p input * @param[in] mean Mean values tensor info. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input * @param[in] var Variance values tensor info. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input - * @param[in] gamma Gamma values tensor info. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input * @param[in] beta Beta values tensor info. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] gamma Gamma values tensor info. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input * @param[in] epsilon Small value to avoid division with zero. * * @return an error status diff --git a/arm_compute/runtime/CL/functions/CLNormalizationLayer.h b/arm_compute/runtime/CL/functions/CLNormalizationLayer.h index 0818cec2e5..1e0b27ae43 100644 --- a/arm_compute/runtime/CL/functions/CLNormalizationLayer.h +++ b/arm_compute/runtime/CL/functions/CLNormalizationLayer.h @@ -37,7 +37,7 @@ namespace arm_compute { class ICLTensor; -/** Basic function to simulate a normalization layer. This function calls the following CL kernels: +/** Basic function to compute a normalization layer. This function calls the following CL kernels: * * -# @ref CLFillBorderKernel * -# @ref CLNormalizationLayerKernel @@ -55,7 +55,7 @@ public: * @param[out] output Destination tensor. Dimensions, data type and number of channels must match the input ones. * @param[in] norm_info Normalization layer information like the normalization type, normalization size and other parameters. 
*/ - void configure(ICLTensor *input, ICLTensor *output, NormalizationLayerInfo norm_info); + void configure(ICLTensor *input, ICLTensor *output, const NormalizationLayerInfo &norm_info); // Inherited methods overridden: void run() override; diff --git a/arm_compute/runtime/GLES_COMPUTE/GCFunctions.h b/arm_compute/runtime/GLES_COMPUTE/GCFunctions.h new file mode 100644 index 0000000000..8a345c5fab --- /dev/null +++ b/arm_compute/runtime/GLES_COMPUTE/GCFunctions.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_GCFUNCTIONS_H__ +#define __ARM_COMPUTE_GCFUNCTIONS_H__ + +/* Header regrouping all the GLES compute functions */ +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCAbsoluteDifference.h" +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCActivationLayer.h" +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCBatchNormalizationLayer.h" +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCDepthConcatenate.h" +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCDirectConvolutionLayer.h" +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCDropoutLayer.h" +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCFillBorder.h" +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.h" +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCGEMM.h" +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCGEMMInterleave4x4.h" +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCGEMMTranspose1xW.h" +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCNormalizationLayer.h" +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCPixelWiseMultiplication.h" +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCPoolingLayer.h" +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.h" +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCTranspose.h" + +#endif /* __ARM_COMPUTE_GCFUNCTIONS_H__ */ diff --git a/arm_compute/runtime/GLES_COMPUTE/GCScheduler.h b/arm_compute/runtime/GLES_COMPUTE/GCScheduler.h new file mode 100644 index 0000000000..817f8b54b1 --- /dev/null +++ b/arm_compute/runtime/GLES_COMPUTE/GCScheduler.h @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __ARM_COMPUTE_GCSCHEDULER_H__ +#define __ARM_COMPUTE_GCSCHEDULER_H__ + +#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h" +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +class IGCKernel; + +/** Provides global access to a OpenGL ES context and command queue. */ +class GCScheduler +{ +private: + /** Constructor */ + GCScheduler(); + +public: + /** Access the scheduler singleton. + * + * @return The scheduler + */ + static GCScheduler &get(); + + /** Initialises the context and command queue used by the scheduler to default values + * and sets a default device and kernel path for the @ref GCKernelLibrary. + */ + void default_init(); + + /** Schedule the execution of the passed kernel if possible. + * + * @param[in] kernel Kernel to execute. 
+ * @param[in] flush (Optional) Specifies if the command queue will be flushed after running the kernel. + */ + void enqueue(IGCKernel &kernel, bool flush = true); + + /** Initialises the display and context to be used by the scheduler. + * + * @param[in] dpy The EGL display connection + * @param[in] ctx The EGL rendering context + */ + void init(EGLDisplay dpy, EGLContext ctx); + + /** Blocks until all commands in the associated command queue have finished. */ + void sync(); +}; +} + +#endif /* __ARM_COMPUTE_GCSCHEDULER_H__ */ diff --git a/arm_compute/runtime/GLES_COMPUTE/GCTensor.h b/arm_compute/runtime/GLES_COMPUTE/GCTensor.h new file mode 100644 index 0000000000..3e51f9908f --- /dev/null +++ b/arm_compute/runtime/GLES_COMPUTE/GCTensor.h @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef __ARM_COMPUTE_GCTENSOR_H__ +#define __ARM_COMPUTE_GCTENSOR_H__ + +#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCTensorAllocator.h" + +namespace arm_compute +{ +class ITensorAllocator; +class ITensorInfo; + +/** Interface for OpenGL ES tensor */ +class GCTensor : public IGCTensor +{ +public: + /** Default constructor */ + GCTensor(); + + /** Prevent instances of this class from being copied (As this class contains pointers). */ + GCTensor(const GCTensor &) = delete; + + /** Prevent instances of this class from being copy assigned (As this class contains pointers). */ + GCTensor &operator=(const GCTensor &) = delete; + + /** Allow instances of this class to be moved */ + GCTensor(GCTensor &&) = default; + + /** Allow instances of this class to be moved */ + GCTensor &operator=(GCTensor &&) = default; + + /** Virtual destructor */ + virtual ~GCTensor() = default; + + /** Return a pointer to the tensor's allocator + * + * @return A pointer to the tensor's allocator + */ + ITensorAllocator *allocator(); + + /** Enqueue a map operation of the allocated buffer on the given queue. + * + * @param[in] blocking (Optional) If true, then the mapping will be ready to use by the time + * this method returns, else it is the caller's responsibility + * to flush the queue and wait for the mapping operation to have completed before accessing the mapped memory. + * + * @note The mapped address can afterwards be retrieved through buffer(). + */ + void map(bool blocking = true); + + /** Enqueue an unmap operation of the allocated and mapped buffer on the given queue. + * + * @note This method simply enqueues the unmap operation, it is the caller's responsibility to flush the queue and make sure the unmap is finished before + * the memory is accessed by the device.
+ * + */ + void unmap(); + + // Inherited methods overridden: + TensorInfo *info() const override; + TensorInfo *info() override; + uint8_t *buffer() const override; + GLuint gc_buffer() const override; + +protected: + // Inherited methods overridden: + uint8_t *do_map(bool blocking) override; + void do_unmap() override; + +private: + mutable GCTensorAllocator _allocator; +}; + +using GCImage = GCTensor; +} + +#endif /*__ARM_COMPUTE_GCTENSOR_H__ */ diff --git a/arm_compute/runtime/GLES_COMPUTE/GCTensorAllocator.h b/arm_compute/runtime/GLES_COMPUTE/GCTensorAllocator.h new file mode 100644 index 0000000000..ce52cbbbdc --- /dev/null +++ b/arm_compute/runtime/GLES_COMPUTE/GCTensorAllocator.h @@ -0,0 +1,128 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef __ARM_COMPUTE_GCTENSORALLOCATOR_H__ +#define __ARM_COMPUTE_GCTENSORALLOCATOR_H__ + +#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h" +#include "arm_compute/runtime/ITensorAllocator.h" + +#include <memory> + +namespace arm_compute +{ +/** Basic implementation of a GLES memory tensor allocator. */ +class GCTensorAllocator : public ITensorAllocator +{ +public: + /** Default constructor. */ + GCTensorAllocator(); + + /** Prevent instances of this class from being copied (As this class contains pointers). */ + GCTensorAllocator(const GCTensorAllocator &) = delete; + + /** Prevent instances of this class from being copy assigned (As this class contains pointers). */ + GCTensorAllocator &operator=(const GCTensorAllocator &) = delete; + + /** Allow instances of this class to be moved */ + GCTensorAllocator(GCTensorAllocator &&) = default; + + /** Allow instances of this class to be moved */ + GCTensorAllocator &operator=(GCTensorAllocator &&) = default; + + /** Default destructor */ + ~GCTensorAllocator() = default; + + /** Interface to be implemented by the child class to return the pointer to the mapped data. */ + uint8_t *data(); + + /** Get the OpenGL ES buffer object name + * + * @return The buffer object name + */ + GLuint get_gl_ssbo_name() const; + + /** Enqueue a map operation of the allocated buffer on the given queue. + * + * @param[in] blocking If true, then the mapping will be ready to use by the time + * this method returns, else it is the caller's responsibility + * to flush the queue and wait for the mapping operation to have completed before using the returned mapping pointer. + * + * @return The mapping address. + */ + uint8_t *map(bool blocking); + + /** Enqueue an unmap operation of the allocated buffer on the given queue. + * + * @note This method simply enqueues the unmap operation, it is the caller's responsibility to flush the queue and make sure the unmap is finished before + * the memory is accessed by the device.
+ * + */ + void unmap(); + + /** Allocate size specified by TensorInfo of GLES memory. + * + * @note The tensor must not already be allocated when calling this function. + * + */ + void allocate() override; + + /** Free allocated GLES memory. + * + * @note The tensor must have been allocated when calling this function. + * + */ + void free() override; + +protected: + /** Call map() on the SSBO. + * + * @return A pointer to the beginning of the tensor's allocation. + */ + uint8_t *lock() override; + + /** Call unmap() on the SSBO. */ + void unlock() override; + +private: + class GLBufferWrapper + { + public: + GLBufferWrapper() + : _ssbo_name(0) + { + ARM_COMPUTE_GL_CHECK(glGenBuffers(1, &_ssbo_name)); + } + ~GLBufferWrapper() + { + ARM_COMPUTE_GL_CHECK(glDeleteBuffers(1, &_ssbo_name)); + } + GLuint _ssbo_name; + }; + std::unique_ptr<GLBufferWrapper> _gl_buffer; + uint8_t *_mapping; +}; +} + +#endif /* __ARM_COMPUTE_GCTENSORALLOCATOR_H__ */ diff --git a/arm_compute/runtime/GLES_COMPUTE/IGCSimpleFunction.h b/arm_compute/runtime/GLES_COMPUTE/IGCSimpleFunction.h new file mode 100644 index 0000000000..15bbfffe95 --- /dev/null +++ b/arm_compute/runtime/GLES_COMPUTE/IGCSimpleFunction.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_IGCSIMPLEFUNCTION_H__ +#define __ARM_COMPUTE_IGCSIMPLEFUNCTION_H__ + +#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h" +#include "arm_compute/core/GLES_COMPUTE/kernels/GCFillBorderKernel.h" +#include "arm_compute/runtime/IFunction.h" + +#include <memory> + +namespace arm_compute +{ +/** Basic interface for functions which have a single OpenGL ES kernel */ +class IGCSimpleFunction : public IFunction +{ +public: + /** Default constructor */ + IGCSimpleFunction(); + + // Inherited methods overridden: + void run() override final; + +protected: + std::unique_ptr<IGCKernel> _kernel; /**< Kernel to run */ + GCFillBorderKernel _border_handler; /**< Kernel to handle borders */ +}; +} +#endif /*__ARM_COMPUTE_IGCSIMPLEFUNCTION_H__ */ diff --git a/arm_compute/runtime/GLES_COMPUTE/functions/GCAbsoluteDifference.h b/arm_compute/runtime/GLES_COMPUTE/functions/GCAbsoluteDifference.h new file mode 100644 index 0000000000..0d4a354e26 --- /dev/null +++ b/arm_compute/runtime/GLES_COMPUTE/functions/GCAbsoluteDifference.h @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2017 ARM Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __ARM_COMPUTE_GCABSOLUTEDIFFERENCE_H__ +#define __ARM_COMPUTE_GCABSOLUTEDIFFERENCE_H__ + +#include "arm_compute/runtime/GLES_COMPUTE/IGCSimpleFunction.h" + +namespace arm_compute +{ +class ITensor; + +/** Basic function to run @ref GCAbsoluteDifferenceKernel + * + * @note The tensor data types for the inputs must be U8. + * @note The function calculates the absolute difference also when the 2 inputs have different tensor data types. + */ +class GCAbsoluteDifference : public IGCSimpleFunction +{ +public: + /** Initialize the function + * + * @param[in] input1 First input tensor. Data types supported: U8 + * @param[in] input2 Second input tensor. Data types supported: U8 + * @param[out] output Output tensor. 
Data types supported: U8 + */ + void configure(const IGCTensor *input1, const IGCTensor *input2, IGCTensor *output); +}; +} + +#endif /* __ARM_COMPUTE_GCABSOLUTEDIFFERENCE_H__ */ diff --git a/arm_compute/runtime/GLES_COMPUTE/functions/GCActivationLayer.h b/arm_compute/runtime/GLES_COMPUTE/functions/GCActivationLayer.h new file mode 100644 index 0000000000..b43456b2cd --- /dev/null +++ b/arm_compute/runtime/GLES_COMPUTE/functions/GCActivationLayer.h @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_GCACTIVATIONLAYER_H__ +#define __ARM_COMPUTE_GCACTIVATIONLAYER_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/GLES_COMPUTE/IGCSimpleFunction.h" + +namespace arm_compute +{ +class IGCTensor; + +/** Basic function to run @ref GCActivationLayerKernel + * + * @note The function simulates an activation layer with the specified activation function. + */ +class GCActivationLayer : public IGCSimpleFunction +{ +public: + /** Set the input and output tensor. + * + * @note If the output tensor is a nullptr, the activation function will be performed in-place + * + * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result + * of the activation function. Data types supported: F16/F32. + * @param[out] output Destination tensor. Data type supported: same as @p input + * @param[in] act_info Activation layer parameters. + */ + void configure(IGCTensor *input, IGCTensor *output, ActivationLayerInfo act_info); +}; +} +#endif /* __ARM_COMPUTE_GCACTIVATIONLAYER_H__ */ diff --git a/arm_compute/runtime/GLES_COMPUTE/functions/GCBatchNormalizationLayer.h b/arm_compute/runtime/GLES_COMPUTE/functions/GCBatchNormalizationLayer.h new file mode 100644 index 0000000000..9d81b9a7f7 --- /dev/null +++ b/arm_compute/runtime/GLES_COMPUTE/functions/GCBatchNormalizationLayer.h @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_GCBATCHNORMALIZATIONLAYER_H__ +#define __ARM_COMPUTE_GCBATCHNORMALIZATIONLAYER_H__ + +#include "arm_compute/runtime/IFunction.h" + +#include "arm_compute/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.h" +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +class IGCTensor; + +/** Basic function to run @ref GCBatchNormalizationLayerKernel and simulate a batch normalization layer. + * + * Batch normalization is calculated by: + * @f[ out_i = \gamma * (\frac{in_i - \mu_{B}}{\sqrt{\sigma^2_{B} + \epsilon}}) + \beta \equiv BN_{\gamma,\beta}(in_i) @f] + * + */ +class GCBatchNormalizationLayer : public IFunction +{ +public: + /** Default constructor */ + GCBatchNormalizationLayer(); + /** Set the input and output tensors. + * + * @param[in] input Source tensor. 
3 lower dimensions represent a single input with dimensions [width, height, FM]. + * The rest are optional and used for representing batches. Data types supported: F16/F32. + * @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data type supported: same as @p input + * @param[in] mean Mean values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] var Variance values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] beta Beta values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] gamma Gamma values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] epsilon Small value to avoid division with zero. + */ + void configure(const IGCTensor *input, IGCTensor *output, const IGCTensor *mean, const IGCTensor *var, const IGCTensor *beta, const IGCTensor *gamma, float epsilon); + + // Inherited methods overridden: + void run() override; + +private: + GCBatchNormalizationLayerKernel _norm_kernel; /**< BatchNormalization layer kernel to run */ +}; +} +#endif /* __ARM_COMPUTE_GCBATCHNORMALIZATIONLAYER_H__ */ diff --git a/arm_compute/runtime/GLES_COMPUTE/functions/GCDepthConcatenate.h b/arm_compute/runtime/GLES_COMPUTE/functions/GCDepthConcatenate.h new file mode 100644 index 0000000000..801dc0e111 --- /dev/null +++ b/arm_compute/runtime/GLES_COMPUTE/functions/GCDepthConcatenate.h @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_GCDEPTHCONCATENATE_H__ +#define __ARM_COMPUTE_GCDEPTHCONCATENATE_H__ + +#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h" +#include "arm_compute/core/GLES_COMPUTE/kernels/GCDepthConcatenateKernel.h" +#include "arm_compute/core/GLES_COMPUTE/kernels/GCFillBorderKernel.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/IFunction.h" + +#include <memory> +#include <vector> + +namespace arm_compute +{ +class IGCTensor; + +/** Basic function to execute concatenate tensors along z axis. 
This function calls the following kernels: + * + * -# @ref GCFillBorderKernel (executed if input's lowest two dimensions are smaller than respective output's dimensions) + * -# @ref GCDepthConcatenateKernel + * + */ +class GCDepthConcatenate : public IFunction +{ +public: + /** Default constructor */ + GCDepthConcatenate(); + /** Initialise the kernel's inputs vector and output. + * + * @param[in,out] inputs_vector The vectors containing all the tensors to concatenate. Data types supported: F16/F32. + * @param[out] output Output tensor. Data types supported: Same as @p input. + */ + void configure(std::vector<IGCTensor *> inputs_vector, IGCTensor *output); + + // Inherited methods overridden: + void run() override; + +private: + std::unique_ptr<GCDepthConcatenateKernel[]> _concat_kernels_vector; + std::unique_ptr<GCFillBorderKernel[]> _border_handlers_vector; + unsigned int _num_inputs; +}; +} +#endif /* __ARM_COMPUTE_GCDEPTHCONCATENATE_H__ */ diff --git a/arm_compute/runtime/GLES_COMPUTE/functions/GCDirectConvolutionLayer.h b/arm_compute/runtime/GLES_COMPUTE/functions/GCDirectConvolutionLayer.h new file mode 100644 index 0000000000..5472bdb9ea --- /dev/null +++ b/arm_compute/runtime/GLES_COMPUTE/functions/GCDirectConvolutionLayer.h @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_GCDIRECTCONVOLUTIONLAYER_H__ +#define __ARM_COMPUTE_GCDIRECTCONVOLUTIONLAYER_H__ + +#include "arm_compute/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/GLES_COMPUTE/IGCSimpleFunction.h" + +#include <memory> + +namespace arm_compute +{ +class IGCTensor; + +/** Basic function to execute direct convolution function: + * + * @note Supported kernel size: 1x1, 3x3, and 5x5 + * @note This OpenGL ES implementation works with stride_x = 1 and 2 + */ +class GCDirectConvolutionLayer : public IGCSimpleFunction +{ +public: + /** Set the input and output tensors. + * + * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM], + * while every optional dimension from 4 and above represent a batch of inputs. + * Data types supported: F16/F32. + * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported:Same as @p input. + * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. Data type supported:Same as @p input. + * @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs. + * Data types supported: Same as @p input. + * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. 
+ */ + void configure(const IGCTensor *input, const IGCTensor *weights, const IGCTensor *biases, IGCTensor *output, const PadStrideInfo &conv_info); +}; +} +#endif /* __ARM_COMPUTE_GCDIRECTCONVOLUTIONLAYER_H__ */ diff --git a/arm_compute/runtime/GLES_COMPUTE/functions/GCDropoutLayer.h b/arm_compute/runtime/GLES_COMPUTE/functions/GCDropoutLayer.h new file mode 100644 index 0000000000..6a08d96676 --- /dev/null +++ b/arm_compute/runtime/GLES_COMPUTE/functions/GCDropoutLayer.h @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __ARM_COMPUTE_GCDROPOUTLAYER_H__ +#define __ARM_COMPUTE_GCDROPOUTLAYER_H__ + +#include "arm_compute/core/GLES_COMPUTE/kernels/GCDropoutKernel.h" +#include "arm_compute/runtime/IFunction.h" + +namespace arm_compute +{ +class IGCTensor; +/** Basic function to do dropout op. 
This function calls the following kernels: + * + * -# @ref GCDropoutKernel + */ +class GCDropoutLayer : public IFunction +{ +public: + /** Constructor */ + GCDropoutLayer(); + + /** Set the input and output tensors. + * + * @param[in] input Source tensor. Data type supported: F16/F32. + * @param[out] mask Destination tensor. Data type supported: Same as @p input. + * @param[out] output Destination tensor. Data type supported: Same as @p input. + * @param[in] ratio Dropout ratio + * @param[in] forward Forward or backward propagation + * + */ + void configure(const IGCTensor *input, IGCTensor *mask, IGCTensor *output, float ratio, bool forward); + + //Inherited methods override + void run() override; + +private: + GCDropoutKernel _dropout_kernel; +}; +} + +#endif /* __ARM_COMPUTE_GCDROPOUTLAYER_H__ */ diff --git a/arm_compute/runtime/GLES_COMPUTE/functions/GCFillBorder.h b/arm_compute/runtime/GLES_COMPUTE/functions/GCFillBorder.h new file mode 100644 index 0000000000..a04e4002ff --- /dev/null +++ b/arm_compute/runtime/GLES_COMPUTE/functions/GCFillBorder.h @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __ARM_COMPUTE_GCFILLBORDER_H__ +#define __ARM_COMPUTE_GCFILLBORDER_H__ + +#include "arm_compute/core/PixelValue.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/GLES_COMPUTE/IGCSimpleFunction.h" + +namespace arm_compute +{ +class ITensor; + +/** Basic function to run @ref GCFillBorderKernel */ +class GCFillBorder : public IGCSimpleFunction +{ +public: + /** Initialize the function + * + * @param[in,out] tensor Source tensor. Data types supported: F16/F32 + * @param[in] border_width The border width + * @param[in] border_mode Strategy to use for borders. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + */ + void configure(IGCTensor *tensor, unsigned int border_width, BorderMode border_mode, + const PixelValue &constant_border_value = PixelValue()); +}; +} + +#endif /*__ARM_COMPUTE_FILLBORDER_H__ */ diff --git a/arm_compute/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.h b/arm_compute/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.h new file mode 100644 index 0000000000..1ae5837de0 --- /dev/null +++ b/arm_compute/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.h @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_GCFULLYCONNECTEDLAYER_H__ +#define __ARM_COMPUTE_GCFULLYCONNECTEDLAYER_H__ + +#include "arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixAccumulateBiasesKernel.h" +#include "arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixMultiplyKernel.h" +#include "arm_compute/core/GLES_COMPUTE/kernels/GCIm2ColKernel.h" +#include "arm_compute/core/GLES_COMPUTE/kernels/GCTransposeKernel.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h" +#include "arm_compute/runtime/GLES_COMPUTE/IGCSimpleFunction.h" + +namespace arm_compute +{ +/** Basic function to reshape the weights of Fully Connected layer with OpenGL ES. This function calls the following kernels: + * + * -# @ref GCTransposeKernel + * + * @note The fully connected layer accepts "weights" tensors only with 2 dimensions. 
+ */ +class GCFullyConnectedLayerReshapeWeights : public IGCSimpleFunction +{ +public: + /** Set the input and output tensors. + * + * @param[in] input Weights tensor. The weights must be 2 dimensional. Data types supported: F16/F32. + * @param[out] output Destination tensor which stores the transposed input tensor. Data type supported: Same as @p input. + */ + void configure(const IGCTensor *input, IGCTensor *output); +}; + +/** Basic function to compute a Fully Connected layer on OpenGL ES. This function calls the following OpenGL ES kernels: + * + * -# @ref GCIm2ColKernel (called when the input comes from a convolutional layer) + * -# @ref GCFullyConnectedLayerReshapeWeights (if @p are_weights_reshaped is set to false and transpose_weights is set to true ) (called once) + * -# @ref GCGEMMMatrixMultiplyKernel + * -# @ref GCGEMMMatrixAccumulateBiasesKernel (if @p biases is not equal to nullptr) + * + * @note The fully connected layer accepts "weights" tensors only with 2 dimensions. + */ +class GCFullyConnectedLayer : public IFunction +{ +public: + /** Constructor */ + GCFullyConnectedLayer(); + /** Set the input and output tensors. + * + * @param[in] input Source tensor. Data type supported: F16/F32. + * @param[in] weights Weights tensor. The weights must be 2 dimensional. Data type supported: Same as @p input + * @param[in] biases Bias tensor. It can be nullptr. Data type supported:Same as @p input. + * @param[out] output Destination tensor. Data type supported: Same as @p input. + * @param[in] transpose_weights (Optional) Transpose weights if true. Defaults to true. + * @param[in] are_weights_reshaped (Optional) Reshape the weights tensor if false. Defaults to false. 
+ */ + void configure(const IGCTensor *input, const IGCTensor *weights, const IGCTensor *biases, IGCTensor *output, bool transpose_weights = true, bool are_weights_reshaped = false); + + //Inherited methods override + void run() override; + +private: + void configure_fc_fc(const IGCTensor *input, const IGCTensor *weights, IGCTensor *output); + void configure_conv_fc(const IGCTensor *input, const IGCTensor *weights, IGCTensor *output); + + GCIm2ColKernel _im2col_kernel; + GCFullyConnectedLayerReshapeWeights _reshape_weights_kernel; + GCGEMMMatrixMultiplyKernel _mm_kernel; + GCGEMMMatrixAccumulateBiasesKernel _accumulate_biases_kernel; + GCTensor _im2col_output; + GCTensor _reshape_weights_output; + bool _are_weights_reshaped; + bool _is_fc_after_conv; + bool _accumulate_biases; +}; +} +#endif /* __ARM_COMPUTE_GCFULLYCONNECTEDLAYER_H__ */ diff --git a/arm_compute/runtime/GLES_COMPUTE/functions/GCGEMM.h b/arm_compute/runtime/GLES_COMPUTE/functions/GCGEMM.h new file mode 100644 index 0000000000..f2484cd801 --- /dev/null +++ b/arm_compute/runtime/GLES_COMPUTE/functions/GCGEMM.h @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __ARM_COMPUTE_GCGEMM_H__ +#define __ARM_COMPUTE_GCGEMM_H__ + +#include "arm_compute/core/GLES_COMPUTE/kernels/GCGEMMInterleave4x4Kernel.h" +#include "arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixAdditionKernel.h" +#include "arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixMultiplyKernel.h" +#include "arm_compute/core/GLES_COMPUTE/kernels/GCGEMMTranspose1xWKernel.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h" +#include "arm_compute/runtime/IFunction.h" + +namespace arm_compute +{ +class IGCTensor; + +/** Basic function to execute GEMM on OpenGLES Compute. This function calls the following kernels: + * + * -# @ref GCGEMMInterleave4x4Kernel (if the output tensor is a matrix) + * -# @ref GCGEMMTranspose1xWKernel (if the output tensor is a matrix) + * -# @ref GCGEMMMatrixMultiplyKernel + * -# @ref GCGEMMMatrixAdditionKernel (if c != nullptr and beta != 0.0) + * + */ +class GCGEMM : public IFunction +{ +public: + /** Default constructor. */ + GCGEMM(); + + /** Initialise the kernel's inputs and output + * + * @note GEMM: General Matrix Multiply - [alpha * A * B + beta * C]. + * + * @note All tensors must have the same data type. + * + * @note Whilst the first input tensor can be a vector, the second input tensor must be at least a matrix + * + * @param[in] a First input tensor (Matrix or Vector A). Data types supported: F32 + * @param[in] b Second input tensor (Matrix B). Data type supported: same as @p a. + * @param[in] c Third input tensor (Matrix C). 
It can be a nullptr if just the multiplication between @p a and @p b is needed. Data type supported: same as @p a. + * @param[out] output Output tensor. Data type supported: same as @p a + * @param[in] alpha Weight of the matrix product + * @param[in] beta Weight of matrix C + */ + void configure(const IGCTensor *a, const IGCTensor *b, const IGCTensor *c, IGCTensor *output, float alpha, float beta); + + // Inherited methods overridden: + void run() override; + +private: + GCGEMMInterleave4x4Kernel _interleave_kernel; + GCGEMMTranspose1xWKernel _transpose_kernel; + GCGEMMMatrixMultiplyKernel _mm_kernel; + GCGEMMMatrixAdditionKernel _ma_kernel; + GCTensor _tmp_a; + GCTensor _tmp_b; + bool _is_interleaved_transposed; + bool _run_addition; +}; +} + +#endif /* __ARM_COMPUTE_GCGEMM_H__ */ diff --git a/arm_compute/runtime/GLES_COMPUTE/functions/GCGEMMInterleave4x4.h b/arm_compute/runtime/GLES_COMPUTE/functions/GCGEMMInterleave4x4.h new file mode 100644 index 0000000000..48fa7ed504 --- /dev/null +++ b/arm_compute/runtime/GLES_COMPUTE/functions/GCGEMMInterleave4x4.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_GCGEMMINTERLEAVE4X4_H__ +#define __ARM_COMPUTE_GCGEMMINTERLEAVE4X4_H__ + +#include "arm_compute/runtime/GLES_COMPUTE/IGCSimpleFunction.h" + +namespace arm_compute +{ +class ITensor; + +/** Basic function to execute GCGEMMInterleave4x4Kernel. This function calls the following OpenGL ES kernel: + * + * -# @ref GCGEMMInterleave4x4Kernel + * + */ +class GCGEMMInterleave4x4 : public IGCSimpleFunction +{ +public: + /** Initialise the kernel's inputs, output + * + * @param[in] input First input tensor. Data types supported: F32 + * @param[out] output Output tensor. Data type supported: same as @p input + */ + void configure(const IGCTensor *input, IGCTensor *output); +}; +} + +#endif /* __ARM_COMPUTE_GCGEMMINTERLEAVE4X4_H__ */ diff --git a/arm_compute/runtime/GLES_COMPUTE/functions/GCGEMMTranspose1xW.h b/arm_compute/runtime/GLES_COMPUTE/functions/GCGEMMTranspose1xW.h new file mode 100644 index 0000000000..24af2193c3 --- /dev/null +++ b/arm_compute/runtime/GLES_COMPUTE/functions/GCGEMMTranspose1xW.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_GCGEMMTRANSPOSE1XW_H__ +#define __ARM_COMPUTE_GCGEMMTRANSPOSE1XW_H__ + +#include "arm_compute/runtime/GLES_COMPUTE/IGCSimpleFunction.h" + +namespace arm_compute +{ +/** Basic function to execute GCGEMMTranspose1xWKernel. This function calls the following OpenGLES kernels: + * + * -# @ref GCGEMMTranspose1xWKernel + * + */ +class GCGEMMTranspose1xW : public IGCSimpleFunction +{ +public: + /** Initialise the kernel's inputs, output + * + * @param[in] input First input tensor. Data type supported: F32 + * @param[out] output Output tensor. 
Data type supported: same as @p input + */ + void configure(const IGCTensor *input, IGCTensor *output); +}; +} +#endif /*__ARM_COMPUTE_GCGEMMTRANSPOSE1XW_H__ */ diff --git a/arm_compute/runtime/GLES_COMPUTE/functions/GCNormalizationLayer.h b/arm_compute/runtime/GLES_COMPUTE/functions/GCNormalizationLayer.h new file mode 100644 index 0000000000..d080a2f7b9 --- /dev/null +++ b/arm_compute/runtime/GLES_COMPUTE/functions/GCNormalizationLayer.h @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_GCNORMALIZATIONLAYER_H__ +#define __ARM_COMPUTE_GCNORMALIZATIONLAYER_H__ + +#include "arm_compute/runtime/IFunction.h" + +#include "arm_compute/core/GLES_COMPUTE/kernels/GCFillBorderKernel.h" +#include "arm_compute/core/GLES_COMPUTE/kernels/GCNormalizationLayerKernel.h" +#include "arm_compute/core/GLES_COMPUTE/kernels/GCPixelWiseMultiplicationKernel.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h" + +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +class IGCTensor; + +/** Basic function to compute a normalization layer. This function calls the following OpenGL ES kernels: + * + * -# @ref GCPixelWiseMultiplicationKernel + * -# @ref GCFillBorderKernel + * -# @ref GCNormalizationLayerKernel + * + */ +class GCNormalizationLayer : public IFunction +{ +public: + /** Default constructor */ + GCNormalizationLayer(); + /** Set the input and output tensors. + * + * @param[in] input Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM], + * and an optional 4th dimension for batch of inputs. Data types supported: F32. Number of channels must be 1. + * @param[out] output Destination tensor. Dimensions, data type and number of channels must match the input ones. + * @param[in] norm_info Normalization layer information like the normalization type, normalization size and other parameters. 
+ */ + void configure(const IGCTensor *input, IGCTensor *output, const NormalizationLayerInfo &norm_info); + + // Inherited methods overridden: + void run() override; + +private: + GCTensor _squared_input; /**< The intermediate buffer which stores results of squaring input*/ + GCNormalizationLayerKernel _norm_kernel; /**< Normalization layer kernel to run */ + GCPixelWiseMultiplicationKernel _multiply_kernel; /**< Pixel multiplication kernel to run */ + GCFillBorderKernel _border_handler; /**< Kernel to handle borders */ +}; +} +#endif /* __ARM_COMPUTE_GCNORMALIZATIONLAYER_H__ */ diff --git a/arm_compute/runtime/GLES_COMPUTE/functions/GCPixelWiseMultiplication.h b/arm_compute/runtime/GLES_COMPUTE/functions/GCPixelWiseMultiplication.h new file mode 100644 index 0000000000..e6239edc2f --- /dev/null +++ b/arm_compute/runtime/GLES_COMPUTE/functions/GCPixelWiseMultiplication.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_GCPIXELWISEMULTIPLICATION_H__ +#define __ARM_COMPUTE_GCPIXELWISEMULTIPLICATION_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/GLES_COMPUTE/IGCSimpleFunction.h" + +namespace arm_compute +{ +class IGCTensor; + +/** Basic function to run @ref GCPixelWiseMultiplicationKernel. */ +class GCPixelWiseMultiplication : public IGCSimpleFunction +{ +public: + /** Initialise the kernel's inputs, output and conversion policy. + * + * @param[in] input1 First tensor input. Data types supported: F32. + * @param[in] input2 Second tensor input. Data types supported: Same as @p input1. + * @param[out] output Output tensor. Data types supported: Same as @p input1. + * @param[in] scale Scale to apply after multiplication. Must be a positive value. + */ + void configure(const IGCTensor *input1, const IGCTensor *input2, IGCTensor *output, float scale); +}; +} +#endif /*__ARM_COMPUTE_GCPIXELWISEMULTIPLICATION_H__ */ diff --git a/arm_compute/runtime/GLES_COMPUTE/functions/GCPoolingLayer.h b/arm_compute/runtime/GLES_COMPUTE/functions/GCPoolingLayer.h new file mode 100644 index 0000000000..cce44d0c3c --- /dev/null +++ b/arm_compute/runtime/GLES_COMPUTE/functions/GCPoolingLayer.h @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_GCPOOLINGLAYER_H__ +#define __ARM_COMPUTE_GCPOOLINGLAYER_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/GLES_COMPUTE/IGCSimpleFunction.h" + +#include + +namespace arm_compute +{ +class IGCTensor; + +/** Basic function to simulate a pooling layer with the specified pooling operation. This function calls the following OpenGL ES kernels: + * + * -# @ref GCFillBorderKernel (executed if padding size is different from zero) + * -# @ref GCPoolingLayerKernel + */ +class GCPoolingLayer : public IGCSimpleFunction +{ +public: + /** Set the input and output tensors. + * + * @param[in,out] input Source tensor. (Written to only when padding != 0) Data types supported: F16/F32. + * @param[out] output Destination tensor. Data types supported: Same as @p input. 
+ * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo. + */ + void configure(IGCTensor *input, IGCTensor *output, const PoolingLayerInfo &pool_info); +}; +} // namespace arm_compute +#endif /* __ARM_COMPUTE_GCPOOLINGLAYER_H__ */ diff --git a/arm_compute/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.h b/arm_compute/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.h new file mode 100644 index 0000000000..19bfb83eca --- /dev/null +++ b/arm_compute/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.h @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_GCSOFTMAXLAYER_H__ +#define __ARM_COMPUTE_GCSOFTMAXLAYER_H__ + +#include "arm_compute/core/GLES_COMPUTE/kernels/GCSoftmaxLayerKernel.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h" +#include "arm_compute/runtime/IFunction.h" + +namespace arm_compute +{ +class IGCTensor; + +/** Basic function to compute a SoftmaxLayer. + * + * Softmax is calculated by : + * @f[ out = exp(x - max(x)) / sum(exp(x - max(x))) @f] + * + * This function runs the following kernels: + * -# @ref GCLogits1DMaxKernel + * -# @ref GCLogits1DShiftExpSumKernel + * -# @ref GCLogits1DNormKernel + */ +class GCSoftmaxLayer : public IFunction +{ +public: + /** Constructor */ + GCSoftmaxLayer(); + /** Set the input and output tensors. + * + * @param[in] input Source tensor. Data types supported: F16/F32 + * @param[out] output Destination tensor. Data types supported: same as @p input + */ + void configure(const IGCTensor *input, IGCTensor *output); + + // Inherited methods overridden: + void run() override; + +private: + GCLogits1DMaxKernel _max_kernel; + GCLogits1DShiftExpSumKernel _shift_exp_sum_kernel; + GCLogits1DNormKernel _norm_kernel; + GCTensor _max; + GCTensor _sum; + GCTensor _tmp; +}; +} +#endif /* __ARM_COMPUTE_GCSOFTMAXLAYER_H__ */ diff --git a/arm_compute/runtime/GLES_COMPUTE/functions/GCTranspose.h b/arm_compute/runtime/GLES_COMPUTE/functions/GCTranspose.h new file mode 100644 index 0000000000..23324343f9 --- /dev/null +++ b/arm_compute/runtime/GLES_COMPUTE/functions/GCTranspose.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_GCTRANSPOSE_H__ +#define __ARM_COMPUTE_GCTRANSPOSE_H__ + +#include "arm_compute/runtime/GLES_COMPUTE/IGCSimpleFunction.h" + +namespace arm_compute +{ +class IGCTensor; + +/** Basic function to transpose a matrix on OpenGL ES. This function calls the following OpenGL ES kernel: + * + * -# @ref GCTransposeKernel + * + */ +class GCTranspose : public IGCSimpleFunction +{ +public: + /** Initialise the kernel's inputs and output + * + * @param[in] input Input tensor. Data types supported: F16/F32 + * @param[out] output Output tensor. 
Data type supported: Same as @p input + */ + void configure(const IGCTensor *input, IGCTensor *output); +}; +} + +#endif /* __ARM_COMPUTE_GCTRANSPOSE_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h b/arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h index 041b9e7290..b2de7162f1 100644 --- a/arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h +++ b/arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h @@ -50,12 +50,12 @@ public: * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result. * 3 lower dimensions represent a single input with dimensions [width, height, FM]. * The rest are optional and used for representing batches. Data types supported: QS8/QS16/F16/F32. + * @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data type supported: same as @p input * @param[in] mean Mean values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input * @param[in] var Variance values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input - * @param[in] gamma Gamma values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input * @param[in] beta Beta values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] gamma Gamma values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input * @param[in] epsilon Small value to avoid division with zero. - * @param[out] output Destination tensor. Output will have the same number of dimensions as input. 
Data type supported: same as @p input */ void configure(ITensor *input, ITensor *output, const ITensor *mean, const ITensor *var, const ITensor *beta, const ITensor *gamma, float epsilon); diff --git a/arm_compute/runtime/NEON/functions/NENormalizationLayer.h b/arm_compute/runtime/NEON/functions/NENormalizationLayer.h index 1c95c5bc4a..0d5656d602 100644 --- a/arm_compute/runtime/NEON/functions/NENormalizationLayer.h +++ b/arm_compute/runtime/NEON/functions/NENormalizationLayer.h @@ -40,7 +40,7 @@ namespace arm_compute { class ITensor; -/** Basic function to simulate a normalization layer. This function calls the following NEON kernels: +/** Basic function to compute a normalization layer. This function calls the following NEON kernels: * * -# @ref NEPixelWiseMultiplicationKernel * -# @ref NEFillBorderKernel @@ -59,7 +59,7 @@ public: * @param[out] output Destination with the same dimensions, data type and number of channels of @p input * @param[in] norm_info Normalization layer information like the normalization type, normalization size and other parameters. 
*/ - void configure(const ITensor *input, ITensor *output, NormalizationLayerInfo norm_info); + void configure(const ITensor *input, ITensor *output, const NormalizationLayerInfo &norm_info); // Inherited methods overridden: void run() override; diff --git a/examples/SConscript b/examples/SConscript index 52d2f26cfe..90b271d473 100644 --- a/examples/SConscript +++ b/examples/SConscript @@ -27,11 +27,18 @@ Import('env') if env['opencl']: Import('opencl') +if env['gles_compute'] and env['os'] != 'android': + Import('egl') + Import('glesv2') + examples_env = env.Clone() examples_env.Append(CPPPATH = ["#"]) examples_env.Append(LIBPATH = ["#build/%s/opencl-1.2-stubs" % env['build_dir']]) +if env['gles_compute'] and env['os'] != 'android': + examples_env.Append(LIBPATH = ["#build/%s/opengles-3.1/stubs" % env['build_dir']]) + # Build examples utils = examples_env.Object("../utils/Utils.cpp") @@ -86,3 +93,19 @@ if env['neon']: Depends(prog, arm_compute_dependency) alias = examples_env.Alias(example, prog) Default(alias) + +if env['gles_compute']: + for file in Glob("./gc_*.cpp"): + example = os.path.basename(os.path.splitext(str(file))[0]) + if env['os'] != 'android': + examples_env.Append(CPPPATH = ["#opengles-3.1/include", "#opengles-3.1/mali_include"]) + prog = examples_env.Program(example, ["{}.cpp".format(example), utils], CPPDEFINES=['ARM_COMPUTE_GC'], LIBS = [arm_compute_libs, "EGL", "GLESv2"]) + Depends(prog, [arm_compute_dependency, egl, glesv2]) + else: + if env['arch'] != 'armv7a': + prog = examples_env.Program(example, ["{}.cpp".format(example), utils], CPPDEFINES=['ARM_COMPUTE_GC'], LIBS = [arm_compute_libs, "EGL", "GLESv3"]) + else: + prog = examples_env.Program(example, ["{}.cpp".format(example), utils], CPPDEFINES=['ARM_COMPUTE_GC'], LIBS = [arm_compute_libs, "EGL", "GLESv2"]) + Depends(prog, [arm_compute_dependency]) + alias = examples_env.Alias(example, prog) + Default(alias) diff --git a/examples/gc_absdiff.cpp b/examples/gc_absdiff.cpp new file mode 
100644 index 0000000000..cd3e42989c --- /dev/null +++ b/examples/gc_absdiff.cpp @@ -0,0 +1,112 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef ARM_COMPUTE_GC /* Needed by Utils.cpp to handle OpenGL ES exceptions properly */ +#error "This example needs to be built with -DARM_COMPUTE_GC" +#endif /* ARM_COMPUTE_GC */ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCFunctions.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCScheduler.h" +#include "utils/Utils.h" + +using namespace arm_compute; +using namespace utils; + +void main_gc_absdiff(int argc, const char **argv) +{ + PPMLoader ppm1, ppm2; + GCImage src1, src2, dst; + GCScheduler::get().default_init(); + if(argc < 2) + { + // Print help + std::cout << "Usage: " << argv[0] << " [input0_image.ppm] [input1_image.ppm] \n\n"; + std::cout << "No input_image provided, creating two dummy 640x480 images\n"; + // Create two empty grayscale 640x480 images + src1.allocator()->init(TensorInfo(640, 480, Format::U8)); + src2.allocator()->init(TensorInfo(640, 480, Format::U8)); + } + else if(argc < 3) + { + // Print help + std::cout << "Usage: " << argv[0] << " [input0_image.ppm] [input1_image.ppm] \n\n"; + std::cout << "Only one input_image provided, creating a dummy 640x480 image\n"; + ppm1.open(argv[1]); + ppm1.init_image(src1, Format::U8); + // Create an empty grayscale 640x480 image + src2.allocator()->init(TensorInfo(640, 480, Format::U8)); + } + else + { + ppm1.open(argv[1]); + ppm1.init_image(src1, Format::U8); + ppm2.open(argv[2]); + ppm2.init_image(src2, Format::U8); + } + + // Configure the temporary and destination images + dst.allocator()->init(*src1.info()); + + GCAbsoluteDifference absdiff; + absdiff.configure(&src1, &src2, &dst); + + // Allocate all the images + src1.allocator()->allocate(); + src2.allocator()->allocate(); + dst.allocator()->allocate(); + + // Fill the input image with the content of the PPM image if a filename was provided: + if(ppm1.is_open()) + { + ppm1.fill_image(src1); + } + if(ppm2.is_open()) + { + ppm2.fill_image(src2); + } + + // Execute the functions: + absdiff.run(); + + // 
Make sure all the jobs are done executing: + GCScheduler::get().sync(); + + // Save the result to file: + if(ppm1.is_open()) + { + const std::string output_filename = std::string(argv[1]) + "_out.ppm"; + save_to_ppm(dst, output_filename); // save_to_ppm maps and unmaps the image to store as PPM + } +} + +/** Main program for absdiff test + * + * @param[in] argc Number of arguments + * @param[in] argv Arguments ( [optional] Path to the first PPM image to process, [optional] Path to the second PPM image to process ) + */ +int main(int argc, const char **argv) +{ + return utils::run_example(argc, argv, main_gc_absdiff); +} diff --git a/opengles-3.1/include/EGL/egl.h b/opengles-3.1/include/EGL/egl.h new file mode 100644 index 0000000000..13ccde3558 --- /dev/null +++ b/opengles-3.1/include/EGL/egl.h @@ -0,0 +1,303 @@ +#ifndef __egl_h_ +#define __egl_h_ 1 + +#ifdef __cplusplus +extern "C" { +#endif + +/* +** Copyright (c) 2013-2014 The Khronos Group Inc. +** +** Permission is hereby granted, free of charge, to any person obtaining a +** copy of this software and/or associated documentation files (the +** "Materials"), to deal in the Materials without restriction, including +** without limitation the rights to use, copy, modify, merge, publish, +** distribute, sublicense, and/or sell copies of the Materials, and to +** permit persons to whom the Materials are furnished to do so, subject to +** the following conditions: +** +** The above copyright notice and this permission notice shall be included +** in all copies or substantial portions of the Materials. +** +** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +** MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. +*/ +/* +** This header is generated from the Khronos OpenGL / OpenGL ES XML +** API Registry. The current version of the Registry, generator scripts +** used to make the header, and the header can be found at +** http://www.opengl.org/registry/ +** +** Khronos $Revision: 28371 $ on $Date: 2014-10-01 09:16:09 -0700 (Wed, 01 Oct 2014) $ +*/ + +#include + +/* Generated on date 20141001 */ + +/* Generated C header for: + * API: egl + * Versions considered: .* + * Versions emitted: .* + * Default extensions included: None + * Additional extensions included: _nomatch_^ + * Extensions removed: _nomatch_^ + */ + +#ifndef EGL_VERSION_1_0 +#define EGL_VERSION_1_0 1 +typedef unsigned int EGLBoolean; +typedef void *EGLDisplay; +#include +#include +typedef void *EGLConfig; +typedef void *EGLSurface; +typedef void *EGLContext; +typedef void (*__eglMustCastToProperFunctionPointerType)(void); +#define EGL_ALPHA_SIZE 0x3021 +#define EGL_BAD_ACCESS 0x3002 +#define EGL_BAD_ALLOC 0x3003 +#define EGL_BAD_ATTRIBUTE 0x3004 +#define EGL_BAD_CONFIG 0x3005 +#define EGL_BAD_CONTEXT 0x3006 +#define EGL_BAD_CURRENT_SURFACE 0x3007 +#define EGL_BAD_DISPLAY 0x3008 +#define EGL_BAD_MATCH 0x3009 +#define EGL_BAD_NATIVE_PIXMAP 0x300A +#define EGL_BAD_NATIVE_WINDOW 0x300B +#define EGL_BAD_PARAMETER 0x300C +#define EGL_BAD_SURFACE 0x300D +#define EGL_BLUE_SIZE 0x3022 +#define EGL_BUFFER_SIZE 0x3020 +#define EGL_CONFIG_CAVEAT 0x3027 +#define EGL_CONFIG_ID 0x3028 +#define EGL_CORE_NATIVE_ENGINE 0x305B +#define EGL_DEPTH_SIZE 0x3025 +#define EGL_DONT_CARE ((EGLint)-1) +#define EGL_DRAW 0x3059 +#define EGL_EXTENSIONS 0x3055 +#define EGL_FALSE 0 +#define EGL_GREEN_SIZE 0x3023 +#define EGL_HEIGHT 0x3056 +#define EGL_LARGEST_PBUFFER 
0x3058 +#define EGL_LEVEL 0x3029 +#define EGL_MAX_PBUFFER_HEIGHT 0x302A +#define EGL_MAX_PBUFFER_PIXELS 0x302B +#define EGL_MAX_PBUFFER_WIDTH 0x302C +#define EGL_NATIVE_RENDERABLE 0x302D +#define EGL_NATIVE_VISUAL_ID 0x302E +#define EGL_NATIVE_VISUAL_TYPE 0x302F +#define EGL_NONE 0x3038 +#define EGL_NON_CONFORMANT_CONFIG 0x3051 +#define EGL_NOT_INITIALIZED 0x3001 +#define EGL_NO_CONTEXT ((EGLContext)0) +#define EGL_NO_DISPLAY ((EGLDisplay)0) +#define EGL_NO_SURFACE ((EGLSurface)0) +#define EGL_PBUFFER_BIT 0x0001 +#define EGL_PIXMAP_BIT 0x0002 +#define EGL_READ 0x305A +#define EGL_RED_SIZE 0x3024 +#define EGL_SAMPLES 0x3031 +#define EGL_SAMPLE_BUFFERS 0x3032 +#define EGL_SLOW_CONFIG 0x3050 +#define EGL_STENCIL_SIZE 0x3026 +#define EGL_SUCCESS 0x3000 +#define EGL_SURFACE_TYPE 0x3033 +#define EGL_TRANSPARENT_BLUE_VALUE 0x3035 +#define EGL_TRANSPARENT_GREEN_VALUE 0x3036 +#define EGL_TRANSPARENT_RED_VALUE 0x3037 +#define EGL_TRANSPARENT_RGB 0x3052 +#define EGL_TRANSPARENT_TYPE 0x3034 +#define EGL_TRUE 1 +#define EGL_VENDOR 0x3053 +#define EGL_VERSION 0x3054 +#define EGL_WIDTH 0x3057 +#define EGL_WINDOW_BIT 0x0004 +EGLAPI EGLBoolean EGLAPIENTRY eglChooseConfig (EGLDisplay dpy, const EGLint *attrib_list, EGLConfig *configs, EGLint config_size, EGLint *num_config); +EGLAPI EGLBoolean EGLAPIENTRY eglCopyBuffers (EGLDisplay dpy, EGLSurface surface, EGLNativePixmapType target); +EGLAPI EGLContext EGLAPIENTRY eglCreateContext (EGLDisplay dpy, EGLConfig config, EGLContext share_context, const EGLint *attrib_list); +EGLAPI EGLSurface EGLAPIENTRY eglCreatePbufferSurface (EGLDisplay dpy, EGLConfig config, const EGLint *attrib_list); +EGLAPI EGLSurface EGLAPIENTRY eglCreatePixmapSurface (EGLDisplay dpy, EGLConfig config, EGLNativePixmapType pixmap, const EGLint *attrib_list); +EGLAPI EGLSurface EGLAPIENTRY eglCreateWindowSurface (EGLDisplay dpy, EGLConfig config, EGLNativeWindowType win, const EGLint *attrib_list); +EGLAPI EGLBoolean EGLAPIENTRY eglDestroyContext (EGLDisplay dpy, 
EGLContext ctx); +EGLAPI EGLBoolean EGLAPIENTRY eglDestroySurface (EGLDisplay dpy, EGLSurface surface); +EGLAPI EGLBoolean EGLAPIENTRY eglGetConfigAttrib (EGLDisplay dpy, EGLConfig config, EGLint attribute, EGLint *value); +EGLAPI EGLBoolean EGLAPIENTRY eglGetConfigs (EGLDisplay dpy, EGLConfig *configs, EGLint config_size, EGLint *num_config); +EGLAPI EGLDisplay EGLAPIENTRY eglGetCurrentDisplay (void); +EGLAPI EGLSurface EGLAPIENTRY eglGetCurrentSurface (EGLint readdraw); +EGLAPI EGLDisplay EGLAPIENTRY eglGetDisplay (EGLNativeDisplayType display_id); +EGLAPI EGLint EGLAPIENTRY eglGetError (void); +EGLAPI __eglMustCastToProperFunctionPointerType EGLAPIENTRY eglGetProcAddress (const char *procname); +EGLAPI EGLBoolean EGLAPIENTRY eglInitialize (EGLDisplay dpy, EGLint *major, EGLint *minor); +EGLAPI EGLBoolean EGLAPIENTRY eglMakeCurrent (EGLDisplay dpy, EGLSurface draw, EGLSurface read, EGLContext ctx); +EGLAPI EGLBoolean EGLAPIENTRY eglQueryContext (EGLDisplay dpy, EGLContext ctx, EGLint attribute, EGLint *value); +EGLAPI const char *EGLAPIENTRY eglQueryString (EGLDisplay dpy, EGLint name); +EGLAPI EGLBoolean EGLAPIENTRY eglQuerySurface (EGLDisplay dpy, EGLSurface surface, EGLint attribute, EGLint *value); +EGLAPI EGLBoolean EGLAPIENTRY eglSwapBuffers (EGLDisplay dpy, EGLSurface surface); +EGLAPI EGLBoolean EGLAPIENTRY eglTerminate (EGLDisplay dpy); +EGLAPI EGLBoolean EGLAPIENTRY eglWaitGL (void); +EGLAPI EGLBoolean EGLAPIENTRY eglWaitNative (EGLint engine); +#endif /* EGL_VERSION_1_0 */ + +#ifndef EGL_VERSION_1_1 +#define EGL_VERSION_1_1 1 +#define EGL_BACK_BUFFER 0x3084 +#define EGL_BIND_TO_TEXTURE_RGB 0x3039 +#define EGL_BIND_TO_TEXTURE_RGBA 0x303A +#define EGL_CONTEXT_LOST 0x300E +#define EGL_MIN_SWAP_INTERVAL 0x303B +#define EGL_MAX_SWAP_INTERVAL 0x303C +#define EGL_MIPMAP_TEXTURE 0x3082 +#define EGL_MIPMAP_LEVEL 0x3083 +#define EGL_NO_TEXTURE 0x305C +#define EGL_TEXTURE_2D 0x305F +#define EGL_TEXTURE_FORMAT 0x3080 +#define EGL_TEXTURE_RGB 0x305D +#define 
EGL_TEXTURE_RGBA 0x305E +#define EGL_TEXTURE_TARGET 0x3081 +EGLAPI EGLBoolean EGLAPIENTRY eglBindTexImage (EGLDisplay dpy, EGLSurface surface, EGLint buffer); +EGLAPI EGLBoolean EGLAPIENTRY eglReleaseTexImage (EGLDisplay dpy, EGLSurface surface, EGLint buffer); +EGLAPI EGLBoolean EGLAPIENTRY eglSurfaceAttrib (EGLDisplay dpy, EGLSurface surface, EGLint attribute, EGLint value); +EGLAPI EGLBoolean EGLAPIENTRY eglSwapInterval (EGLDisplay dpy, EGLint interval); +#endif /* EGL_VERSION_1_1 */ + +#ifndef EGL_VERSION_1_2 +#define EGL_VERSION_1_2 1 +typedef unsigned int EGLenum; +typedef void *EGLClientBuffer; +#define EGL_ALPHA_FORMAT 0x3088 +#define EGL_ALPHA_FORMAT_NONPRE 0x308B +#define EGL_ALPHA_FORMAT_PRE 0x308C +#define EGL_ALPHA_MASK_SIZE 0x303E +#define EGL_BUFFER_PRESERVED 0x3094 +#define EGL_BUFFER_DESTROYED 0x3095 +#define EGL_CLIENT_APIS 0x308D +#define EGL_COLORSPACE 0x3087 +#define EGL_COLORSPACE_sRGB 0x3089 +#define EGL_COLORSPACE_LINEAR 0x308A +#define EGL_COLOR_BUFFER_TYPE 0x303F +#define EGL_CONTEXT_CLIENT_TYPE 0x3097 +#define EGL_DISPLAY_SCALING 10000 +#define EGL_HORIZONTAL_RESOLUTION 0x3090 +#define EGL_LUMINANCE_BUFFER 0x308F +#define EGL_LUMINANCE_SIZE 0x303D +#define EGL_OPENGL_ES_BIT 0x0001 +#define EGL_OPENVG_BIT 0x0002 +#define EGL_OPENGL_ES_API 0x30A0 +#define EGL_OPENVG_API 0x30A1 +#define EGL_OPENVG_IMAGE 0x3096 +#define EGL_PIXEL_ASPECT_RATIO 0x3092 +#define EGL_RENDERABLE_TYPE 0x3040 +#define EGL_RENDER_BUFFER 0x3086 +#define EGL_RGB_BUFFER 0x308E +#define EGL_SINGLE_BUFFER 0x3085 +#define EGL_SWAP_BEHAVIOR 0x3093 +#define EGL_UNKNOWN ((EGLint)-1) +#define EGL_VERTICAL_RESOLUTION 0x3091 +EGLAPI EGLBoolean EGLAPIENTRY eglBindAPI (EGLenum api); +EGLAPI EGLenum EGLAPIENTRY eglQueryAPI (void); +EGLAPI EGLSurface EGLAPIENTRY eglCreatePbufferFromClientBuffer (EGLDisplay dpy, EGLenum buftype, EGLClientBuffer buffer, EGLConfig config, const EGLint *attrib_list); +EGLAPI EGLBoolean EGLAPIENTRY eglReleaseThread (void); +EGLAPI EGLBoolean EGLAPIENTRY 
eglWaitClient (void); +#endif /* EGL_VERSION_1_2 */ + +#ifndef EGL_VERSION_1_3 +#define EGL_VERSION_1_3 1 +#define EGL_CONFORMANT 0x3042 +#define EGL_CONTEXT_CLIENT_VERSION 0x3098 +#define EGL_MATCH_NATIVE_PIXMAP 0x3041 +#define EGL_OPENGL_ES2_BIT 0x0004 +#define EGL_VG_ALPHA_FORMAT 0x3088 +#define EGL_VG_ALPHA_FORMAT_NONPRE 0x308B +#define EGL_VG_ALPHA_FORMAT_PRE 0x308C +#define EGL_VG_ALPHA_FORMAT_PRE_BIT 0x0040 +#define EGL_VG_COLORSPACE 0x3087 +#define EGL_VG_COLORSPACE_sRGB 0x3089 +#define EGL_VG_COLORSPACE_LINEAR 0x308A +#define EGL_VG_COLORSPACE_LINEAR_BIT 0x0020 +#endif /* EGL_VERSION_1_3 */ + +#ifndef EGL_VERSION_1_4 +#define EGL_VERSION_1_4 1 +#define EGL_DEFAULT_DISPLAY ((EGLNativeDisplayType)0) +#define EGL_MULTISAMPLE_RESOLVE_BOX_BIT 0x0200 +#define EGL_MULTISAMPLE_RESOLVE 0x3099 +#define EGL_MULTISAMPLE_RESOLVE_DEFAULT 0x309A +#define EGL_MULTISAMPLE_RESOLVE_BOX 0x309B +#define EGL_OPENGL_API 0x30A2 +#define EGL_OPENGL_BIT 0x0008 +#define EGL_SWAP_BEHAVIOR_PRESERVED_BIT 0x0400 +EGLAPI EGLContext EGLAPIENTRY eglGetCurrentContext (void); +#endif /* EGL_VERSION_1_4 */ + +#ifndef EGL_VERSION_1_5 +#define EGL_VERSION_1_5 1 +typedef void *EGLSync; +typedef intptr_t EGLAttrib; +typedef khronos_utime_nanoseconds_t EGLTime; +typedef void *EGLImage; +#define EGL_CONTEXT_MAJOR_VERSION 0x3098 +#define EGL_CONTEXT_MINOR_VERSION 0x30FB +#define EGL_CONTEXT_OPENGL_PROFILE_MASK 0x30FD +#define EGL_CONTEXT_OPENGL_RESET_NOTIFICATION_STRATEGY 0x31BD +#define EGL_NO_RESET_NOTIFICATION 0x31BE +#define EGL_LOSE_CONTEXT_ON_RESET 0x31BF +#define EGL_CONTEXT_OPENGL_CORE_PROFILE_BIT 0x00000001 +#define EGL_CONTEXT_OPENGL_COMPATIBILITY_PROFILE_BIT 0x00000002 +#define EGL_CONTEXT_OPENGL_DEBUG 0x31B0 +#define EGL_CONTEXT_OPENGL_FORWARD_COMPATIBLE 0x31B1 +#define EGL_CONTEXT_OPENGL_ROBUST_ACCESS 0x31B2 +#define EGL_OPENGL_ES3_BIT 0x00000040 +#define EGL_CL_EVENT_HANDLE 0x309C +#define EGL_SYNC_CL_EVENT 0x30FE +#define EGL_SYNC_CL_EVENT_COMPLETE 0x30FF +#define 
EGL_SYNC_PRIOR_COMMANDS_COMPLETE 0x30F0 +#define EGL_SYNC_TYPE 0x30F7 +#define EGL_SYNC_STATUS 0x30F1 +#define EGL_SYNC_CONDITION 0x30F8 +#define EGL_SIGNALED 0x30F2 +#define EGL_UNSIGNALED 0x30F3 +#define EGL_SYNC_FLUSH_COMMANDS_BIT 0x0001 +#define EGL_FOREVER 0xFFFFFFFFFFFFFFFFull +#define EGL_TIMEOUT_EXPIRED 0x30F5 +#define EGL_CONDITION_SATISFIED 0x30F6 +#define EGL_NO_SYNC ((EGLSync)0) +#define EGL_SYNC_FENCE 0x30F9 +#define EGL_GL_COLORSPACE 0x309D +#define EGL_GL_COLORSPACE_SRGB 0x3089 +#define EGL_GL_COLORSPACE_LINEAR 0x308A +#define EGL_GL_RENDERBUFFER 0x30B9 +#define EGL_GL_TEXTURE_2D 0x30B1 +#define EGL_GL_TEXTURE_LEVEL 0x30BC +#define EGL_GL_TEXTURE_3D 0x30B2 +#define EGL_GL_TEXTURE_ZOFFSET 0x30BD +#define EGL_GL_TEXTURE_CUBE_MAP_POSITIVE_X 0x30B3 +#define EGL_GL_TEXTURE_CUBE_MAP_NEGATIVE_X 0x30B4 +#define EGL_GL_TEXTURE_CUBE_MAP_POSITIVE_Y 0x30B5 +#define EGL_GL_TEXTURE_CUBE_MAP_NEGATIVE_Y 0x30B6 +#define EGL_GL_TEXTURE_CUBE_MAP_POSITIVE_Z 0x30B7 +#define EGL_GL_TEXTURE_CUBE_MAP_NEGATIVE_Z 0x30B8 +#define EGL_IMAGE_PRESERVED 0x30D2 +#define EGL_NO_IMAGE ((EGLImage)0) +EGLAPI EGLSync EGLAPIENTRY eglCreateSync (EGLDisplay dpy, EGLenum type, const EGLAttrib *attrib_list); +EGLAPI EGLBoolean EGLAPIENTRY eglDestroySync (EGLDisplay dpy, EGLSync sync); +EGLAPI EGLint EGLAPIENTRY eglClientWaitSync (EGLDisplay dpy, EGLSync sync, EGLint flags, EGLTime timeout); +EGLAPI EGLBoolean EGLAPIENTRY eglGetSyncAttrib (EGLDisplay dpy, EGLSync sync, EGLint attribute, EGLAttrib *value); +EGLAPI EGLImage EGLAPIENTRY eglCreateImage (EGLDisplay dpy, EGLContext ctx, EGLenum target, EGLClientBuffer buffer, const EGLint *attrib_list); +EGLAPI EGLBoolean EGLAPIENTRY eglDestroyImage (EGLDisplay dpy, EGLImage image); +EGLAPI EGLDisplay EGLAPIENTRY eglGetPlatformDisplay (EGLenum platform, void *native_display, const EGLAttrib *attrib_list); +EGLAPI EGLSurface EGLAPIENTRY eglCreatePlatformWindowSurface (EGLDisplay dpy, EGLConfig config, void *native_window, const EGLAttrib 
*attrib_list); +EGLAPI EGLSurface EGLAPIENTRY eglCreatePlatformPixmapSurface (EGLDisplay dpy, EGLConfig config, void *native_pixmap, const EGLAttrib *attrib_list); +EGLAPI EGLBoolean EGLAPIENTRY eglWaitSync (EGLDisplay dpy, EGLSync sync, EGLint flags); +#endif /* EGL_VERSION_1_5 */ + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/opengles-3.1/include/EGL/eglext.h b/opengles-3.1/include/EGL/eglext.h new file mode 100644 index 0000000000..d8e5ab537e --- /dev/null +++ b/opengles-3.1/include/EGL/eglext.h @@ -0,0 +1,804 @@ +#ifndef __eglext_h_ +#define __eglext_h_ 1 + +#ifdef __cplusplus +extern "C" { +#endif + +/* +** Copyright (c) 2013-2014 The Khronos Group Inc. +** +** Permission is hereby granted, free of charge, to any person obtaining a +** copy of this software and/or associated documentation files (the +** "Materials"), to deal in the Materials without restriction, including +** without limitation the rights to use, copy, modify, merge, publish, +** distribute, sublicense, and/or sell copies of the Materials, and to +** permit persons to whom the Materials are furnished to do so, subject to +** the following conditions: +** +** The above copyright notice and this permission notice shall be included +** in all copies or substantial portions of the Materials. +** +** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +** MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. +*/ +/* +** This header is generated from the Khronos OpenGL / OpenGL ES XML +** API Registry. 
The current version of the Registry, generator scripts +** used to make the header, and the header can be found at +** http://www.opengl.org/registry/ +** +** Khronos $Revision: 28371 $ on $Date: 2014-10-01 09:16:09 -0700 (Wed, 01 Oct 2014) $ +*/ + +#include + +#define EGL_EGLEXT_VERSION 20141001 + +/* Generated C header for: + * API: egl + * Versions considered: .* + * Versions emitted: _nomatch_^ + * Default extensions included: egl + * Additional extensions included: _nomatch_^ + * Extensions removed: _nomatch_^ + */ + +#ifndef EGL_KHR_cl_event +#define EGL_KHR_cl_event 1 +#define EGL_CL_EVENT_HANDLE_KHR 0x309C +#define EGL_SYNC_CL_EVENT_KHR 0x30FE +#define EGL_SYNC_CL_EVENT_COMPLETE_KHR 0x30FF +#endif /* EGL_KHR_cl_event */ + +#ifndef EGL_KHR_cl_event2 +#define EGL_KHR_cl_event2 1 +typedef void *EGLSyncKHR; +typedef intptr_t EGLAttribKHR; +typedef EGLSyncKHR (EGLAPIENTRYP PFNEGLCREATESYNC64KHRPROC) (EGLDisplay dpy, EGLenum type, const EGLAttribKHR *attrib_list); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLSyncKHR EGLAPIENTRY eglCreateSync64KHR (EGLDisplay dpy, EGLenum type, const EGLAttribKHR *attrib_list); +#endif +#endif /* EGL_KHR_cl_event2 */ + +#ifndef EGL_KHR_client_get_all_proc_addresses +#define EGL_KHR_client_get_all_proc_addresses 1 +#endif /* EGL_KHR_client_get_all_proc_addresses */ + +#ifndef EGL_KHR_config_attribs +#define EGL_KHR_config_attribs 1 +#define EGL_CONFORMANT_KHR 0x3042 +#define EGL_VG_COLORSPACE_LINEAR_BIT_KHR 0x0020 +#define EGL_VG_ALPHA_FORMAT_PRE_BIT_KHR 0x0040 +#endif /* EGL_KHR_config_attribs */ + +#ifndef EGL_KHR_create_context +#define EGL_KHR_create_context 1 +#define EGL_CONTEXT_MAJOR_VERSION_KHR 0x3098 +#define EGL_CONTEXT_MINOR_VERSION_KHR 0x30FB +#define EGL_CONTEXT_FLAGS_KHR 0x30FC +#define EGL_CONTEXT_OPENGL_PROFILE_MASK_KHR 0x30FD +#define EGL_CONTEXT_OPENGL_RESET_NOTIFICATION_STRATEGY_KHR 0x31BD +#define EGL_NO_RESET_NOTIFICATION_KHR 0x31BE +#define EGL_LOSE_CONTEXT_ON_RESET_KHR 0x31BF +#define 
EGL_CONTEXT_OPENGL_DEBUG_BIT_KHR 0x00000001 +#define EGL_CONTEXT_OPENGL_FORWARD_COMPATIBLE_BIT_KHR 0x00000002 +#define EGL_CONTEXT_OPENGL_ROBUST_ACCESS_BIT_KHR 0x00000004 +#define EGL_CONTEXT_OPENGL_CORE_PROFILE_BIT_KHR 0x00000001 +#define EGL_CONTEXT_OPENGL_COMPATIBILITY_PROFILE_BIT_KHR 0x00000002 +#define EGL_OPENGL_ES3_BIT_KHR 0x00000040 +#endif /* EGL_KHR_create_context */ + +#ifndef EGL_KHR_fence_sync +#define EGL_KHR_fence_sync 1 +#ifdef KHRONOS_SUPPORT_INT64 +#define EGL_SYNC_PRIOR_COMMANDS_COMPLETE_KHR 0x30F0 +#define EGL_SYNC_CONDITION_KHR 0x30F8 +#define EGL_SYNC_FENCE_KHR 0x30F9 +#endif /* KHRONOS_SUPPORT_INT64 */ +#endif /* EGL_KHR_fence_sync */ + +#ifndef EGL_KHR_get_all_proc_addresses +#define EGL_KHR_get_all_proc_addresses 1 +#endif /* EGL_KHR_get_all_proc_addresses */ + +#ifndef EGL_KHR_gl_colorspace +#define EGL_KHR_gl_colorspace 1 +#define EGL_GL_COLORSPACE_KHR 0x309D +#define EGL_GL_COLORSPACE_SRGB_KHR 0x3089 +#define EGL_GL_COLORSPACE_LINEAR_KHR 0x308A +#endif /* EGL_KHR_gl_colorspace */ + +#ifndef EGL_KHR_gl_renderbuffer_image +#define EGL_KHR_gl_renderbuffer_image 1 +#define EGL_GL_RENDERBUFFER_KHR 0x30B9 +#endif /* EGL_KHR_gl_renderbuffer_image */ + +#ifndef EGL_KHR_gl_texture_2D_image +#define EGL_KHR_gl_texture_2D_image 1 +#define EGL_GL_TEXTURE_2D_KHR 0x30B1 +#define EGL_GL_TEXTURE_LEVEL_KHR 0x30BC +#endif /* EGL_KHR_gl_texture_2D_image */ + +#ifndef EGL_KHR_gl_texture_3D_image +#define EGL_KHR_gl_texture_3D_image 1 +#define EGL_GL_TEXTURE_3D_KHR 0x30B2 +#define EGL_GL_TEXTURE_ZOFFSET_KHR 0x30BD +#endif /* EGL_KHR_gl_texture_3D_image */ + +#ifndef EGL_KHR_gl_texture_cubemap_image +#define EGL_KHR_gl_texture_cubemap_image 1 +#define EGL_GL_TEXTURE_CUBE_MAP_POSITIVE_X_KHR 0x30B3 +#define EGL_GL_TEXTURE_CUBE_MAP_NEGATIVE_X_KHR 0x30B4 +#define EGL_GL_TEXTURE_CUBE_MAP_POSITIVE_Y_KHR 0x30B5 +#define EGL_GL_TEXTURE_CUBE_MAP_NEGATIVE_Y_KHR 0x30B6 +#define EGL_GL_TEXTURE_CUBE_MAP_POSITIVE_Z_KHR 0x30B7 +#define EGL_GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_KHR 
0x30B8 +#endif /* EGL_KHR_gl_texture_cubemap_image */ + +#ifndef EGL_KHR_image +#define EGL_KHR_image 1 +typedef void *EGLImageKHR; +#define EGL_NATIVE_PIXMAP_KHR 0x30B0 +#define EGL_NO_IMAGE_KHR ((EGLImageKHR)0) +typedef EGLImageKHR (EGLAPIENTRYP PFNEGLCREATEIMAGEKHRPROC) (EGLDisplay dpy, EGLContext ctx, EGLenum target, EGLClientBuffer buffer, const EGLint *attrib_list); +typedef EGLBoolean (EGLAPIENTRYP PFNEGLDESTROYIMAGEKHRPROC) (EGLDisplay dpy, EGLImageKHR image); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLImageKHR EGLAPIENTRY eglCreateImageKHR (EGLDisplay dpy, EGLContext ctx, EGLenum target, EGLClientBuffer buffer, const EGLint *attrib_list); +EGLAPI EGLBoolean EGLAPIENTRY eglDestroyImageKHR (EGLDisplay dpy, EGLImageKHR image); +#endif +#endif /* EGL_KHR_image */ + +#ifndef EGL_KHR_image_base +#define EGL_KHR_image_base 1 +#define EGL_IMAGE_PRESERVED_KHR 0x30D2 +#endif /* EGL_KHR_image_base */ + +#ifndef EGL_KHR_image_pixmap +#define EGL_KHR_image_pixmap 1 +#endif /* EGL_KHR_image_pixmap */ + +#ifndef EGL_KHR_lock_surface +#define EGL_KHR_lock_surface 1 +#define EGL_READ_SURFACE_BIT_KHR 0x0001 +#define EGL_WRITE_SURFACE_BIT_KHR 0x0002 +#define EGL_LOCK_SURFACE_BIT_KHR 0x0080 +#define EGL_OPTIMAL_FORMAT_BIT_KHR 0x0100 +#define EGL_MATCH_FORMAT_KHR 0x3043 +#define EGL_FORMAT_RGB_565_EXACT_KHR 0x30C0 +#define EGL_FORMAT_RGB_565_KHR 0x30C1 +#define EGL_FORMAT_RGBA_8888_EXACT_KHR 0x30C2 +#define EGL_FORMAT_RGBA_8888_KHR 0x30C3 +#define EGL_MAP_PRESERVE_PIXELS_KHR 0x30C4 +#define EGL_LOCK_USAGE_HINT_KHR 0x30C5 +#define EGL_BITMAP_POINTER_KHR 0x30C6 +#define EGL_BITMAP_PITCH_KHR 0x30C7 +#define EGL_BITMAP_ORIGIN_KHR 0x30C8 +#define EGL_BITMAP_PIXEL_RED_OFFSET_KHR 0x30C9 +#define EGL_BITMAP_PIXEL_GREEN_OFFSET_KHR 0x30CA +#define EGL_BITMAP_PIXEL_BLUE_OFFSET_KHR 0x30CB +#define EGL_BITMAP_PIXEL_ALPHA_OFFSET_KHR 0x30CC +#define EGL_BITMAP_PIXEL_LUMINANCE_OFFSET_KHR 0x30CD +#define EGL_LOWER_LEFT_KHR 0x30CE +#define EGL_UPPER_LEFT_KHR 0x30CF +typedef EGLBoolean 
(EGLAPIENTRYP PFNEGLLOCKSURFACEKHRPROC) (EGLDisplay dpy, EGLSurface surface, const EGLint *attrib_list); +typedef EGLBoolean (EGLAPIENTRYP PFNEGLUNLOCKSURFACEKHRPROC) (EGLDisplay dpy, EGLSurface surface); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLBoolean EGLAPIENTRY eglLockSurfaceKHR (EGLDisplay dpy, EGLSurface surface, const EGLint *attrib_list); +EGLAPI EGLBoolean EGLAPIENTRY eglUnlockSurfaceKHR (EGLDisplay dpy, EGLSurface surface); +#endif +#endif /* EGL_KHR_lock_surface */ + +#ifndef EGL_KHR_lock_surface2 +#define EGL_KHR_lock_surface2 1 +#define EGL_BITMAP_PIXEL_SIZE_KHR 0x3110 +#endif /* EGL_KHR_lock_surface2 */ + +#ifndef EGL_KHR_lock_surface3 +#define EGL_KHR_lock_surface3 1 +typedef EGLBoolean (EGLAPIENTRYP PFNEGLQUERYSURFACE64KHRPROC) (EGLDisplay dpy, EGLSurface surface, EGLint attribute, EGLAttribKHR *value); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLBoolean EGLAPIENTRY eglQuerySurface64KHR (EGLDisplay dpy, EGLSurface surface, EGLint attribute, EGLAttribKHR *value); +#endif +#endif /* EGL_KHR_lock_surface3 */ + +#ifndef EGL_KHR_platform_android +#define EGL_KHR_platform_android 1 +#define EGL_PLATFORM_ANDROID_KHR 0x3141 +#endif /* EGL_KHR_platform_android */ + +#ifndef EGL_KHR_platform_gbm +#define EGL_KHR_platform_gbm 1 +#define EGL_PLATFORM_GBM_KHR 0x31D7 +#endif /* EGL_KHR_platform_gbm */ + +#ifndef EGL_KHR_platform_wayland +#define EGL_KHR_platform_wayland 1 +#define EGL_PLATFORM_WAYLAND_KHR 0x31D8 +#endif /* EGL_KHR_platform_wayland */ + +#ifndef EGL_KHR_platform_x11 +#define EGL_KHR_platform_x11 1 +#define EGL_PLATFORM_X11_KHR 0x31D5 +#define EGL_PLATFORM_X11_SCREEN_KHR 0x31D6 +#endif /* EGL_KHR_platform_x11 */ + +#ifndef EGL_KHR_reusable_sync +#define EGL_KHR_reusable_sync 1 +typedef khronos_utime_nanoseconds_t EGLTimeKHR; +#ifdef KHRONOS_SUPPORT_INT64 +#define EGL_SYNC_STATUS_KHR 0x30F1 +#define EGL_SIGNALED_KHR 0x30F2 +#define EGL_UNSIGNALED_KHR 0x30F3 +#define EGL_TIMEOUT_EXPIRED_KHR 0x30F5 +#define EGL_CONDITION_SATISFIED_KHR 0x30F6 +#define 
EGL_SYNC_TYPE_KHR 0x30F7 +#define EGL_SYNC_REUSABLE_KHR 0x30FA +#define EGL_SYNC_FLUSH_COMMANDS_BIT_KHR 0x0001 +#define EGL_FOREVER_KHR 0xFFFFFFFFFFFFFFFFull +#define EGL_NO_SYNC_KHR ((EGLSyncKHR)0) +typedef EGLSyncKHR (EGLAPIENTRYP PFNEGLCREATESYNCKHRPROC) (EGLDisplay dpy, EGLenum type, const EGLint *attrib_list); +typedef EGLBoolean (EGLAPIENTRYP PFNEGLDESTROYSYNCKHRPROC) (EGLDisplay dpy, EGLSyncKHR sync); +typedef EGLint (EGLAPIENTRYP PFNEGLCLIENTWAITSYNCKHRPROC) (EGLDisplay dpy, EGLSyncKHR sync, EGLint flags, EGLTimeKHR timeout); +typedef EGLBoolean (EGLAPIENTRYP PFNEGLSIGNALSYNCKHRPROC) (EGLDisplay dpy, EGLSyncKHR sync, EGLenum mode); +typedef EGLBoolean (EGLAPIENTRYP PFNEGLGETSYNCATTRIBKHRPROC) (EGLDisplay dpy, EGLSyncKHR sync, EGLint attribute, EGLint *value); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLSyncKHR EGLAPIENTRY eglCreateSyncKHR (EGLDisplay dpy, EGLenum type, const EGLint *attrib_list); +EGLAPI EGLBoolean EGLAPIENTRY eglDestroySyncKHR (EGLDisplay dpy, EGLSyncKHR sync); +EGLAPI EGLint EGLAPIENTRY eglClientWaitSyncKHR (EGLDisplay dpy, EGLSyncKHR sync, EGLint flags, EGLTimeKHR timeout); +EGLAPI EGLBoolean EGLAPIENTRY eglSignalSyncKHR (EGLDisplay dpy, EGLSyncKHR sync, EGLenum mode); +EGLAPI EGLBoolean EGLAPIENTRY eglGetSyncAttribKHR (EGLDisplay dpy, EGLSyncKHR sync, EGLint attribute, EGLint *value); +#endif +#endif /* KHRONOS_SUPPORT_INT64 */ +#endif /* EGL_KHR_reusable_sync */ + +#ifndef EGL_KHR_stream +#define EGL_KHR_stream 1 +typedef void *EGLStreamKHR; +typedef khronos_uint64_t EGLuint64KHR; +#ifdef KHRONOS_SUPPORT_INT64 +#define EGL_NO_STREAM_KHR ((EGLStreamKHR)0) +#define EGL_CONSUMER_LATENCY_USEC_KHR 0x3210 +#define EGL_PRODUCER_FRAME_KHR 0x3212 +#define EGL_CONSUMER_FRAME_KHR 0x3213 +#define EGL_STREAM_STATE_KHR 0x3214 +#define EGL_STREAM_STATE_CREATED_KHR 0x3215 +#define EGL_STREAM_STATE_CONNECTING_KHR 0x3216 +#define EGL_STREAM_STATE_EMPTY_KHR 0x3217 +#define EGL_STREAM_STATE_NEW_FRAME_AVAILABLE_KHR 0x3218 +#define 
EGL_STREAM_STATE_OLD_FRAME_AVAILABLE_KHR 0x3219 +#define EGL_STREAM_STATE_DISCONNECTED_KHR 0x321A +#define EGL_BAD_STREAM_KHR 0x321B +#define EGL_BAD_STATE_KHR 0x321C +typedef EGLStreamKHR (EGLAPIENTRYP PFNEGLCREATESTREAMKHRPROC) (EGLDisplay dpy, const EGLint *attrib_list); +typedef EGLBoolean (EGLAPIENTRYP PFNEGLDESTROYSTREAMKHRPROC) (EGLDisplay dpy, EGLStreamKHR stream); +typedef EGLBoolean (EGLAPIENTRYP PFNEGLSTREAMATTRIBKHRPROC) (EGLDisplay dpy, EGLStreamKHR stream, EGLenum attribute, EGLint value); +typedef EGLBoolean (EGLAPIENTRYP PFNEGLQUERYSTREAMKHRPROC) (EGLDisplay dpy, EGLStreamKHR stream, EGLenum attribute, EGLint *value); +typedef EGLBoolean (EGLAPIENTRYP PFNEGLQUERYSTREAMU64KHRPROC) (EGLDisplay dpy, EGLStreamKHR stream, EGLenum attribute, EGLuint64KHR *value); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLStreamKHR EGLAPIENTRY eglCreateStreamKHR (EGLDisplay dpy, const EGLint *attrib_list); +EGLAPI EGLBoolean EGLAPIENTRY eglDestroyStreamKHR (EGLDisplay dpy, EGLStreamKHR stream); +EGLAPI EGLBoolean EGLAPIENTRY eglStreamAttribKHR (EGLDisplay dpy, EGLStreamKHR stream, EGLenum attribute, EGLint value); +EGLAPI EGLBoolean EGLAPIENTRY eglQueryStreamKHR (EGLDisplay dpy, EGLStreamKHR stream, EGLenum attribute, EGLint *value); +EGLAPI EGLBoolean EGLAPIENTRY eglQueryStreamu64KHR (EGLDisplay dpy, EGLStreamKHR stream, EGLenum attribute, EGLuint64KHR *value); +#endif +#endif /* KHRONOS_SUPPORT_INT64 */ +#endif /* EGL_KHR_stream */ + +#ifndef EGL_KHR_stream_consumer_gltexture +#define EGL_KHR_stream_consumer_gltexture 1 +#ifdef EGL_KHR_stream +#define EGL_CONSUMER_ACQUIRE_TIMEOUT_USEC_KHR 0x321E +typedef EGLBoolean (EGLAPIENTRYP PFNEGLSTREAMCONSUMERGLTEXTUREEXTERNALKHRPROC) (EGLDisplay dpy, EGLStreamKHR stream); +typedef EGLBoolean (EGLAPIENTRYP PFNEGLSTREAMCONSUMERACQUIREKHRPROC) (EGLDisplay dpy, EGLStreamKHR stream); +typedef EGLBoolean (EGLAPIENTRYP PFNEGLSTREAMCONSUMERRELEASEKHRPROC) (EGLDisplay dpy, EGLStreamKHR stream); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI 
EGLBoolean EGLAPIENTRY eglStreamConsumerGLTextureExternalKHR (EGLDisplay dpy, EGLStreamKHR stream); +EGLAPI EGLBoolean EGLAPIENTRY eglStreamConsumerAcquireKHR (EGLDisplay dpy, EGLStreamKHR stream); +EGLAPI EGLBoolean EGLAPIENTRY eglStreamConsumerReleaseKHR (EGLDisplay dpy, EGLStreamKHR stream); +#endif +#endif /* EGL_KHR_stream */ +#endif /* EGL_KHR_stream_consumer_gltexture */ + +#ifndef EGL_KHR_stream_cross_process_fd +#define EGL_KHR_stream_cross_process_fd 1 +typedef int EGLNativeFileDescriptorKHR; +#ifdef EGL_KHR_stream +#define EGL_NO_FILE_DESCRIPTOR_KHR ((EGLNativeFileDescriptorKHR)(-1)) +typedef EGLNativeFileDescriptorKHR (EGLAPIENTRYP PFNEGLGETSTREAMFILEDESCRIPTORKHRPROC) (EGLDisplay dpy, EGLStreamKHR stream); +typedef EGLStreamKHR (EGLAPIENTRYP PFNEGLCREATESTREAMFROMFILEDESCRIPTORKHRPROC) (EGLDisplay dpy, EGLNativeFileDescriptorKHR file_descriptor); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLNativeFileDescriptorKHR EGLAPIENTRY eglGetStreamFileDescriptorKHR (EGLDisplay dpy, EGLStreamKHR stream); +EGLAPI EGLStreamKHR EGLAPIENTRY eglCreateStreamFromFileDescriptorKHR (EGLDisplay dpy, EGLNativeFileDescriptorKHR file_descriptor); +#endif +#endif /* EGL_KHR_stream */ +#endif /* EGL_KHR_stream_cross_process_fd */ + +#ifndef EGL_KHR_stream_fifo +#define EGL_KHR_stream_fifo 1 +#ifdef EGL_KHR_stream +#define EGL_STREAM_FIFO_LENGTH_KHR 0x31FC +#define EGL_STREAM_TIME_NOW_KHR 0x31FD +#define EGL_STREAM_TIME_CONSUMER_KHR 0x31FE +#define EGL_STREAM_TIME_PRODUCER_KHR 0x31FF +typedef EGLBoolean (EGLAPIENTRYP PFNEGLQUERYSTREAMTIMEKHRPROC) (EGLDisplay dpy, EGLStreamKHR stream, EGLenum attribute, EGLTimeKHR *value); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLBoolean EGLAPIENTRY eglQueryStreamTimeKHR (EGLDisplay dpy, EGLStreamKHR stream, EGLenum attribute, EGLTimeKHR *value); +#endif +#endif /* EGL_KHR_stream */ +#endif /* EGL_KHR_stream_fifo */ + +#ifndef EGL_KHR_stream_producer_aldatalocator +#define EGL_KHR_stream_producer_aldatalocator 1 +#ifdef EGL_KHR_stream +#endif /* 
EGL_KHR_stream */ +#endif /* EGL_KHR_stream_producer_aldatalocator */ + +#ifndef EGL_KHR_stream_producer_eglsurface +#define EGL_KHR_stream_producer_eglsurface 1 +#ifdef EGL_KHR_stream +#define EGL_STREAM_BIT_KHR 0x0800 +typedef EGLSurface (EGLAPIENTRYP PFNEGLCREATESTREAMPRODUCERSURFACEKHRPROC) (EGLDisplay dpy, EGLConfig config, EGLStreamKHR stream, const EGLint *attrib_list); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLSurface EGLAPIENTRY eglCreateStreamProducerSurfaceKHR (EGLDisplay dpy, EGLConfig config, EGLStreamKHR stream, const EGLint *attrib_list); +#endif +#endif /* EGL_KHR_stream */ +#endif /* EGL_KHR_stream_producer_eglsurface */ + +#ifndef EGL_KHR_surfaceless_context +#define EGL_KHR_surfaceless_context 1 +#endif /* EGL_KHR_surfaceless_context */ + +#ifndef EGL_KHR_vg_parent_image +#define EGL_KHR_vg_parent_image 1 +#define EGL_VG_PARENT_IMAGE_KHR 0x30BA +#endif /* EGL_KHR_vg_parent_image */ + +#ifndef EGL_KHR_wait_sync +#define EGL_KHR_wait_sync 1 +typedef EGLint (EGLAPIENTRYP PFNEGLWAITSYNCKHRPROC) (EGLDisplay dpy, EGLSyncKHR sync, EGLint flags); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLint EGLAPIENTRY eglWaitSyncKHR (EGLDisplay dpy, EGLSyncKHR sync, EGLint flags); +#endif +#endif /* EGL_KHR_wait_sync */ + +#ifndef EGL_ANDROID_blob_cache +#define EGL_ANDROID_blob_cache 1 +typedef khronos_ssize_t EGLsizeiANDROID; +typedef void (*EGLSetBlobFuncANDROID) (const void *key, EGLsizeiANDROID keySize, const void *value, EGLsizeiANDROID valueSize); +typedef EGLsizeiANDROID (*EGLGetBlobFuncANDROID) (const void *key, EGLsizeiANDROID keySize, void *value, EGLsizeiANDROID valueSize); +typedef void (EGLAPIENTRYP PFNEGLSETBLOBCACHEFUNCSANDROIDPROC) (EGLDisplay dpy, EGLSetBlobFuncANDROID set, EGLGetBlobFuncANDROID get); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI void EGLAPIENTRY eglSetBlobCacheFuncsANDROID (EGLDisplay dpy, EGLSetBlobFuncANDROID set, EGLGetBlobFuncANDROID get); +#endif +#endif /* EGL_ANDROID_blob_cache */ + +#ifndef EGL_ANDROID_framebuffer_target +#define 
EGL_ANDROID_framebuffer_target 1 +#define EGL_FRAMEBUFFER_TARGET_ANDROID 0x3147 +#endif /* EGL_ANDROID_framebuffer_target */ + +#ifndef EGL_ANDROID_image_native_buffer +#define EGL_ANDROID_image_native_buffer 1 +#define EGL_NATIVE_BUFFER_ANDROID 0x3140 +#endif /* EGL_ANDROID_image_native_buffer */ + +#ifndef EGL_ANDROID_native_fence_sync +#define EGL_ANDROID_native_fence_sync 1 +#define EGL_SYNC_NATIVE_FENCE_ANDROID 0x3144 +#define EGL_SYNC_NATIVE_FENCE_FD_ANDROID 0x3145 +#define EGL_SYNC_NATIVE_FENCE_SIGNALED_ANDROID 0x3146 +#define EGL_NO_NATIVE_FENCE_FD_ANDROID -1 +typedef EGLint (EGLAPIENTRYP PFNEGLDUPNATIVEFENCEFDANDROIDPROC) (EGLDisplay dpy, EGLSyncKHR sync); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLint EGLAPIENTRY eglDupNativeFenceFDANDROID (EGLDisplay dpy, EGLSyncKHR sync); +#endif +#endif /* EGL_ANDROID_native_fence_sync */ + +#ifndef EGL_ANDROID_recordable +#define EGL_ANDROID_recordable 1 +#define EGL_RECORDABLE_ANDROID 0x3142 +#endif /* EGL_ANDROID_recordable */ + +#ifndef EGL_ANGLE_d3d_share_handle_client_buffer +#define EGL_ANGLE_d3d_share_handle_client_buffer 1 +#define EGL_D3D_TEXTURE_2D_SHARE_HANDLE_ANGLE 0x3200 +#endif /* EGL_ANGLE_d3d_share_handle_client_buffer */ + +#ifndef EGL_ANGLE_query_surface_pointer +#define EGL_ANGLE_query_surface_pointer 1 +typedef EGLBoolean (EGLAPIENTRYP PFNEGLQUERYSURFACEPOINTERANGLEPROC) (EGLDisplay dpy, EGLSurface surface, EGLint attribute, void **value); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLBoolean EGLAPIENTRY eglQuerySurfacePointerANGLE (EGLDisplay dpy, EGLSurface surface, EGLint attribute, void **value); +#endif +#endif /* EGL_ANGLE_query_surface_pointer */ + +#ifndef EGL_ANGLE_surface_d3d_texture_2d_share_handle +#define EGL_ANGLE_surface_d3d_texture_2d_share_handle 1 +#endif /* EGL_ANGLE_surface_d3d_texture_2d_share_handle */ + +#ifndef EGL_ARM_pixmap_multisample_discard +#define EGL_ARM_pixmap_multisample_discard 1 +#define EGL_DISCARD_SAMPLES_ARM 0x3286 +#endif /* EGL_ARM_pixmap_multisample_discard */ + 
+#ifndef EGL_EXT_buffer_age +#define EGL_EXT_buffer_age 1 +#define EGL_BUFFER_AGE_EXT 0x313D +#endif /* EGL_EXT_buffer_age */ + +#ifndef EGL_EXT_client_extensions +#define EGL_EXT_client_extensions 1 +#endif /* EGL_EXT_client_extensions */ + +#ifndef EGL_EXT_create_context_robustness +#define EGL_EXT_create_context_robustness 1 +#define EGL_CONTEXT_OPENGL_ROBUST_ACCESS_EXT 0x30BF +#define EGL_CONTEXT_OPENGL_RESET_NOTIFICATION_STRATEGY_EXT 0x3138 +#define EGL_NO_RESET_NOTIFICATION_EXT 0x31BE +#define EGL_LOSE_CONTEXT_ON_RESET_EXT 0x31BF +#endif /* EGL_EXT_create_context_robustness */ + +#ifndef EGL_EXT_device_base +#define EGL_EXT_device_base 1 +typedef void *EGLDeviceEXT; +#define EGL_NO_DEVICE_EXT ((EGLDeviceEXT)(0)) +#define EGL_BAD_DEVICE_EXT 0x322B +#define EGL_DEVICE_EXT 0x322C +typedef EGLBoolean (EGLAPIENTRYP PFNEGLQUERYDEVICEATTRIBEXTPROC) (EGLDeviceEXT device, EGLint attribute, EGLAttrib *value); +typedef const char *(EGLAPIENTRYP PFNEGLQUERYDEVICESTRINGEXTPROC) (EGLDeviceEXT device, EGLint name); +typedef EGLBoolean (EGLAPIENTRYP PFNEGLQUERYDEVICESEXTPROC) (EGLint max_devices, EGLDeviceEXT *devices, EGLint *num_devices); +typedef EGLBoolean (EGLAPIENTRYP PFNEGLQUERYDISPLAYATTRIBEXTPROC) (EGLDisplay dpy, EGLint attribute, EGLAttrib *value); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLBoolean EGLAPIENTRY eglQueryDeviceAttribEXT (EGLDeviceEXT device, EGLint attribute, EGLAttrib *value); +EGLAPI const char *EGLAPIENTRY eglQueryDeviceStringEXT (EGLDeviceEXT device, EGLint name); +EGLAPI EGLBoolean EGLAPIENTRY eglQueryDevicesEXT (EGLint max_devices, EGLDeviceEXT *devices, EGLint *num_devices); +EGLAPI EGLBoolean EGLAPIENTRY eglQueryDisplayAttribEXT (EGLDisplay dpy, EGLint attribute, EGLAttrib *value); +#endif +#endif /* EGL_EXT_device_base */ + +#ifndef EGL_EXT_device_drm +#define EGL_EXT_device_drm 1 +#define EGL_DRM_DEVICE_FILE_EXT 0x3233 +#endif /* EGL_EXT_device_drm */ + +#ifndef EGL_EXT_device_openwf +#define EGL_EXT_device_openwf 1 +#define 
EGL_OPENWF_DEVICE_ID_EXT 0x3237 +#endif /* EGL_EXT_device_openwf */ + +#ifndef EGL_EXT_image_dma_buf_import +#define EGL_EXT_image_dma_buf_import 1 +#define EGL_LINUX_DMA_BUF_EXT 0x3270 +#define EGL_LINUX_DRM_FOURCC_EXT 0x3271 +#define EGL_DMA_BUF_PLANE0_FD_EXT 0x3272 +#define EGL_DMA_BUF_PLANE0_OFFSET_EXT 0x3273 +#define EGL_DMA_BUF_PLANE0_PITCH_EXT 0x3274 +#define EGL_DMA_BUF_PLANE1_FD_EXT 0x3275 +#define EGL_DMA_BUF_PLANE1_OFFSET_EXT 0x3276 +#define EGL_DMA_BUF_PLANE1_PITCH_EXT 0x3277 +#define EGL_DMA_BUF_PLANE2_FD_EXT 0x3278 +#define EGL_DMA_BUF_PLANE2_OFFSET_EXT 0x3279 +#define EGL_DMA_BUF_PLANE2_PITCH_EXT 0x327A +#define EGL_YUV_COLOR_SPACE_HINT_EXT 0x327B +#define EGL_SAMPLE_RANGE_HINT_EXT 0x327C +#define EGL_YUV_CHROMA_HORIZONTAL_SITING_HINT_EXT 0x327D +#define EGL_YUV_CHROMA_VERTICAL_SITING_HINT_EXT 0x327E +#define EGL_ITU_REC601_EXT 0x327F +#define EGL_ITU_REC709_EXT 0x3280 +#define EGL_ITU_REC2020_EXT 0x3281 +#define EGL_YUV_FULL_RANGE_EXT 0x3282 +#define EGL_YUV_NARROW_RANGE_EXT 0x3283 +#define EGL_YUV_CHROMA_SITING_0_EXT 0x3284 +#define EGL_YUV_CHROMA_SITING_0_5_EXT 0x3285 +#endif /* EGL_EXT_image_dma_buf_import */ + +#ifndef EGL_EXT_multiview_window +#define EGL_EXT_multiview_window 1 +#define EGL_MULTIVIEW_VIEW_COUNT_EXT 0x3134 +#endif /* EGL_EXT_multiview_window */ + +#ifndef EGL_EXT_output_base +#define EGL_EXT_output_base 1 +typedef void *EGLOutputLayerEXT; +typedef void *EGLOutputPortEXT; +#define EGL_NO_OUTPUT_LAYER_EXT ((EGLOutputLayerEXT)0) +#define EGL_NO_OUTPUT_PORT_EXT ((EGLOutputPortEXT)0) +#define EGL_BAD_OUTPUT_LAYER_EXT 0x322D +#define EGL_BAD_OUTPUT_PORT_EXT 0x322E +#define EGL_SWAP_INTERVAL_EXT 0x322F +typedef EGLBoolean (EGLAPIENTRYP PFNEGLGETOUTPUTLAYERSEXTPROC) (EGLDisplay dpy, const EGLAttrib *attrib_list, EGLOutputLayerEXT *layers, EGLint max_layers, EGLint *num_layers); +typedef EGLBoolean (EGLAPIENTRYP PFNEGLGETOUTPUTPORTSEXTPROC) (EGLDisplay dpy, const EGLAttrib *attrib_list, EGLOutputPortEXT *ports, EGLint max_ports, EGLint 
*num_ports); +typedef EGLBoolean (EGLAPIENTRYP PFNEGLOUTPUTLAYERATTRIBEXTPROC) (EGLDisplay dpy, EGLOutputLayerEXT layer, EGLint attribute, EGLAttrib value); +typedef EGLBoolean (EGLAPIENTRYP PFNEGLQUERYOUTPUTLAYERATTRIBEXTPROC) (EGLDisplay dpy, EGLOutputLayerEXT layer, EGLint attribute, EGLAttrib *value); +typedef const char *(EGLAPIENTRYP PFNEGLQUERYOUTPUTLAYERSTRINGEXTPROC) (EGLDisplay dpy, EGLOutputLayerEXT layer, EGLint name); +typedef EGLBoolean (EGLAPIENTRYP PFNEGLOUTPUTPORTATTRIBEXTPROC) (EGLDisplay dpy, EGLOutputPortEXT port, EGLint attribute, EGLAttrib value); +typedef EGLBoolean (EGLAPIENTRYP PFNEGLQUERYOUTPUTPORTATTRIBEXTPROC) (EGLDisplay dpy, EGLOutputPortEXT port, EGLint attribute, EGLAttrib *value); +typedef const char *(EGLAPIENTRYP PFNEGLQUERYOUTPUTPORTSTRINGEXTPROC) (EGLDisplay dpy, EGLOutputPortEXT port, EGLint name); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLBoolean EGLAPIENTRY eglGetOutputLayersEXT (EGLDisplay dpy, const EGLAttrib *attrib_list, EGLOutputLayerEXT *layers, EGLint max_layers, EGLint *num_layers); +EGLAPI EGLBoolean EGLAPIENTRY eglGetOutputPortsEXT (EGLDisplay dpy, const EGLAttrib *attrib_list, EGLOutputPortEXT *ports, EGLint max_ports, EGLint *num_ports); +EGLAPI EGLBoolean EGLAPIENTRY eglOutputLayerAttribEXT (EGLDisplay dpy, EGLOutputLayerEXT layer, EGLint attribute, EGLAttrib value); +EGLAPI EGLBoolean EGLAPIENTRY eglQueryOutputLayerAttribEXT (EGLDisplay dpy, EGLOutputLayerEXT layer, EGLint attribute, EGLAttrib *value); +EGLAPI const char *EGLAPIENTRY eglQueryOutputLayerStringEXT (EGLDisplay dpy, EGLOutputLayerEXT layer, EGLint name); +EGLAPI EGLBoolean EGLAPIENTRY eglOutputPortAttribEXT (EGLDisplay dpy, EGLOutputPortEXT port, EGLint attribute, EGLAttrib value); +EGLAPI EGLBoolean EGLAPIENTRY eglQueryOutputPortAttribEXT (EGLDisplay dpy, EGLOutputPortEXT port, EGLint attribute, EGLAttrib *value); +EGLAPI const char *EGLAPIENTRY eglQueryOutputPortStringEXT (EGLDisplay dpy, EGLOutputPortEXT port, EGLint name); +#endif +#endif /* 
EGL_EXT_output_base */ + +#ifndef EGL_EXT_output_drm +#define EGL_EXT_output_drm 1 +#define EGL_DRM_CRTC_EXT 0x3234 +#define EGL_DRM_PLANE_EXT 0x3235 +#define EGL_DRM_CONNECTOR_EXT 0x3236 +#endif /* EGL_EXT_output_drm */ + +#ifndef EGL_EXT_output_openwf +#define EGL_EXT_output_openwf 1 +#define EGL_OPENWF_PIPELINE_ID_EXT 0x3238 +#define EGL_OPENWF_PORT_ID_EXT 0x3239 +#endif /* EGL_EXT_output_openwf */ + +#ifndef EGL_EXT_platform_base +#define EGL_EXT_platform_base 1 +typedef EGLDisplay (EGLAPIENTRYP PFNEGLGETPLATFORMDISPLAYEXTPROC) (EGLenum platform, void *native_display, const EGLint *attrib_list); +typedef EGLSurface (EGLAPIENTRYP PFNEGLCREATEPLATFORMWINDOWSURFACEEXTPROC) (EGLDisplay dpy, EGLConfig config, void *native_window, const EGLint *attrib_list); +typedef EGLSurface (EGLAPIENTRYP PFNEGLCREATEPLATFORMPIXMAPSURFACEEXTPROC) (EGLDisplay dpy, EGLConfig config, void *native_pixmap, const EGLint *attrib_list); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLDisplay EGLAPIENTRY eglGetPlatformDisplayEXT (EGLenum platform, void *native_display, const EGLint *attrib_list); +EGLAPI EGLSurface EGLAPIENTRY eglCreatePlatformWindowSurfaceEXT (EGLDisplay dpy, EGLConfig config, void *native_window, const EGLint *attrib_list); +EGLAPI EGLSurface EGLAPIENTRY eglCreatePlatformPixmapSurfaceEXT (EGLDisplay dpy, EGLConfig config, void *native_pixmap, const EGLint *attrib_list); +#endif +#endif /* EGL_EXT_platform_base */ + +#ifndef EGL_EXT_platform_device +#define EGL_EXT_platform_device 1 +#define EGL_PLATFORM_DEVICE_EXT 0x313F +#endif /* EGL_EXT_platform_device */ + +#ifndef EGL_EXT_platform_wayland +#define EGL_EXT_platform_wayland 1 +#define EGL_PLATFORM_WAYLAND_EXT 0x31D8 +#endif /* EGL_EXT_platform_wayland */ + +#ifndef EGL_EXT_platform_x11 +#define EGL_EXT_platform_x11 1 +#define EGL_PLATFORM_X11_EXT 0x31D5 +#define EGL_PLATFORM_X11_SCREEN_EXT 0x31D6 +#endif /* EGL_EXT_platform_x11 */ + +#ifndef EGL_EXT_protected_surface +#define EGL_EXT_protected_surface 1 +#define 
EGL_PROTECTED_CONTENT_EXT 0x32C0 +#endif /* EGL_EXT_protected_surface */ + +#ifndef EGL_EXT_stream_consumer_egloutput +#define EGL_EXT_stream_consumer_egloutput 1 +typedef EGLBoolean (EGLAPIENTRYP PFNEGLSTREAMCONSUMEROUTPUTEXTPROC) (EGLDisplay dpy, EGLStreamKHR stream, EGLOutputLayerEXT layer); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLBoolean EGLAPIENTRY eglStreamConsumerOutputEXT (EGLDisplay dpy, EGLStreamKHR stream, EGLOutputLayerEXT layer); +#endif +#endif /* EGL_EXT_stream_consumer_egloutput */ + +#ifndef EGL_EXT_swap_buffers_with_damage +#define EGL_EXT_swap_buffers_with_damage 1 +typedef EGLBoolean (EGLAPIENTRYP PFNEGLSWAPBUFFERSWITHDAMAGEEXTPROC) (EGLDisplay dpy, EGLSurface surface, EGLint *rects, EGLint n_rects); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLBoolean EGLAPIENTRY eglSwapBuffersWithDamageEXT (EGLDisplay dpy, EGLSurface surface, EGLint *rects, EGLint n_rects); +#endif +#endif /* EGL_EXT_swap_buffers_with_damage */ + +#ifndef EGL_HI_clientpixmap +#define EGL_HI_clientpixmap 1 +struct EGLClientPixmapHI { + void *pData; + EGLint iWidth; + EGLint iHeight; + EGLint iStride; +}; +#define EGL_CLIENT_PIXMAP_POINTER_HI 0x8F74 +typedef EGLSurface (EGLAPIENTRYP PFNEGLCREATEPIXMAPSURFACEHIPROC) (EGLDisplay dpy, EGLConfig config, struct EGLClientPixmapHI *pixmap); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLSurface EGLAPIENTRY eglCreatePixmapSurfaceHI (EGLDisplay dpy, EGLConfig config, struct EGLClientPixmapHI *pixmap); +#endif +#endif /* EGL_HI_clientpixmap */ + +#ifndef EGL_HI_colorformats +#define EGL_HI_colorformats 1 +#define EGL_COLOR_FORMAT_HI 0x8F70 +#define EGL_COLOR_RGB_HI 0x8F71 +#define EGL_COLOR_RGBA_HI 0x8F72 +#define EGL_COLOR_ARGB_HI 0x8F73 +#endif /* EGL_HI_colorformats */ + +#ifndef EGL_IMG_context_priority +#define EGL_IMG_context_priority 1 +#define EGL_CONTEXT_PRIORITY_LEVEL_IMG 0x3100 +#define EGL_CONTEXT_PRIORITY_HIGH_IMG 0x3101 +#define EGL_CONTEXT_PRIORITY_MEDIUM_IMG 0x3102 +#define EGL_CONTEXT_PRIORITY_LOW_IMG 0x3103 +#endif /* 
EGL_IMG_context_priority */ + +#ifndef EGL_MESA_drm_image +#define EGL_MESA_drm_image 1 +#define EGL_DRM_BUFFER_FORMAT_MESA 0x31D0 +#define EGL_DRM_BUFFER_USE_MESA 0x31D1 +#define EGL_DRM_BUFFER_FORMAT_ARGB32_MESA 0x31D2 +#define EGL_DRM_BUFFER_MESA 0x31D3 +#define EGL_DRM_BUFFER_STRIDE_MESA 0x31D4 +#define EGL_DRM_BUFFER_USE_SCANOUT_MESA 0x00000001 +#define EGL_DRM_BUFFER_USE_SHARE_MESA 0x00000002 +typedef EGLImageKHR (EGLAPIENTRYP PFNEGLCREATEDRMIMAGEMESAPROC) (EGLDisplay dpy, const EGLint *attrib_list); +typedef EGLBoolean (EGLAPIENTRYP PFNEGLEXPORTDRMIMAGEMESAPROC) (EGLDisplay dpy, EGLImageKHR image, EGLint *name, EGLint *handle, EGLint *stride); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLImageKHR EGLAPIENTRY eglCreateDRMImageMESA (EGLDisplay dpy, const EGLint *attrib_list); +EGLAPI EGLBoolean EGLAPIENTRY eglExportDRMImageMESA (EGLDisplay dpy, EGLImageKHR image, EGLint *name, EGLint *handle, EGLint *stride); +#endif +#endif /* EGL_MESA_drm_image */ + +#ifndef EGL_MESA_platform_gbm +#define EGL_MESA_platform_gbm 1 +#define EGL_PLATFORM_GBM_MESA 0x31D7 +#endif /* EGL_MESA_platform_gbm */ + +#ifndef EGL_NOK_swap_region +#define EGL_NOK_swap_region 1 +typedef EGLBoolean (EGLAPIENTRYP PFNEGLSWAPBUFFERSREGIONNOKPROC) (EGLDisplay dpy, EGLSurface surface, EGLint numRects, const EGLint *rects); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLBoolean EGLAPIENTRY eglSwapBuffersRegionNOK (EGLDisplay dpy, EGLSurface surface, EGLint numRects, const EGLint *rects); +#endif +#endif /* EGL_NOK_swap_region */ + +#ifndef EGL_NOK_swap_region2 +#define EGL_NOK_swap_region2 1 +typedef EGLBoolean (EGLAPIENTRYP PFNEGLSWAPBUFFERSREGION2NOKPROC) (EGLDisplay dpy, EGLSurface surface, EGLint numRects, const EGLint *rects); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLBoolean EGLAPIENTRY eglSwapBuffersRegion2NOK (EGLDisplay dpy, EGLSurface surface, EGLint numRects, const EGLint *rects); +#endif +#endif /* EGL_NOK_swap_region2 */ + +#ifndef EGL_NOK_texture_from_pixmap +#define EGL_NOK_texture_from_pixmap 
1 +#define EGL_Y_INVERTED_NOK 0x307F +#endif /* EGL_NOK_texture_from_pixmap */ + +#ifndef EGL_NV_3dvision_surface +#define EGL_NV_3dvision_surface 1 +#define EGL_AUTO_STEREO_NV 0x3136 +#endif /* EGL_NV_3dvision_surface */ + +#ifndef EGL_NV_coverage_sample +#define EGL_NV_coverage_sample 1 +#define EGL_COVERAGE_BUFFERS_NV 0x30E0 +#define EGL_COVERAGE_SAMPLES_NV 0x30E1 +#endif /* EGL_NV_coverage_sample */ + +#ifndef EGL_NV_coverage_sample_resolve +#define EGL_NV_coverage_sample_resolve 1 +#define EGL_COVERAGE_SAMPLE_RESOLVE_NV 0x3131 +#define EGL_COVERAGE_SAMPLE_RESOLVE_DEFAULT_NV 0x3132 +#define EGL_COVERAGE_SAMPLE_RESOLVE_NONE_NV 0x3133 +#endif /* EGL_NV_coverage_sample_resolve */ + +#ifndef EGL_NV_cuda_event +#define EGL_NV_cuda_event 1 +#define EGL_CUDA_EVENT_HANDLE_NV 0x323B +#define EGL_SYNC_CUDA_EVENT_NV 0x323C +#define EGL_SYNC_CUDA_EVENT_COMPLETE_NV 0x323D +#endif /* EGL_NV_cuda_event */ + +#ifndef EGL_NV_depth_nonlinear +#define EGL_NV_depth_nonlinear 1 +#define EGL_DEPTH_ENCODING_NV 0x30E2 +#define EGL_DEPTH_ENCODING_NONE_NV 0 +#define EGL_DEPTH_ENCODING_NONLINEAR_NV 0x30E3 +#endif /* EGL_NV_depth_nonlinear */ + +#ifndef EGL_NV_device_cuda +#define EGL_NV_device_cuda 1 +#define EGL_CUDA_DEVICE_NV 0x323A +#endif /* EGL_NV_device_cuda */ + +#ifndef EGL_NV_native_query +#define EGL_NV_native_query 1 +typedef EGLBoolean (EGLAPIENTRYP PFNEGLQUERYNATIVEDISPLAYNVPROC) (EGLDisplay dpy, EGLNativeDisplayType *display_id); +typedef EGLBoolean (EGLAPIENTRYP PFNEGLQUERYNATIVEWINDOWNVPROC) (EGLDisplay dpy, EGLSurface surf, EGLNativeWindowType *window); +typedef EGLBoolean (EGLAPIENTRYP PFNEGLQUERYNATIVEPIXMAPNVPROC) (EGLDisplay dpy, EGLSurface surf, EGLNativePixmapType *pixmap); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLBoolean EGLAPIENTRY eglQueryNativeDisplayNV (EGLDisplay dpy, EGLNativeDisplayType *display_id); +EGLAPI EGLBoolean EGLAPIENTRY eglQueryNativeWindowNV (EGLDisplay dpy, EGLSurface surf, EGLNativeWindowType *window); +EGLAPI EGLBoolean EGLAPIENTRY 
eglQueryNativePixmapNV (EGLDisplay dpy, EGLSurface surf, EGLNativePixmapType *pixmap); +#endif +#endif /* EGL_NV_native_query */ + +#ifndef EGL_NV_post_convert_rounding +#define EGL_NV_post_convert_rounding 1 +#endif /* EGL_NV_post_convert_rounding */ + +#ifndef EGL_NV_post_sub_buffer +#define EGL_NV_post_sub_buffer 1 +#define EGL_POST_SUB_BUFFER_SUPPORTED_NV 0x30BE +typedef EGLBoolean (EGLAPIENTRYP PFNEGLPOSTSUBBUFFERNVPROC) (EGLDisplay dpy, EGLSurface surface, EGLint x, EGLint y, EGLint width, EGLint height); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLBoolean EGLAPIENTRY eglPostSubBufferNV (EGLDisplay dpy, EGLSurface surface, EGLint x, EGLint y, EGLint width, EGLint height); +#endif +#endif /* EGL_NV_post_sub_buffer */ + +#ifndef EGL_NV_stream_sync +#define EGL_NV_stream_sync 1 +#define EGL_SYNC_NEW_FRAME_NV 0x321F +typedef EGLSyncKHR (EGLAPIENTRYP PFNEGLCREATESTREAMSYNCNVPROC) (EGLDisplay dpy, EGLStreamKHR stream, EGLenum type, const EGLint *attrib_list); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLSyncKHR EGLAPIENTRY eglCreateStreamSyncNV (EGLDisplay dpy, EGLStreamKHR stream, EGLenum type, const EGLint *attrib_list); +#endif +#endif /* EGL_NV_stream_sync */ + +#ifndef EGL_NV_sync +#define EGL_NV_sync 1 +typedef void *EGLSyncNV; +typedef khronos_utime_nanoseconds_t EGLTimeNV; +#ifdef KHRONOS_SUPPORT_INT64 +#define EGL_SYNC_PRIOR_COMMANDS_COMPLETE_NV 0x30E6 +#define EGL_SYNC_STATUS_NV 0x30E7 +#define EGL_SIGNALED_NV 0x30E8 +#define EGL_UNSIGNALED_NV 0x30E9 +#define EGL_SYNC_FLUSH_COMMANDS_BIT_NV 0x0001 +#define EGL_FOREVER_NV 0xFFFFFFFFFFFFFFFFull +#define EGL_ALREADY_SIGNALED_NV 0x30EA +#define EGL_TIMEOUT_EXPIRED_NV 0x30EB +#define EGL_CONDITION_SATISFIED_NV 0x30EC +#define EGL_SYNC_TYPE_NV 0x30ED +#define EGL_SYNC_CONDITION_NV 0x30EE +#define EGL_SYNC_FENCE_NV 0x30EF +#define EGL_NO_SYNC_NV ((EGLSyncNV)0) +typedef EGLSyncNV (EGLAPIENTRYP PFNEGLCREATEFENCESYNCNVPROC) (EGLDisplay dpy, EGLenum condition, const EGLint *attrib_list); +typedef EGLBoolean (EGLAPIENTRYP 
PFNEGLDESTROYSYNCNVPROC) (EGLSyncNV sync); +typedef EGLBoolean (EGLAPIENTRYP PFNEGLFENCENVPROC) (EGLSyncNV sync); +typedef EGLint (EGLAPIENTRYP PFNEGLCLIENTWAITSYNCNVPROC) (EGLSyncNV sync, EGLint flags, EGLTimeNV timeout); +typedef EGLBoolean (EGLAPIENTRYP PFNEGLSIGNALSYNCNVPROC) (EGLSyncNV sync, EGLenum mode); +typedef EGLBoolean (EGLAPIENTRYP PFNEGLGETSYNCATTRIBNVPROC) (EGLSyncNV sync, EGLint attribute, EGLint *value); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLSyncNV EGLAPIENTRY eglCreateFenceSyncNV (EGLDisplay dpy, EGLenum condition, const EGLint *attrib_list); +EGLAPI EGLBoolean EGLAPIENTRY eglDestroySyncNV (EGLSyncNV sync); +EGLAPI EGLBoolean EGLAPIENTRY eglFenceNV (EGLSyncNV sync); +EGLAPI EGLint EGLAPIENTRY eglClientWaitSyncNV (EGLSyncNV sync, EGLint flags, EGLTimeNV timeout); +EGLAPI EGLBoolean EGLAPIENTRY eglSignalSyncNV (EGLSyncNV sync, EGLenum mode); +EGLAPI EGLBoolean EGLAPIENTRY eglGetSyncAttribNV (EGLSyncNV sync, EGLint attribute, EGLint *value); +#endif +#endif /* KHRONOS_SUPPORT_INT64 */ +#endif /* EGL_NV_sync */ + +#ifndef EGL_NV_system_time +#define EGL_NV_system_time 1 +typedef khronos_utime_nanoseconds_t EGLuint64NV; +#ifdef KHRONOS_SUPPORT_INT64 +typedef EGLuint64NV (EGLAPIENTRYP PFNEGLGETSYSTEMTIMEFREQUENCYNVPROC) (void); +typedef EGLuint64NV (EGLAPIENTRYP PFNEGLGETSYSTEMTIMENVPROC) (void); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLuint64NV EGLAPIENTRY eglGetSystemTimeFrequencyNV (void); +EGLAPI EGLuint64NV EGLAPIENTRY eglGetSystemTimeNV (void); +#endif +#endif /* KHRONOS_SUPPORT_INT64 */ +#endif /* EGL_NV_system_time */ + +#ifndef EGL_TIZEN_image_native_buffer +#define EGL_TIZEN_image_native_buffer 1 +#define EGL_NATIVE_BUFFER_TIZEN 0x32A0 +#endif /* EGL_TIZEN_image_native_buffer */ + +#ifndef EGL_TIZEN_image_native_surface +#define EGL_TIZEN_image_native_surface 1 +#define EGL_NATIVE_SURFACE_TIZEN 0x32A1 +#endif /* EGL_TIZEN_image_native_surface */ + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/opengles-3.1/include/EGL/eglplatform.h 
b/opengles-3.1/include/EGL/eglplatform.h new file mode 100644 index 0000000000..cc4b100c30 --- /dev/null +++ b/opengles-3.1/include/EGL/eglplatform.h @@ -0,0 +1,122 @@ +#ifndef __eglplatform_h_ +#define __eglplatform_h_ + +/* +** Copyright (c) 2007-2009 The Khronos Group Inc. +** +** Permission is hereby granted, free of charge, to any person obtaining a +** copy of this software and/or associated documentation files (the +** "Materials"), to deal in the Materials without restriction, including +** without limitation the rights to use, copy, modify, merge, publish, +** distribute, sublicense, and/or sell copies of the Materials, and to +** permit persons to whom the Materials are furnished to do so, subject to +** the following conditions: +** +** The above copyright notice and this permission notice shall be included +** in all copies or substantial portions of the Materials. +** +** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +** MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. +*/ + +/* Platform-specific types and definitions for egl.h + * $Revision: 12306 $ on $Date: 2010-08-25 09:51:28 -0700 (Wed, 25 Aug 2010) $ + * + * Adopters may modify khrplatform.h and this file to suit their platform. + * You are encouraged to submit all modifications to the Khronos group so that + * they can be included in future versions of this file. Please submit changes + * by sending them to the public Khronos Bugzilla (http://khronos.org/bugzilla) + * by filing a bug against product "EGL" component "Registry". + */ + +#include + +/* Macros used in EGL function prototype declarations. 
+ * + * EGL functions should be prototyped as: + * + * EGLAPI return-type EGLAPIENTRY eglFunction(arguments); + * typedef return-type (EXPAPIENTRYP PFNEGLFUNCTIONPROC) (arguments); + * + * KHRONOS_APICALL and KHRONOS_APIENTRY are defined in KHR/khrplatform.h + */ + +#ifndef EGLAPI +#define EGLAPI KHRONOS_APICALL +#endif + +#ifndef EGLAPIENTRY +#define EGLAPIENTRY KHRONOS_APIENTRY +#endif +#define EGLAPIENTRYP EGLAPIENTRY* + +/* The types NativeDisplayType, NativeWindowType, and NativePixmapType + * are aliases of window-system-dependent types, such as X Display * or + * Windows Device Context. They must be defined in platform-specific + * code below. The EGL-prefixed versions of Native*Type are the same + * types, renamed in EGL 1.3 so all types in the API start with "EGL". + * + * Khronos STRONGLY RECOMMENDS that you use the default definitions + * provided below, since these changes affect both binary and source + * portability of applications using EGL running on different EGL + * implementations. 
+ */ + +#if defined(_WIN32) || defined(__VC32__) && !defined(__CYGWIN__) && !defined(__SCITECH_SNAP__) /* Win32 and WinCE */ +#ifndef WIN32_LEAN_AND_MEAN +#define WIN32_LEAN_AND_MEAN 1 +#endif +#include + +typedef HDC EGLNativeDisplayType; +typedef HBITMAP EGLNativePixmapType; +typedef HWND EGLNativeWindowType; + +#elif defined(__WINSCW__) || defined(__SYMBIAN32__) /* Symbian */ + +typedef int EGLNativeDisplayType; +typedef void *EGLNativeWindowType; +typedef void *EGLNativePixmapType; + +#elif (defined(__arm__) || defined(__aarch64__)) && defined(__gnu_linux__) /* ARM Linux Mali */ +#include + +typedef void* EGLNativeDisplayType; +typedef void* EGLNativePixmapType; +typedef fbdev_window* EGLNativeWindowType; + +#elif defined(__unix__) + +/* X11 (tentative) */ +#include +#include + +typedef Display *EGLNativeDisplayType; +typedef Pixmap EGLNativePixmapType; +typedef Window EGLNativeWindowType; + +#else +#error "Platform not recognized" +#endif + +/* EGL 1.2 types, renamed for consistency in EGL 1.3 */ +typedef EGLNativeDisplayType NativeDisplayType; +typedef EGLNativePixmapType NativePixmapType; +typedef EGLNativeWindowType NativeWindowType; + + +/* Define EGLint. This must be a signed integral type large enough to contain + * all legal attribute names and values passed into and out of EGL, whether + * their type is boolean, bitmask, enumerant (symbolic constant), integer, + * handle, or other. While in general a 32-bit integer will suffice, if + * handles are 64 bit types, then EGLint should be defined as a signed 64-bit + * integer type. 
+ */ +typedef khronos_int32_t EGLint; + +#endif /* __eglplatform_h */ diff --git a/opengles-3.1/include/GLES/gl.h b/opengles-3.1/include/GLES/gl.h new file mode 100644 index 0000000000..5b8d85a920 --- /dev/null +++ b/opengles-3.1/include/GLES/gl.h @@ -0,0 +1,770 @@ +#ifndef __gl_h_ +#define __gl_h_ + +/* $Revision: 10601 $ on $Date:: 2010-03-04 22:15:27 -0800 #$ */ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * This document is licensed under the SGI Free Software B License Version + * 2.0. For details, see http://oss.sgi.com/projects/FreeB/ . + */ + +typedef void GLvoid; +typedef char GLchar; +typedef unsigned int GLenum; +typedef unsigned char GLboolean; +typedef unsigned int GLbitfield; +typedef khronos_int8_t GLbyte; +typedef short GLshort; +typedef int GLint; +typedef int GLsizei; +typedef khronos_uint8_t GLubyte; +typedef unsigned short GLushort; +typedef unsigned int GLuint; +typedef khronos_float_t GLfloat; +typedef khronos_float_t GLclampf; +typedef khronos_int32_t GLfixed; +typedef khronos_int32_t GLclampx; + +typedef khronos_intptr_t GLintptr; +typedef khronos_ssize_t GLsizeiptr; + + +/*************************************************************/ + +/* OpenGL ES core versions */ +#define GL_VERSION_ES_CM_1_0 1 +#define GL_VERSION_ES_CL_1_0 1 +#define GL_VERSION_ES_CM_1_1 1 +#define GL_VERSION_ES_CL_1_1 1 + +/* ClearBufferMask */ +#define GL_DEPTH_BUFFER_BIT 0x00000100 +#define GL_STENCIL_BUFFER_BIT 0x00000400 +#define GL_COLOR_BUFFER_BIT 0x00004000 + +/* Boolean */ +#define GL_FALSE 0 +#define GL_TRUE 1 + +/* BeginMode */ +#define GL_POINTS 0x0000 +#define GL_LINES 0x0001 +#define GL_LINE_LOOP 0x0002 +#define GL_LINE_STRIP 0x0003 +#define GL_TRIANGLES 0x0004 +#define GL_TRIANGLE_STRIP 0x0005 +#define GL_TRIANGLE_FAN 0x0006 + +/* AlphaFunction */ +#define GL_NEVER 0x0200 +#define GL_LESS 0x0201 +#define GL_EQUAL 0x0202 +#define GL_LEQUAL 0x0203 +#define GL_GREATER 0x0204 +#define GL_NOTEQUAL 0x0205 +#define GL_GEQUAL 0x0206 +#define 
GL_ALWAYS 0x0207 + +/* BlendingFactorDest */ +#define GL_ZERO 0 +#define GL_ONE 1 +#define GL_SRC_COLOR 0x0300 +#define GL_ONE_MINUS_SRC_COLOR 0x0301 +#define GL_SRC_ALPHA 0x0302 +#define GL_ONE_MINUS_SRC_ALPHA 0x0303 +#define GL_DST_ALPHA 0x0304 +#define GL_ONE_MINUS_DST_ALPHA 0x0305 + +/* BlendingFactorSrc */ +/* GL_ZERO */ +/* GL_ONE */ +#define GL_DST_COLOR 0x0306 +#define GL_ONE_MINUS_DST_COLOR 0x0307 +#define GL_SRC_ALPHA_SATURATE 0x0308 +/* GL_SRC_ALPHA */ +/* GL_ONE_MINUS_SRC_ALPHA */ +/* GL_DST_ALPHA */ +/* GL_ONE_MINUS_DST_ALPHA */ + +/* ClipPlaneName */ +#define GL_CLIP_PLANE0 0x3000 +#define GL_CLIP_PLANE1 0x3001 +#define GL_CLIP_PLANE2 0x3002 +#define GL_CLIP_PLANE3 0x3003 +#define GL_CLIP_PLANE4 0x3004 +#define GL_CLIP_PLANE5 0x3005 + +/* ColorMaterialFace */ +/* GL_FRONT_AND_BACK */ + +/* ColorMaterialParameter */ +/* GL_AMBIENT_AND_DIFFUSE */ + +/* ColorPointerType */ +/* GL_UNSIGNED_BYTE */ +/* GL_FLOAT */ +/* GL_FIXED */ + +/* CullFaceMode */ +#define GL_FRONT 0x0404 +#define GL_BACK 0x0405 +#define GL_FRONT_AND_BACK 0x0408 + +/* DepthFunction */ +/* GL_NEVER */ +/* GL_LESS */ +/* GL_EQUAL */ +/* GL_LEQUAL */ +/* GL_GREATER */ +/* GL_NOTEQUAL */ +/* GL_GEQUAL */ +/* GL_ALWAYS */ + +/* EnableCap */ +#define GL_FOG 0x0B60 +#define GL_LIGHTING 0x0B50 +#define GL_TEXTURE_2D 0x0DE1 +#define GL_CULL_FACE 0x0B44 +#define GL_ALPHA_TEST 0x0BC0 +#define GL_BLEND 0x0BE2 +#define GL_COLOR_LOGIC_OP 0x0BF2 +#define GL_DITHER 0x0BD0 +#define GL_STENCIL_TEST 0x0B90 +#define GL_DEPTH_TEST 0x0B71 +/* GL_LIGHT0 */ +/* GL_LIGHT1 */ +/* GL_LIGHT2 */ +/* GL_LIGHT3 */ +/* GL_LIGHT4 */ +/* GL_LIGHT5 */ +/* GL_LIGHT6 */ +/* GL_LIGHT7 */ +#define GL_POINT_SMOOTH 0x0B10 +#define GL_LINE_SMOOTH 0x0B20 +#define GL_SCISSOR_TEST 0x0C11 +#define GL_COLOR_MATERIAL 0x0B57 +#define GL_NORMALIZE 0x0BA1 +#define GL_RESCALE_NORMAL 0x803A +#define GL_POLYGON_OFFSET_FILL 0x8037 +#define GL_VERTEX_ARRAY 0x8074 +#define GL_NORMAL_ARRAY 0x8075 +#define GL_COLOR_ARRAY 0x8076 +#define 
GL_TEXTURE_COORD_ARRAY 0x8078 +#define GL_MULTISAMPLE 0x809D +#define GL_SAMPLE_ALPHA_TO_COVERAGE 0x809E +#define GL_SAMPLE_ALPHA_TO_ONE 0x809F +#define GL_SAMPLE_COVERAGE 0x80A0 + +/* ErrorCode */ +#define GL_NO_ERROR 0 +#define GL_INVALID_ENUM 0x0500 +#define GL_INVALID_VALUE 0x0501 +#define GL_INVALID_OPERATION 0x0502 +#define GL_STACK_OVERFLOW 0x0503 +#define GL_STACK_UNDERFLOW 0x0504 +#define GL_OUT_OF_MEMORY 0x0505 + +/* FogMode */ +/* GL_LINEAR */ +#define GL_EXP 0x0800 +#define GL_EXP2 0x0801 + +/* FogParameter */ +#define GL_FOG_DENSITY 0x0B62 +#define GL_FOG_START 0x0B63 +#define GL_FOG_END 0x0B64 +#define GL_FOG_MODE 0x0B65 +#define GL_FOG_COLOR 0x0B66 + +/* FrontFaceDirection */ +#define GL_CW 0x0900 +#define GL_CCW 0x0901 + +/* GetPName */ +#define GL_CURRENT_COLOR 0x0B00 +#define GL_CURRENT_NORMAL 0x0B02 +#define GL_CURRENT_TEXTURE_COORDS 0x0B03 +#define GL_POINT_SIZE 0x0B11 +#define GL_POINT_SIZE_MIN 0x8126 +#define GL_POINT_SIZE_MAX 0x8127 +#define GL_POINT_FADE_THRESHOLD_SIZE 0x8128 +#define GL_POINT_DISTANCE_ATTENUATION 0x8129 +#define GL_SMOOTH_POINT_SIZE_RANGE 0x0B12 +#define GL_LINE_WIDTH 0x0B21 +#define GL_SMOOTH_LINE_WIDTH_RANGE 0x0B22 +#define GL_ALIASED_POINT_SIZE_RANGE 0x846D +#define GL_ALIASED_LINE_WIDTH_RANGE 0x846E +#define GL_CULL_FACE_MODE 0x0B45 +#define GL_FRONT_FACE 0x0B46 +#define GL_SHADE_MODEL 0x0B54 +#define GL_DEPTH_RANGE 0x0B70 +#define GL_DEPTH_WRITEMASK 0x0B72 +#define GL_DEPTH_CLEAR_VALUE 0x0B73 +#define GL_DEPTH_FUNC 0x0B74 +#define GL_STENCIL_CLEAR_VALUE 0x0B91 +#define GL_STENCIL_FUNC 0x0B92 +#define GL_STENCIL_VALUE_MASK 0x0B93 +#define GL_STENCIL_FAIL 0x0B94 +#define GL_STENCIL_PASS_DEPTH_FAIL 0x0B95 +#define GL_STENCIL_PASS_DEPTH_PASS 0x0B96 +#define GL_STENCIL_REF 0x0B97 +#define GL_STENCIL_WRITEMASK 0x0B98 +#define GL_MATRIX_MODE 0x0BA0 +#define GL_VIEWPORT 0x0BA2 +#define GL_MODELVIEW_STACK_DEPTH 0x0BA3 +#define GL_PROJECTION_STACK_DEPTH 0x0BA4 +#define GL_TEXTURE_STACK_DEPTH 0x0BA5 +#define GL_MODELVIEW_MATRIX 
0x0BA6 +#define GL_PROJECTION_MATRIX 0x0BA7 +#define GL_TEXTURE_MATRIX 0x0BA8 +#define GL_ALPHA_TEST_FUNC 0x0BC1 +#define GL_ALPHA_TEST_REF 0x0BC2 +#define GL_BLEND_DST 0x0BE0 +#define GL_BLEND_SRC 0x0BE1 +#define GL_LOGIC_OP_MODE 0x0BF0 +#define GL_SCISSOR_BOX 0x0C10 +#define GL_SCISSOR_TEST 0x0C11 +#define GL_COLOR_CLEAR_VALUE 0x0C22 +#define GL_COLOR_WRITEMASK 0x0C23 +#define GL_UNPACK_ALIGNMENT 0x0CF5 +#define GL_PACK_ALIGNMENT 0x0D05 +#define GL_MAX_LIGHTS 0x0D31 +#define GL_MAX_CLIP_PLANES 0x0D32 +#define GL_MAX_TEXTURE_SIZE 0x0D33 +#define GL_MAX_MODELVIEW_STACK_DEPTH 0x0D36 +#define GL_MAX_PROJECTION_STACK_DEPTH 0x0D38 +#define GL_MAX_TEXTURE_STACK_DEPTH 0x0D39 +#define GL_MAX_VIEWPORT_DIMS 0x0D3A +#define GL_MAX_TEXTURE_UNITS 0x84E2 +#define GL_SUBPIXEL_BITS 0x0D50 +#define GL_RED_BITS 0x0D52 +#define GL_GREEN_BITS 0x0D53 +#define GL_BLUE_BITS 0x0D54 +#define GL_ALPHA_BITS 0x0D55 +#define GL_DEPTH_BITS 0x0D56 +#define GL_STENCIL_BITS 0x0D57 +#define GL_POLYGON_OFFSET_UNITS 0x2A00 +#define GL_POLYGON_OFFSET_FILL 0x8037 +#define GL_POLYGON_OFFSET_FACTOR 0x8038 +#define GL_TEXTURE_BINDING_2D 0x8069 +#define GL_VERTEX_ARRAY_SIZE 0x807A +#define GL_VERTEX_ARRAY_TYPE 0x807B +#define GL_VERTEX_ARRAY_STRIDE 0x807C +#define GL_NORMAL_ARRAY_TYPE 0x807E +#define GL_NORMAL_ARRAY_STRIDE 0x807F +#define GL_COLOR_ARRAY_SIZE 0x8081 +#define GL_COLOR_ARRAY_TYPE 0x8082 +#define GL_COLOR_ARRAY_STRIDE 0x8083 +#define GL_TEXTURE_COORD_ARRAY_SIZE 0x8088 +#define GL_TEXTURE_COORD_ARRAY_TYPE 0x8089 +#define GL_TEXTURE_COORD_ARRAY_STRIDE 0x808A +#define GL_VERTEX_ARRAY_POINTER 0x808E +#define GL_NORMAL_ARRAY_POINTER 0x808F +#define GL_COLOR_ARRAY_POINTER 0x8090 +#define GL_TEXTURE_COORD_ARRAY_POINTER 0x8092 +#define GL_SAMPLE_BUFFERS 0x80A8 +#define GL_SAMPLES 0x80A9 +#define GL_SAMPLE_COVERAGE_VALUE 0x80AA +#define GL_SAMPLE_COVERAGE_INVERT 0x80AB + +/* GetTextureParameter */ +/* GL_TEXTURE_MAG_FILTER */ +/* GL_TEXTURE_MIN_FILTER */ +/* GL_TEXTURE_WRAP_S */ +/* GL_TEXTURE_WRAP_T 
*/ + +#define GL_NUM_COMPRESSED_TEXTURE_FORMATS 0x86A2 +#define GL_COMPRESSED_TEXTURE_FORMATS 0x86A3 + +/* HintMode */ +#define GL_DONT_CARE 0x1100 +#define GL_FASTEST 0x1101 +#define GL_NICEST 0x1102 + +/* HintTarget */ +#define GL_PERSPECTIVE_CORRECTION_HINT 0x0C50 +#define GL_POINT_SMOOTH_HINT 0x0C51 +#define GL_LINE_SMOOTH_HINT 0x0C52 +#define GL_FOG_HINT 0x0C54 +#define GL_GENERATE_MIPMAP_HINT 0x8192 + +/* LightModelParameter */ +#define GL_LIGHT_MODEL_AMBIENT 0x0B53 +#define GL_LIGHT_MODEL_TWO_SIDE 0x0B52 + +/* LightParameter */ +#define GL_AMBIENT 0x1200 +#define GL_DIFFUSE 0x1201 +#define GL_SPECULAR 0x1202 +#define GL_POSITION 0x1203 +#define GL_SPOT_DIRECTION 0x1204 +#define GL_SPOT_EXPONENT 0x1205 +#define GL_SPOT_CUTOFF 0x1206 +#define GL_CONSTANT_ATTENUATION 0x1207 +#define GL_LINEAR_ATTENUATION 0x1208 +#define GL_QUADRATIC_ATTENUATION 0x1209 + +/* DataType */ +#define GL_BYTE 0x1400 +#define GL_UNSIGNED_BYTE 0x1401 +#define GL_SHORT 0x1402 +#define GL_UNSIGNED_SHORT 0x1403 +#define GL_FLOAT 0x1406 +#define GL_FIXED 0x140C + +/* LogicOp */ +#define GL_CLEAR 0x1500 +#define GL_AND 0x1501 +#define GL_AND_REVERSE 0x1502 +#define GL_COPY 0x1503 +#define GL_AND_INVERTED 0x1504 +#define GL_NOOP 0x1505 +#define GL_XOR 0x1506 +#define GL_OR 0x1507 +#define GL_NOR 0x1508 +#define GL_EQUIV 0x1509 +#define GL_INVERT 0x150A +#define GL_OR_REVERSE 0x150B +#define GL_COPY_INVERTED 0x150C +#define GL_OR_INVERTED 0x150D +#define GL_NAND 0x150E +#define GL_SET 0x150F + +/* MaterialFace */ +/* GL_FRONT_AND_BACK */ + +/* MaterialParameter */ +#define GL_EMISSION 0x1600 +#define GL_SHININESS 0x1601 +#define GL_AMBIENT_AND_DIFFUSE 0x1602 +/* GL_AMBIENT */ +/* GL_DIFFUSE */ +/* GL_SPECULAR */ + +/* MatrixMode */ +#define GL_MODELVIEW 0x1700 +#define GL_PROJECTION 0x1701 +#define GL_TEXTURE 0x1702 + +/* NormalPointerType */ +/* GL_BYTE */ +/* GL_SHORT */ +/* GL_FLOAT */ +/* GL_FIXED */ + +/* PixelFormat */ +#define GL_ALPHA 0x1906 +#define GL_RGB 0x1907 +#define GL_RGBA 
0x1908 +#define GL_LUMINANCE 0x1909 +#define GL_LUMINANCE_ALPHA 0x190A + +/* PixelStoreParameter */ +#define GL_UNPACK_ALIGNMENT 0x0CF5 +#define GL_PACK_ALIGNMENT 0x0D05 + +/* PixelType */ +/* GL_UNSIGNED_BYTE */ +#define GL_UNSIGNED_SHORT_4_4_4_4 0x8033 +#define GL_UNSIGNED_SHORT_5_5_5_1 0x8034 +#define GL_UNSIGNED_SHORT_5_6_5 0x8363 + +/* ShadingModel */ +#define GL_FLAT 0x1D00 +#define GL_SMOOTH 0x1D01 + +/* StencilFunction */ +/* GL_NEVER */ +/* GL_LESS */ +/* GL_EQUAL */ +/* GL_LEQUAL */ +/* GL_GREATER */ +/* GL_NOTEQUAL */ +/* GL_GEQUAL */ +/* GL_ALWAYS */ + +/* StencilOp */ +/* GL_ZERO */ +#define GL_KEEP 0x1E00 +#define GL_REPLACE 0x1E01 +#define GL_INCR 0x1E02 +#define GL_DECR 0x1E03 +/* GL_INVERT */ + +/* StringName */ +#define GL_VENDOR 0x1F00 +#define GL_RENDERER 0x1F01 +#define GL_VERSION 0x1F02 +#define GL_EXTENSIONS 0x1F03 + +/* TexCoordPointerType */ +/* GL_SHORT */ +/* GL_FLOAT */ +/* GL_FIXED */ +/* GL_BYTE */ + +/* TextureEnvMode */ +#define GL_MODULATE 0x2100 +#define GL_DECAL 0x2101 +/* GL_BLEND */ +#define GL_ADD 0x0104 +/* GL_REPLACE */ + +/* TextureEnvParameter */ +#define GL_TEXTURE_ENV_MODE 0x2200 +#define GL_TEXTURE_ENV_COLOR 0x2201 + +/* TextureEnvTarget */ +#define GL_TEXTURE_ENV 0x2300 + +/* TextureMagFilter */ +#define GL_NEAREST 0x2600 +#define GL_LINEAR 0x2601 + +/* TextureMinFilter */ +/* GL_NEAREST */ +/* GL_LINEAR */ +#define GL_NEAREST_MIPMAP_NEAREST 0x2700 +#define GL_LINEAR_MIPMAP_NEAREST 0x2701 +#define GL_NEAREST_MIPMAP_LINEAR 0x2702 +#define GL_LINEAR_MIPMAP_LINEAR 0x2703 + +/* TextureParameterName */ +#define GL_TEXTURE_MAG_FILTER 0x2800 +#define GL_TEXTURE_MIN_FILTER 0x2801 +#define GL_TEXTURE_WRAP_S 0x2802 +#define GL_TEXTURE_WRAP_T 0x2803 +#define GL_GENERATE_MIPMAP 0x8191 + +/* TextureTarget */ +/* GL_TEXTURE_2D */ + +/* TextureUnit */ +#define GL_TEXTURE0 0x84C0 +#define GL_TEXTURE1 0x84C1 +#define GL_TEXTURE2 0x84C2 +#define GL_TEXTURE3 0x84C3 +#define GL_TEXTURE4 0x84C4 +#define GL_TEXTURE5 0x84C5 +#define 
GL_TEXTURE6 0x84C6 +#define GL_TEXTURE7 0x84C7 +#define GL_TEXTURE8 0x84C8 +#define GL_TEXTURE9 0x84C9 +#define GL_TEXTURE10 0x84CA +#define GL_TEXTURE11 0x84CB +#define GL_TEXTURE12 0x84CC +#define GL_TEXTURE13 0x84CD +#define GL_TEXTURE14 0x84CE +#define GL_TEXTURE15 0x84CF +#define GL_TEXTURE16 0x84D0 +#define GL_TEXTURE17 0x84D1 +#define GL_TEXTURE18 0x84D2 +#define GL_TEXTURE19 0x84D3 +#define GL_TEXTURE20 0x84D4 +#define GL_TEXTURE21 0x84D5 +#define GL_TEXTURE22 0x84D6 +#define GL_TEXTURE23 0x84D7 +#define GL_TEXTURE24 0x84D8 +#define GL_TEXTURE25 0x84D9 +#define GL_TEXTURE26 0x84DA +#define GL_TEXTURE27 0x84DB +#define GL_TEXTURE28 0x84DC +#define GL_TEXTURE29 0x84DD +#define GL_TEXTURE30 0x84DE +#define GL_TEXTURE31 0x84DF +#define GL_ACTIVE_TEXTURE 0x84E0 +#define GL_CLIENT_ACTIVE_TEXTURE 0x84E1 + +/* TextureWrapMode */ +#define GL_REPEAT 0x2901 +#define GL_CLAMP_TO_EDGE 0x812F + +/* VertexPointerType */ +/* GL_SHORT */ +/* GL_FLOAT */ +/* GL_FIXED */ +/* GL_BYTE */ + +/* LightName */ +#define GL_LIGHT0 0x4000 +#define GL_LIGHT1 0x4001 +#define GL_LIGHT2 0x4002 +#define GL_LIGHT3 0x4003 +#define GL_LIGHT4 0x4004 +#define GL_LIGHT5 0x4005 +#define GL_LIGHT6 0x4006 +#define GL_LIGHT7 0x4007 + +/* Buffer Objects */ +#define GL_ARRAY_BUFFER 0x8892 +#define GL_ELEMENT_ARRAY_BUFFER 0x8893 + +#define GL_ARRAY_BUFFER_BINDING 0x8894 +#define GL_ELEMENT_ARRAY_BUFFER_BINDING 0x8895 +#define GL_VERTEX_ARRAY_BUFFER_BINDING 0x8896 +#define GL_NORMAL_ARRAY_BUFFER_BINDING 0x8897 +#define GL_COLOR_ARRAY_BUFFER_BINDING 0x8898 +#define GL_TEXTURE_COORD_ARRAY_BUFFER_BINDING 0x889A + +#define GL_STATIC_DRAW 0x88E4 +#define GL_DYNAMIC_DRAW 0x88E8 + +#define GL_BUFFER_SIZE 0x8764 +#define GL_BUFFER_USAGE 0x8765 + +/* Texture combine + dot3 */ +#define GL_SUBTRACT 0x84E7 +#define GL_COMBINE 0x8570 +#define GL_COMBINE_RGB 0x8571 +#define GL_COMBINE_ALPHA 0x8572 +#define GL_RGB_SCALE 0x8573 +#define GL_ADD_SIGNED 0x8574 +#define GL_INTERPOLATE 0x8575 +#define GL_CONSTANT 0x8576 
+#define GL_PRIMARY_COLOR 0x8577 +#define GL_PREVIOUS 0x8578 +#define GL_OPERAND0_RGB 0x8590 +#define GL_OPERAND1_RGB 0x8591 +#define GL_OPERAND2_RGB 0x8592 +#define GL_OPERAND0_ALPHA 0x8598 +#define GL_OPERAND1_ALPHA 0x8599 +#define GL_OPERAND2_ALPHA 0x859A + +#define GL_ALPHA_SCALE 0x0D1C + +#define GL_SRC0_RGB 0x8580 +#define GL_SRC1_RGB 0x8581 +#define GL_SRC2_RGB 0x8582 +#define GL_SRC0_ALPHA 0x8588 +#define GL_SRC1_ALPHA 0x8589 +#define GL_SRC2_ALPHA 0x858A + +#define GL_DOT3_RGB 0x86AE +#define GL_DOT3_RGBA 0x86AF + +/*------------------------------------------------------------------------* + * required OES extension tokens + *------------------------------------------------------------------------*/ + +/* OES_read_format */ +#ifndef GL_OES_read_format +#define GL_IMPLEMENTATION_COLOR_READ_TYPE_OES 0x8B9A +#define GL_IMPLEMENTATION_COLOR_READ_FORMAT_OES 0x8B9B +#endif + +/* GL_OES_compressed_paletted_texture */ +#ifndef GL_OES_compressed_paletted_texture +#define GL_PALETTE4_RGB8_OES 0x8B90 +#define GL_PALETTE4_RGBA8_OES 0x8B91 +#define GL_PALETTE4_R5_G6_B5_OES 0x8B92 +#define GL_PALETTE4_RGBA4_OES 0x8B93 +#define GL_PALETTE4_RGB5_A1_OES 0x8B94 +#define GL_PALETTE8_RGB8_OES 0x8B95 +#define GL_PALETTE8_RGBA8_OES 0x8B96 +#define GL_PALETTE8_R5_G6_B5_OES 0x8B97 +#define GL_PALETTE8_RGBA4_OES 0x8B98 +#define GL_PALETTE8_RGB5_A1_OES 0x8B99 +#endif + +/* OES_point_size_array */ +#ifndef GL_OES_point_size_array +#define GL_POINT_SIZE_ARRAY_OES 0x8B9C +#define GL_POINT_SIZE_ARRAY_TYPE_OES 0x898A +#define GL_POINT_SIZE_ARRAY_STRIDE_OES 0x898B +#define GL_POINT_SIZE_ARRAY_POINTER_OES 0x898C +#define GL_POINT_SIZE_ARRAY_BUFFER_BINDING_OES 0x8B9F +#endif + +/* GL_OES_point_sprite */ +#ifndef GL_OES_point_sprite +#define GL_POINT_SPRITE_OES 0x8861 +#define GL_COORD_REPLACE_OES 0x8862 +#endif + +/*************************************************************/ + +/* Available only in Common profile */ +GL_API void GL_APIENTRY glAlphaFunc (GLenum func, GLclampf ref); 
+GL_API void GL_APIENTRY glClearColor (GLclampf red, GLclampf green, GLclampf blue, GLclampf alpha); +GL_API void GL_APIENTRY glClearDepthf (GLclampf depth); +GL_API void GL_APIENTRY glClipPlanef (GLenum plane, const GLfloat *equation); +GL_API void GL_APIENTRY glColor4f (GLfloat red, GLfloat green, GLfloat blue, GLfloat alpha); +GL_API void GL_APIENTRY glDepthRangef (GLclampf zNear, GLclampf zFar); +GL_API void GL_APIENTRY glFogf (GLenum pname, GLfloat param); +GL_API void GL_APIENTRY glFogfv (GLenum pname, const GLfloat *params); +GL_API void GL_APIENTRY glFrustumf (GLfloat left, GLfloat right, GLfloat bottom, GLfloat top, GLfloat zNear, GLfloat zFar); +GL_API void GL_APIENTRY glGetClipPlanef (GLenum pname, GLfloat eqn[4]); +GL_API void GL_APIENTRY glGetFloatv (GLenum pname, GLfloat *params); +GL_API void GL_APIENTRY glGetLightfv (GLenum light, GLenum pname, GLfloat *params); +GL_API void GL_APIENTRY glGetMaterialfv (GLenum face, GLenum pname, GLfloat *params); +GL_API void GL_APIENTRY glGetTexEnvfv (GLenum env, GLenum pname, GLfloat *params); +GL_API void GL_APIENTRY glGetTexParameterfv (GLenum target, GLenum pname, GLfloat *params); +GL_API void GL_APIENTRY glLightModelf (GLenum pname, GLfloat param); +GL_API void GL_APIENTRY glLightModelfv (GLenum pname, const GLfloat *params); +GL_API void GL_APIENTRY glLightf (GLenum light, GLenum pname, GLfloat param); +GL_API void GL_APIENTRY glLightfv (GLenum light, GLenum pname, const GLfloat *params); +GL_API void GL_APIENTRY glLineWidth (GLfloat width); +GL_API void GL_APIENTRY glLoadMatrixf (const GLfloat *m); +GL_API void GL_APIENTRY glMaterialf (GLenum face, GLenum pname, GLfloat param); +GL_API void GL_APIENTRY glMaterialfv (GLenum face, GLenum pname, const GLfloat *params); +GL_API void GL_APIENTRY glMultMatrixf (const GLfloat *m); +GL_API void GL_APIENTRY glMultiTexCoord4f (GLenum target, GLfloat s, GLfloat t, GLfloat r, GLfloat q); +GL_API void GL_APIENTRY glNormal3f (GLfloat nx, GLfloat ny, GLfloat nz); +GL_API 
void GL_APIENTRY glOrthof (GLfloat left, GLfloat right, GLfloat bottom, GLfloat top, GLfloat zNear, GLfloat zFar); +GL_API void GL_APIENTRY glPointParameterf (GLenum pname, GLfloat param); +GL_API void GL_APIENTRY glPointParameterfv (GLenum pname, const GLfloat *params); +GL_API void GL_APIENTRY glPointSize (GLfloat size); +GL_API void GL_APIENTRY glPolygonOffset (GLfloat factor, GLfloat units); +GL_API void GL_APIENTRY glRotatef (GLfloat angle, GLfloat x, GLfloat y, GLfloat z); +GL_API void GL_APIENTRY glScalef (GLfloat x, GLfloat y, GLfloat z); +GL_API void GL_APIENTRY glTexEnvf (GLenum target, GLenum pname, GLfloat param); +GL_API void GL_APIENTRY glTexEnvfv (GLenum target, GLenum pname, const GLfloat *params); +GL_API void GL_APIENTRY glTexParameterf (GLenum target, GLenum pname, GLfloat param); +GL_API void GL_APIENTRY glTexParameterfv (GLenum target, GLenum pname, const GLfloat *params); +GL_API void GL_APIENTRY glTranslatef (GLfloat x, GLfloat y, GLfloat z); + +/* Available in both Common and Common-Lite profiles */ +GL_API void GL_APIENTRY glActiveTexture (GLenum texture); +GL_API void GL_APIENTRY glAlphaFuncx (GLenum func, GLclampx ref); +GL_API void GL_APIENTRY glBindBuffer (GLenum target, GLuint buffer); +GL_API void GL_APIENTRY glBindTexture (GLenum target, GLuint texture); +GL_API void GL_APIENTRY glBlendFunc (GLenum sfactor, GLenum dfactor); +GL_API void GL_APIENTRY glBufferData (GLenum target, GLsizeiptr size, const GLvoid *data, GLenum usage); +GL_API void GL_APIENTRY glBufferSubData (GLenum target, GLintptr offset, GLsizeiptr size, const GLvoid *data); +GL_API void GL_APIENTRY glClear (GLbitfield mask); +GL_API void GL_APIENTRY glClearColorx (GLclampx red, GLclampx green, GLclampx blue, GLclampx alpha); +GL_API void GL_APIENTRY glClearDepthx (GLclampx depth); +GL_API void GL_APIENTRY glClearStencil (GLint s); +GL_API void GL_APIENTRY glClientActiveTexture (GLenum texture); +GL_API void GL_APIENTRY glClipPlanex (GLenum plane, const GLfixed 
*equation); +GL_API void GL_APIENTRY glColor4ub (GLubyte red, GLubyte green, GLubyte blue, GLubyte alpha); +GL_API void GL_APIENTRY glColor4x (GLfixed red, GLfixed green, GLfixed blue, GLfixed alpha); +GL_API void GL_APIENTRY glColorMask (GLboolean red, GLboolean green, GLboolean blue, GLboolean alpha); +GL_API void GL_APIENTRY glColorPointer (GLint size, GLenum type, GLsizei stride, const GLvoid *pointer); +GL_API void GL_APIENTRY glCompressedTexImage2D (GLenum target, GLint level, GLenum internalformat, GLsizei width, GLsizei height, GLint border, GLsizei imageSize, const GLvoid *data); +GL_API void GL_APIENTRY glCompressedTexSubImage2D (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLsizei width, GLsizei height, GLenum format, GLsizei imageSize, const GLvoid *data); +GL_API void GL_APIENTRY glCopyTexImage2D (GLenum target, GLint level, GLenum internalformat, GLint x, GLint y, GLsizei width, GLsizei height, GLint border); +GL_API void GL_APIENTRY glCopyTexSubImage2D (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint x, GLint y, GLsizei width, GLsizei height); +GL_API void GL_APIENTRY glCullFace (GLenum mode); +GL_API void GL_APIENTRY glDeleteBuffers (GLsizei n, const GLuint *buffers); +GL_API void GL_APIENTRY glDeleteTextures (GLsizei n, const GLuint *textures); +GL_API void GL_APIENTRY glDepthFunc (GLenum func); +GL_API void GL_APIENTRY glDepthMask (GLboolean flag); +GL_API void GL_APIENTRY glDepthRangex (GLclampx zNear, GLclampx zFar); +GL_API void GL_APIENTRY glDisable (GLenum cap); +GL_API void GL_APIENTRY glDisableClientState (GLenum array); +GL_API void GL_APIENTRY glDrawArrays (GLenum mode, GLint first, GLsizei count); +GL_API void GL_APIENTRY glDrawElements (GLenum mode, GLsizei count, GLenum type, const GLvoid *indices); +GL_API void GL_APIENTRY glEnable (GLenum cap); +GL_API void GL_APIENTRY glEnableClientState (GLenum array); +GL_API void GL_APIENTRY glFinish (void); +GL_API void GL_APIENTRY glFlush (void); +GL_API void 
GL_APIENTRY glFogx (GLenum pname, GLfixed param); +GL_API void GL_APIENTRY glFogxv (GLenum pname, const GLfixed *params); +GL_API void GL_APIENTRY glFrontFace (GLenum mode); +GL_API void GL_APIENTRY glFrustumx (GLfixed left, GLfixed right, GLfixed bottom, GLfixed top, GLfixed zNear, GLfixed zFar); +GL_API void GL_APIENTRY glGetBooleanv (GLenum pname, GLboolean *params); +GL_API void GL_APIENTRY glGetBufferParameteriv (GLenum target, GLenum pname, GLint *params); +GL_API void GL_APIENTRY glGetClipPlanex (GLenum pname, GLfixed eqn[4]); +GL_API void GL_APIENTRY glGenBuffers (GLsizei n, GLuint *buffers); +GL_API void GL_APIENTRY glGenTextures (GLsizei n, GLuint *textures); +GL_API GLenum GL_APIENTRY glGetError (void); +GL_API void GL_APIENTRY glGetFixedv (GLenum pname, GLfixed *params); +GL_API void GL_APIENTRY glGetIntegerv (GLenum pname, GLint *params); +GL_API void GL_APIENTRY glGetLightxv (GLenum light, GLenum pname, GLfixed *params); +GL_API void GL_APIENTRY glGetMaterialxv (GLenum face, GLenum pname, GLfixed *params); +GL_API void GL_APIENTRY glGetPointerv (GLenum pname, GLvoid **params); +GL_API const GLubyte * GL_APIENTRY glGetString (GLenum name); +GL_API void GL_APIENTRY glGetTexEnviv (GLenum env, GLenum pname, GLint *params); +GL_API void GL_APIENTRY glGetTexEnvxv (GLenum env, GLenum pname, GLfixed *params); +GL_API void GL_APIENTRY glGetTexParameteriv (GLenum target, GLenum pname, GLint *params); +GL_API void GL_APIENTRY glGetTexParameterxv (GLenum target, GLenum pname, GLfixed *params); +GL_API void GL_APIENTRY glHint (GLenum target, GLenum mode); +GL_API GLboolean GL_APIENTRY glIsBuffer (GLuint buffer); +GL_API GLboolean GL_APIENTRY glIsEnabled (GLenum cap); +GL_API GLboolean GL_APIENTRY glIsTexture (GLuint texture); +GL_API void GL_APIENTRY glLightModelx (GLenum pname, GLfixed param); +GL_API void GL_APIENTRY glLightModelxv (GLenum pname, const GLfixed *params); +GL_API void GL_APIENTRY glLightx (GLenum light, GLenum pname, GLfixed param); +GL_API void 
GL_APIENTRY glLightxv (GLenum light, GLenum pname, const GLfixed *params); +GL_API void GL_APIENTRY glLineWidthx (GLfixed width); +GL_API void GL_APIENTRY glLoadIdentity (void); +GL_API void GL_APIENTRY glLoadMatrixx (const GLfixed *m); +GL_API void GL_APIENTRY glLogicOp (GLenum opcode); +GL_API void GL_APIENTRY glMaterialx (GLenum face, GLenum pname, GLfixed param); +GL_API void GL_APIENTRY glMaterialxv (GLenum face, GLenum pname, const GLfixed *params); +GL_API void GL_APIENTRY glMatrixMode (GLenum mode); +GL_API void GL_APIENTRY glMultMatrixx (const GLfixed *m); +GL_API void GL_APIENTRY glMultiTexCoord4x (GLenum target, GLfixed s, GLfixed t, GLfixed r, GLfixed q); +GL_API void GL_APIENTRY glNormal3x (GLfixed nx, GLfixed ny, GLfixed nz); +GL_API void GL_APIENTRY glNormalPointer (GLenum type, GLsizei stride, const GLvoid *pointer); +GL_API void GL_APIENTRY glOrthox (GLfixed left, GLfixed right, GLfixed bottom, GLfixed top, GLfixed zNear, GLfixed zFar); +GL_API void GL_APIENTRY glPixelStorei (GLenum pname, GLint param); +GL_API void GL_APIENTRY glPointParameterx (GLenum pname, GLfixed param); +GL_API void GL_APIENTRY glPointParameterxv (GLenum pname, const GLfixed *params); +GL_API void GL_APIENTRY glPointSizex (GLfixed size); +GL_API void GL_APIENTRY glPolygonOffsetx (GLfixed factor, GLfixed units); +GL_API void GL_APIENTRY glPopMatrix (void); +GL_API void GL_APIENTRY glPushMatrix (void); +GL_API void GL_APIENTRY glReadPixels (GLint x, GLint y, GLsizei width, GLsizei height, GLenum format, GLenum type, GLvoid *pixels); +GL_API void GL_APIENTRY glRotatex (GLfixed angle, GLfixed x, GLfixed y, GLfixed z); +GL_API void GL_APIENTRY glSampleCoverage (GLclampf value, GLboolean invert); +GL_API void GL_APIENTRY glSampleCoveragex (GLclampx value, GLboolean invert); +GL_API void GL_APIENTRY glScalex (GLfixed x, GLfixed y, GLfixed z); +GL_API void GL_APIENTRY glScissor (GLint x, GLint y, GLsizei width, GLsizei height); +GL_API void GL_APIENTRY glShadeModel (GLenum mode); 
+GL_API void GL_APIENTRY glStencilFunc (GLenum func, GLint ref, GLuint mask); +GL_API void GL_APIENTRY glStencilMask (GLuint mask); +GL_API void GL_APIENTRY glStencilOp (GLenum fail, GLenum zfail, GLenum zpass); +GL_API void GL_APIENTRY glTexCoordPointer (GLint size, GLenum type, GLsizei stride, const GLvoid *pointer); +GL_API void GL_APIENTRY glTexEnvi (GLenum target, GLenum pname, GLint param); +GL_API void GL_APIENTRY glTexEnvx (GLenum target, GLenum pname, GLfixed param); +GL_API void GL_APIENTRY glTexEnviv (GLenum target, GLenum pname, const GLint *params); +GL_API void GL_APIENTRY glTexEnvxv (GLenum target, GLenum pname, const GLfixed *params); +GL_API void GL_APIENTRY glTexImage2D (GLenum target, GLint level, GLint internalformat, GLsizei width, GLsizei height, GLint border, GLenum format, GLenum type, const GLvoid *pixels); +GL_API void GL_APIENTRY glTexParameteri (GLenum target, GLenum pname, GLint param); +GL_API void GL_APIENTRY glTexParameterx (GLenum target, GLenum pname, GLfixed param); +GL_API void GL_APIENTRY glTexParameteriv (GLenum target, GLenum pname, const GLint *params); +GL_API void GL_APIENTRY glTexParameterxv (GLenum target, GLenum pname, const GLfixed *params); +GL_API void GL_APIENTRY glTexSubImage2D (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLsizei width, GLsizei height, GLenum format, GLenum type, const GLvoid *pixels); +GL_API void GL_APIENTRY glTranslatex (GLfixed x, GLfixed y, GLfixed z); +GL_API void GL_APIENTRY glVertexPointer (GLint size, GLenum type, GLsizei stride, const GLvoid *pointer); +GL_API void GL_APIENTRY glViewport (GLint x, GLint y, GLsizei width, GLsizei height); + +/*------------------------------------------------------------------------* + * Required OES extension functions + *------------------------------------------------------------------------*/ + +/* GL_OES_read_format */ +#ifndef GL_OES_read_format +#define GL_OES_read_format 1 +#endif + +/* GL_OES_compressed_paletted_texture */ +#ifndef 
GL_OES_compressed_paletted_texture +#define GL_OES_compressed_paletted_texture 1 +#endif + +/* GL_OES_point_size_array */ +#ifndef GL_OES_point_size_array +#define GL_OES_point_size_array 1 +GL_API void GL_APIENTRY glPointSizePointerOES (GLenum type, GLsizei stride, const GLvoid *pointer); +#endif + +/* GL_OES_point_sprite */ +#ifndef GL_OES_point_sprite +#define GL_OES_point_sprite 1 +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* __gl_h_ */ + diff --git a/opengles-3.1/include/GLES/glext.h b/opengles-3.1/include/GLES/glext.h new file mode 100644 index 0000000000..5b46ae6d05 --- /dev/null +++ b/opengles-3.1/include/GLES/glext.h @@ -0,0 +1,1278 @@ +#ifndef __glext_h_ +#define __glext_h_ + +/* $Revision: 19260 $ on $Date:: 2012-09-20 11:30:36 -0700 #$ */ + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * This document is licensed under the SGI Free Software B License Version + * 2.0. For details, see http://oss.sgi.com/projects/FreeB/ . + */ + +#ifndef GL_APIENTRYP +# define GL_APIENTRYP GL_APIENTRY* +#endif + +/*------------------------------------------------------------------------* + * OES extension tokens + *------------------------------------------------------------------------*/ + +/* GL_OES_blend_equation_separate */ +#ifndef GL_OES_blend_equation_separate +/* BLEND_EQUATION_RGB_OES same as BLEND_EQUATION_OES */ +#define GL_BLEND_EQUATION_RGB_OES 0x8009 +#define GL_BLEND_EQUATION_ALPHA_OES 0x883D +#endif + +/* GL_OES_blend_func_separate */ +#ifndef GL_OES_blend_func_separate +#define GL_BLEND_DST_RGB_OES 0x80C8 +#define GL_BLEND_SRC_RGB_OES 0x80C9 +#define GL_BLEND_DST_ALPHA_OES 0x80CA +#define GL_BLEND_SRC_ALPHA_OES 0x80CB +#endif + +/* GL_OES_blend_subtract */ +#ifndef GL_OES_blend_subtract +#define GL_BLEND_EQUATION_OES 0x8009 +#define GL_FUNC_ADD_OES 0x8006 +#define GL_FUNC_SUBTRACT_OES 0x800A +#define GL_FUNC_REVERSE_SUBTRACT_OES 0x800B +#endif + +/* GL_OES_compressed_ETC1_RGB8_texture */ +#ifndef GL_OES_compressed_ETC1_RGB8_texture +#define 
GL_ETC1_RGB8_OES 0x8D64 +#endif + +/* GL_OES_depth24 */ +#ifndef GL_OES_depth24 +#define GL_DEPTH_COMPONENT24_OES 0x81A6 +#endif + +/* GL_OES_depth32 */ +#ifndef GL_OES_depth32 +#define GL_DEPTH_COMPONENT32_OES 0x81A7 +#endif + +/* GL_OES_draw_texture */ +#ifndef GL_OES_draw_texture +#define GL_TEXTURE_CROP_RECT_OES 0x8B9D +#endif + +/* GL_OES_EGL_image */ +#ifndef GL_OES_EGL_image +typedef void* GLeglImageOES; +#endif + +/* GL_OES_EGL_image_external */ +#ifndef GL_OES_EGL_image_external +/* GLeglImageOES defined in GL_OES_EGL_image already. */ +#define GL_TEXTURE_EXTERNAL_OES 0x8D65 +#define GL_TEXTURE_BINDING_EXTERNAL_OES 0x8D67 +#define GL_REQUIRED_TEXTURE_IMAGE_UNITS_OES 0x8D68 +#endif + +/* GL_OES_element_index_uint */ +#ifndef GL_OES_element_index_uint +#define GL_UNSIGNED_INT 0x1405 +#endif + +/* GL_OES_fixed_point */ +#ifndef GL_OES_fixed_point +#define GL_FIXED_OES 0x140C +#endif + +/* GL_OES_framebuffer_object */ +#ifndef GL_OES_framebuffer_object +#define GL_NONE_OES 0 +#define GL_FRAMEBUFFER_OES 0x8D40 +#define GL_RENDERBUFFER_OES 0x8D41 +#define GL_RGBA4_OES 0x8056 +#define GL_RGB5_A1_OES 0x8057 +#define GL_RGB565_OES 0x8D62 +#define GL_DEPTH_COMPONENT16_OES 0x81A5 +#define GL_RENDERBUFFER_WIDTH_OES 0x8D42 +#define GL_RENDERBUFFER_HEIGHT_OES 0x8D43 +#define GL_RENDERBUFFER_INTERNAL_FORMAT_OES 0x8D44 +#define GL_RENDERBUFFER_RED_SIZE_OES 0x8D50 +#define GL_RENDERBUFFER_GREEN_SIZE_OES 0x8D51 +#define GL_RENDERBUFFER_BLUE_SIZE_OES 0x8D52 +#define GL_RENDERBUFFER_ALPHA_SIZE_OES 0x8D53 +#define GL_RENDERBUFFER_DEPTH_SIZE_OES 0x8D54 +#define GL_RENDERBUFFER_STENCIL_SIZE_OES 0x8D55 +#define GL_FRAMEBUFFER_ATTACHMENT_OBJECT_TYPE_OES 0x8CD0 +#define GL_FRAMEBUFFER_ATTACHMENT_OBJECT_NAME_OES 0x8CD1 +#define GL_FRAMEBUFFER_ATTACHMENT_TEXTURE_LEVEL_OES 0x8CD2 +#define GL_FRAMEBUFFER_ATTACHMENT_TEXTURE_CUBE_MAP_FACE_OES 0x8CD3 +#define GL_COLOR_ATTACHMENT0_OES 0x8CE0 +#define GL_DEPTH_ATTACHMENT_OES 0x8D00 +#define GL_STENCIL_ATTACHMENT_OES 0x8D20 +#define 
GL_FRAMEBUFFER_COMPLETE_OES 0x8CD5 +#define GL_FRAMEBUFFER_INCOMPLETE_ATTACHMENT_OES 0x8CD6 +#define GL_FRAMEBUFFER_INCOMPLETE_MISSING_ATTACHMENT_OES 0x8CD7 +#define GL_FRAMEBUFFER_INCOMPLETE_DIMENSIONS_OES 0x8CD9 +#define GL_FRAMEBUFFER_INCOMPLETE_FORMATS_OES 0x8CDA +#define GL_FRAMEBUFFER_UNSUPPORTED_OES 0x8CDD +#define GL_FRAMEBUFFER_BINDING_OES 0x8CA6 +#define GL_RENDERBUFFER_BINDING_OES 0x8CA7 +#define GL_MAX_RENDERBUFFER_SIZE_OES 0x84E8 +#define GL_INVALID_FRAMEBUFFER_OPERATION_OES 0x0506 +#endif + +/* GL_OES_mapbuffer */ +#ifndef GL_OES_mapbuffer +#define GL_WRITE_ONLY_OES 0x88B9 +#define GL_BUFFER_ACCESS_OES 0x88BB +#define GL_BUFFER_MAPPED_OES 0x88BC +#define GL_BUFFER_MAP_POINTER_OES 0x88BD +#endif + +/* GL_OES_matrix_get */ +#ifndef GL_OES_matrix_get +#define GL_MODELVIEW_MATRIX_FLOAT_AS_INT_BITS_OES 0x898D +#define GL_PROJECTION_MATRIX_FLOAT_AS_INT_BITS_OES 0x898E +#define GL_TEXTURE_MATRIX_FLOAT_AS_INT_BITS_OES 0x898F +#endif + +/* GL_OES_matrix_palette */ +#ifndef GL_OES_matrix_palette +#define GL_MAX_VERTEX_UNITS_OES 0x86A4 +#define GL_MAX_PALETTE_MATRICES_OES 0x8842 +#define GL_MATRIX_PALETTE_OES 0x8840 +#define GL_MATRIX_INDEX_ARRAY_OES 0x8844 +#define GL_WEIGHT_ARRAY_OES 0x86AD +#define GL_CURRENT_PALETTE_MATRIX_OES 0x8843 +#define GL_MATRIX_INDEX_ARRAY_SIZE_OES 0x8846 +#define GL_MATRIX_INDEX_ARRAY_TYPE_OES 0x8847 +#define GL_MATRIX_INDEX_ARRAY_STRIDE_OES 0x8848 +#define GL_MATRIX_INDEX_ARRAY_POINTER_OES 0x8849 +#define GL_MATRIX_INDEX_ARRAY_BUFFER_BINDING_OES 0x8B9E +#define GL_WEIGHT_ARRAY_SIZE_OES 0x86AB +#define GL_WEIGHT_ARRAY_TYPE_OES 0x86A9 +#define GL_WEIGHT_ARRAY_STRIDE_OES 0x86AA +#define GL_WEIGHT_ARRAY_POINTER_OES 0x86AC +#define GL_WEIGHT_ARRAY_BUFFER_BINDING_OES 0x889E +#endif + +/* GL_OES_packed_depth_stencil */ +#ifndef GL_OES_packed_depth_stencil +#define GL_DEPTH_STENCIL_OES 0x84F9 +#define GL_UNSIGNED_INT_24_8_OES 0x84FA +#define GL_DEPTH24_STENCIL8_OES 0x88F0 +#endif + +/* GL_OES_required_internalformat */ +/* No new tokens 
introduced by this extension. */ + +/* GL_OES_rgb8_rgba8 */ +#ifndef GL_OES_rgb8_rgba8 +#define GL_RGB8_OES 0x8051 +#define GL_RGBA8_OES 0x8058 +#endif + +/* GL_OES_stencil1 */ +#ifndef GL_OES_stencil1 +#define GL_STENCIL_INDEX1_OES 0x8D46 +#endif + +/* GL_OES_stencil4 */ +#ifndef GL_OES_stencil4 +#define GL_STENCIL_INDEX4_OES 0x8D47 +#endif + +/* GL_OES_stencil8 */ +#ifndef GL_OES_stencil8 +#define GL_STENCIL_INDEX8_OES 0x8D48 +#endif + +/* GL_OES_stencil_wrap */ +#ifndef GL_OES_stencil_wrap +#define GL_INCR_WRAP_OES 0x8507 +#define GL_DECR_WRAP_OES 0x8508 +#endif + +/* GL_OES_texture_cube_map */ +#ifndef GL_OES_texture_cube_map +#define GL_NORMAL_MAP_OES 0x8511 +#define GL_REFLECTION_MAP_OES 0x8512 +#define GL_TEXTURE_CUBE_MAP_OES 0x8513 +#define GL_TEXTURE_BINDING_CUBE_MAP_OES 0x8514 +#define GL_TEXTURE_CUBE_MAP_POSITIVE_X_OES 0x8515 +#define GL_TEXTURE_CUBE_MAP_NEGATIVE_X_OES 0x8516 +#define GL_TEXTURE_CUBE_MAP_POSITIVE_Y_OES 0x8517 +#define GL_TEXTURE_CUBE_MAP_NEGATIVE_Y_OES 0x8518 +#define GL_TEXTURE_CUBE_MAP_POSITIVE_Z_OES 0x8519 +#define GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_OES 0x851A +#define GL_MAX_CUBE_MAP_TEXTURE_SIZE_OES 0x851C +#define GL_TEXTURE_GEN_MODE_OES 0x2500 +#define GL_TEXTURE_GEN_STR_OES 0x8D60 +#endif + +/* GL_OES_texture_mirrored_repeat */ +#ifndef GL_OES_texture_mirrored_repeat +#define GL_MIRRORED_REPEAT_OES 0x8370 +#endif + +/* GL_OES_vertex_array_object */ +#ifndef GL_OES_vertex_array_object +#define GL_VERTEX_ARRAY_BINDING_OES 0x85B5 +#endif + +/*------------------------------------------------------------------------* + * AMD extension tokens + *------------------------------------------------------------------------*/ + +/* GL_AMD_compressed_3DC_texture */ +#ifndef GL_AMD_compressed_3DC_texture +#define GL_3DC_X_AMD 0x87F9 +#define GL_3DC_XY_AMD 0x87FA +#endif + +/* GL_AMD_compressed_ATC_texture */ +#ifndef GL_AMD_compressed_ATC_texture +#define GL_ATC_RGB_AMD 0x8C92 +#define GL_ATC_RGBA_EXPLICIT_ALPHA_AMD 0x8C93 +#define 
GL_ATC_RGBA_INTERPOLATED_ALPHA_AMD 0x87EE +#endif + +/*------------------------------------------------------------------------* + * APPLE extension tokens + *------------------------------------------------------------------------*/ + +/* GL_APPLE_copy_texture_levels */ +/* No new tokens introduced by this extension. */ + +/* GL_APPLE_framebuffer_multisample */ +#ifndef GL_APPLE_framebuffer_multisample +#define GL_RENDERBUFFER_SAMPLES_APPLE 0x8CAB +#define GL_FRAMEBUFFER_INCOMPLETE_MULTISAMPLE_APPLE 0x8D56 +#define GL_MAX_SAMPLES_APPLE 0x8D57 +#define GL_READ_FRAMEBUFFER_APPLE 0x8CA8 +#define GL_DRAW_FRAMEBUFFER_APPLE 0x8CA9 +#define GL_DRAW_FRAMEBUFFER_BINDING_APPLE 0x8CA6 +#define GL_READ_FRAMEBUFFER_BINDING_APPLE 0x8CAA +#endif + +/* GL_APPLE_sync */ +#ifndef GL_APPLE_sync + +/* These types are defined with reference to + * in the Apple extension spec, but here we use the Khronos + * portable types in khrplatform.h, and assume those types + * are always defined. + * If any other extensions using these types are defined, + * the typedefs must move out of this block and be shared. + */ +typedef khronos_int64_t GLint64; +typedef khronos_uint64_t GLuint64; +typedef struct __GLsync *GLsync; + +#define GL_SYNC_OBJECT_APPLE 0x8A53 +#define GL_MAX_SERVER_WAIT_TIMEOUT_APPLE 0x9111 +#define GL_OBJECT_TYPE_APPLE 0x9112 +#define GL_SYNC_CONDITION_APPLE 0x9113 +#define GL_SYNC_STATUS_APPLE 0x9114 +#define GL_SYNC_FLAGS_APPLE 0x9115 +#define GL_SYNC_FENCE_APPLE 0x9116 +#define GL_SYNC_GPU_COMMANDS_COMPLETE_APPLE 0x9117 +#define GL_UNSIGNALED_APPLE 0x9118 +#define GL_SIGNALED_APPLE 0x9119 +#define GL_ALREADY_SIGNALED_APPLE 0x911A +#define GL_TIMEOUT_EXPIRED_APPLE 0x911B +#define GL_CONDITION_SATISFIED_APPLE 0x911C +#define GL_WAIT_FAILED_APPLE 0x911D +#define GL_SYNC_FLUSH_COMMANDS_BIT_APPLE 0x00000001 +#define GL_TIMEOUT_IGNORED_APPLE 0xFFFFFFFFFFFFFFFFull +#endif + +/* GL_APPLE_texture_2D_limited_npot */ +/* No new tokens introduced by this extension. 
*/ + +/* GL_APPLE_texture_format_BGRA8888 */ +#ifndef GL_APPLE_texture_format_BGRA8888 +#define GL_BGRA_EXT 0x80E1 +#endif + +/* GL_APPLE_texture_max_level */ +#ifndef GL_APPLE_texture_max_level +#define GL_TEXTURE_MAX_LEVEL_APPLE 0x813D +#endif + +/*------------------------------------------------------------------------* + * ARM extension tokens + *------------------------------------------------------------------------*/ + +/* GL_ARM_rgba8 */ +/* No new tokens introduced by this extension. */ + +/*------------------------------------------------------------------------* + * EXT extension tokens + *------------------------------------------------------------------------*/ + +/* GL_EXT_blend_minmax */ +#ifndef GL_EXT_blend_minmax +#define GL_MIN_EXT 0x8007 +#define GL_MAX_EXT 0x8008 +#endif + +/* GL_EXT_discard_framebuffer */ +#ifndef GL_EXT_discard_framebuffer +#define GL_COLOR_EXT 0x1800 +#define GL_DEPTH_EXT 0x1801 +#define GL_STENCIL_EXT 0x1802 +#endif + +/* GL_EXT_map_buffer_range */ +#ifndef GL_EXT_map_buffer_range +#define GL_MAP_READ_BIT_EXT 0x0001 +#define GL_MAP_WRITE_BIT_EXT 0x0002 +#define GL_MAP_INVALIDATE_RANGE_BIT_EXT 0x0004 +#define GL_MAP_INVALIDATE_BUFFER_BIT_EXT 0x0008 +#define GL_MAP_FLUSH_EXPLICIT_BIT_EXT 0x0010 +#define GL_MAP_UNSYNCHRONIZED_BIT_EXT 0x0020 +#endif + +/* GL_EXT_multisampled_render_to_texture */ +#ifndef GL_EXT_multisampled_render_to_texture +#define GL_FRAMEBUFFER_ATTACHMENT_TEXTURE_SAMPLES_EXT 0x8D6C +/* reuse values from GL_EXT_framebuffer_multisample (desktop extension) */ +#define GL_RENDERBUFFER_SAMPLES_EXT 0x8CAB +#define GL_FRAMEBUFFER_INCOMPLETE_MULTISAMPLE_EXT 0x8D56 +#define GL_MAX_SAMPLES_EXT 0x8D57 +#endif + +/* GL_EXT_multi_draw_arrays */ +/* No new tokens introduced by this extension. 
*/ + +/* GL_EXT_read_format_bgra */ +#ifndef GL_EXT_read_format_bgra +#define GL_BGRA_EXT 0x80E1 +#define GL_UNSIGNED_SHORT_4_4_4_4_REV_EXT 0x8365 +#define GL_UNSIGNED_SHORT_1_5_5_5_REV_EXT 0x8366 +#endif + +/* GL_EXT_robustness */ +#ifndef GL_EXT_robustness +/* reuse GL_NO_ERROR */ +#define GL_GUILTY_CONTEXT_RESET_EXT 0x8253 +#define GL_INNOCENT_CONTEXT_RESET_EXT 0x8254 +#define GL_UNKNOWN_CONTEXT_RESET_EXT 0x8255 +#define GL_CONTEXT_ROBUST_ACCESS_EXT 0x90F3 +#define GL_RESET_NOTIFICATION_STRATEGY_EXT 0x8256 +#define GL_LOSE_CONTEXT_ON_RESET_EXT 0x8252 +#define GL_NO_RESET_NOTIFICATION_EXT 0x8261 +#endif + +/* GL_EXT_sRGB */ +#ifndef GL_EXT_sRGB +#define GL_SRGB_EXT 0x8C40 +#define GL_SRGB_ALPHA_EXT 0x8C42 +#define GL_SRGB8_ALPHA8_EXT 0x8C43 +#define GL_FRAMEBUFFER_ATTACHMENT_COLOR_ENCODING_EXT 0x8210 +#endif + +/* GL_EXT_texture_compression_dxt1 */ +#ifndef GL_EXT_texture_compression_dxt1 +#define GL_COMPRESSED_RGB_S3TC_DXT1_EXT 0x83F0 +#define GL_COMPRESSED_RGBA_S3TC_DXT1_EXT 0x83F1 +#endif + +/* GL_EXT_texture_filter_anisotropic */ +#ifndef GL_EXT_texture_filter_anisotropic +#define GL_TEXTURE_MAX_ANISOTROPY_EXT 0x84FE +#define GL_MAX_TEXTURE_MAX_ANISOTROPY_EXT 0x84FF +#endif + +/* GL_EXT_texture_format_BGRA8888 */ +#ifndef GL_EXT_texture_format_BGRA8888 +#define GL_BGRA_EXT 0x80E1 +#endif + +/* GL_EXT_texture_lod_bias */ +#ifndef GL_EXT_texture_lod_bias +#define GL_MAX_TEXTURE_LOD_BIAS_EXT 0x84FD +#define GL_TEXTURE_FILTER_CONTROL_EXT 0x8500 +#define GL_TEXTURE_LOD_BIAS_EXT 0x8501 +#endif + +/* GL_EXT_texture_storage */ +#ifndef GL_EXT_texture_storage +#define GL_TEXTURE_IMMUTABLE_FORMAT_EXT 0x912F +#define GL_ALPHA8_EXT 0x803C +#define GL_LUMINANCE8_EXT 0x8040 +#define GL_LUMINANCE8_ALPHA8_EXT 0x8045 +#define GL_RGBA32F_EXT 0x8814 +#define GL_RGB32F_EXT 0x8815 +#define GL_ALPHA32F_EXT 0x8816 +#define GL_LUMINANCE32F_EXT 0x8818 +#define GL_LUMINANCE_ALPHA32F_EXT 0x8819 +/* reuse GL_RGBA16F_EXT */ +#define GL_RGB16F_EXT 0x881B +#define GL_ALPHA16F_EXT 0x881C 
+#define GL_LUMINANCE16F_EXT 0x881E +#define GL_LUMINANCE_ALPHA16F_EXT 0x881F +#define GL_RGB10_A2_EXT 0x8059 +#define GL_RGB10_EXT 0x8052 +#define GL_BGRA8_EXT 0x93A1 +#endif + +/*------------------------------------------------------------------------* + * IMG extension tokens + *------------------------------------------------------------------------*/ + +/* GL_IMG_read_format */ +#ifndef GL_IMG_read_format +#define GL_BGRA_IMG 0x80E1 +#define GL_UNSIGNED_SHORT_4_4_4_4_REV_IMG 0x8365 +#endif + +/* GL_IMG_texture_compression_pvrtc */ +#ifndef GL_IMG_texture_compression_pvrtc +#define GL_COMPRESSED_RGB_PVRTC_4BPPV1_IMG 0x8C00 +#define GL_COMPRESSED_RGB_PVRTC_2BPPV1_IMG 0x8C01 +#define GL_COMPRESSED_RGBA_PVRTC_4BPPV1_IMG 0x8C02 +#define GL_COMPRESSED_RGBA_PVRTC_2BPPV1_IMG 0x8C03 +#endif + +/* GL_IMG_texture_env_enhanced_fixed_function */ +#ifndef GL_IMG_texture_env_enhanced_fixed_function +#define GL_MODULATE_COLOR_IMG 0x8C04 +#define GL_RECIP_ADD_SIGNED_ALPHA_IMG 0x8C05 +#define GL_TEXTURE_ALPHA_MODULATE_IMG 0x8C06 +#define GL_FACTOR_ALPHA_MODULATE_IMG 0x8C07 +#define GL_FRAGMENT_ALPHA_MODULATE_IMG 0x8C08 +#define GL_ADD_BLEND_IMG 0x8C09 +#define GL_DOT3_RGBA_IMG 0x86AF +#endif + +/* GL_IMG_user_clip_plane */ +#ifndef GL_IMG_user_clip_plane +#define GL_CLIP_PLANE0_IMG 0x3000 +#define GL_CLIP_PLANE1_IMG 0x3001 +#define GL_CLIP_PLANE2_IMG 0x3002 +#define GL_CLIP_PLANE3_IMG 0x3003 +#define GL_CLIP_PLANE4_IMG 0x3004 +#define GL_CLIP_PLANE5_IMG 0x3005 +#define GL_MAX_CLIP_PLANES_IMG 0x0D32 +#endif + +/* GL_IMG_multisampled_render_to_texture */ +#ifndef GL_IMG_multisampled_render_to_texture +#define GL_RENDERBUFFER_SAMPLES_IMG 0x9133 +#define GL_FRAMEBUFFER_INCOMPLETE_MULTISAMPLE_IMG 0x9134 +#define GL_MAX_SAMPLES_IMG 0x9135 +#define GL_TEXTURE_SAMPLES_IMG 0x9136 +#endif + +/*------------------------------------------------------------------------* + * NV extension tokens + *------------------------------------------------------------------------*/ + +/* GL_NV_fence */ 
+#ifndef GL_NV_fence +#define GL_ALL_COMPLETED_NV 0x84F2 +#define GL_FENCE_STATUS_NV 0x84F3 +#define GL_FENCE_CONDITION_NV 0x84F4 +#endif + +/*------------------------------------------------------------------------* + * QCOM extension tokens + *------------------------------------------------------------------------*/ + +/* GL_QCOM_driver_control */ +/* No new tokens introduced by this extension. */ + +/* GL_QCOM_extended_get */ +#ifndef GL_QCOM_extended_get +#define GL_TEXTURE_WIDTH_QCOM 0x8BD2 +#define GL_TEXTURE_HEIGHT_QCOM 0x8BD3 +#define GL_TEXTURE_DEPTH_QCOM 0x8BD4 +#define GL_TEXTURE_INTERNAL_FORMAT_QCOM 0x8BD5 +#define GL_TEXTURE_FORMAT_QCOM 0x8BD6 +#define GL_TEXTURE_TYPE_QCOM 0x8BD7 +#define GL_TEXTURE_IMAGE_VALID_QCOM 0x8BD8 +#define GL_TEXTURE_NUM_LEVELS_QCOM 0x8BD9 +#define GL_TEXTURE_TARGET_QCOM 0x8BDA +#define GL_TEXTURE_OBJECT_VALID_QCOM 0x8BDB +#define GL_STATE_RESTORE 0x8BDC +#endif + +/* GL_QCOM_extended_get2 */ +/* No new tokens introduced by this extension. */ + +/* GL_QCOM_perfmon_global_mode */ +#ifndef GL_QCOM_perfmon_global_mode +#define GL_PERFMON_GLOBAL_MODE_QCOM 0x8FA0 +#endif + +/* GL_QCOM_writeonly_rendering */ +#ifndef GL_QCOM_writeonly_rendering +#define GL_WRITEONLY_RENDERING_QCOM 0x8823 +#endif + +/* GL_QCOM_tiled_rendering */ +#ifndef GL_QCOM_tiled_rendering +#define GL_COLOR_BUFFER_BIT0_QCOM 0x00000001 +#define GL_COLOR_BUFFER_BIT1_QCOM 0x00000002 +#define GL_COLOR_BUFFER_BIT2_QCOM 0x00000004 +#define GL_COLOR_BUFFER_BIT3_QCOM 0x00000008 +#define GL_COLOR_BUFFER_BIT4_QCOM 0x00000010 +#define GL_COLOR_BUFFER_BIT5_QCOM 0x00000020 +#define GL_COLOR_BUFFER_BIT6_QCOM 0x00000040 +#define GL_COLOR_BUFFER_BIT7_QCOM 0x00000080 +#define GL_DEPTH_BUFFER_BIT0_QCOM 0x00000100 +#define GL_DEPTH_BUFFER_BIT1_QCOM 0x00000200 +#define GL_DEPTH_BUFFER_BIT2_QCOM 0x00000400 +#define GL_DEPTH_BUFFER_BIT3_QCOM 0x00000800 +#define GL_DEPTH_BUFFER_BIT4_QCOM 0x00001000 +#define GL_DEPTH_BUFFER_BIT5_QCOM 0x00002000 +#define GL_DEPTH_BUFFER_BIT6_QCOM 
0x00004000 +#define GL_DEPTH_BUFFER_BIT7_QCOM 0x00008000 +#define GL_STENCIL_BUFFER_BIT0_QCOM 0x00010000 +#define GL_STENCIL_BUFFER_BIT1_QCOM 0x00020000 +#define GL_STENCIL_BUFFER_BIT2_QCOM 0x00040000 +#define GL_STENCIL_BUFFER_BIT3_QCOM 0x00080000 +#define GL_STENCIL_BUFFER_BIT4_QCOM 0x00100000 +#define GL_STENCIL_BUFFER_BIT5_QCOM 0x00200000 +#define GL_STENCIL_BUFFER_BIT6_QCOM 0x00400000 +#define GL_STENCIL_BUFFER_BIT7_QCOM 0x00800000 +#define GL_MULTISAMPLE_BUFFER_BIT0_QCOM 0x01000000 +#define GL_MULTISAMPLE_BUFFER_BIT1_QCOM 0x02000000 +#define GL_MULTISAMPLE_BUFFER_BIT2_QCOM 0x04000000 +#define GL_MULTISAMPLE_BUFFER_BIT3_QCOM 0x08000000 +#define GL_MULTISAMPLE_BUFFER_BIT4_QCOM 0x10000000 +#define GL_MULTISAMPLE_BUFFER_BIT5_QCOM 0x20000000 +#define GL_MULTISAMPLE_BUFFER_BIT6_QCOM 0x40000000 +#define GL_MULTISAMPLE_BUFFER_BIT7_QCOM 0x80000000 +#endif + +/*------------------------------------------------------------------------* + * End of extension tokens, start of corresponding extension functions + *------------------------------------------------------------------------*/ + +/*------------------------------------------------------------------------* + * OES extension functions + *------------------------------------------------------------------------*/ + +/* GL_OES_blend_equation_separate */ +#ifndef GL_OES_blend_equation_separate +#define GL_OES_blend_equation_separate 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_API void GL_APIENTRY glBlendEquationSeparateOES (GLenum modeRGB, GLenum modeAlpha); +#endif +typedef void (GL_APIENTRYP PFNGLBLENDEQUATIONSEPARATEOESPROC) (GLenum modeRGB, GLenum modeAlpha); +#endif + +/* GL_OES_blend_func_separate */ +#ifndef GL_OES_blend_func_separate +#define GL_OES_blend_func_separate 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_API void GL_APIENTRY glBlendFuncSeparateOES (GLenum srcRGB, GLenum dstRGB, GLenum srcAlpha, GLenum dstAlpha); +#endif +typedef void (GL_APIENTRYP PFNGLBLENDFUNCSEPARATEOESPROC) (GLenum srcRGB, GLenum dstRGB, GLenum srcAlpha, 
GLenum dstAlpha); +#endif + +/* GL_OES_blend_subtract */ +#ifndef GL_OES_blend_subtract +#define GL_OES_blend_subtract 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_API void GL_APIENTRY glBlendEquationOES (GLenum mode); +#endif +typedef void (GL_APIENTRYP PFNGLBLENDEQUATIONOESPROC) (GLenum mode); +#endif + +/* GL_OES_byte_coordinates */ +#ifndef GL_OES_byte_coordinates +#define GL_OES_byte_coordinates 1 +#endif + +/* GL_OES_compressed_ETC1_RGB8_texture */ +#ifndef GL_OES_compressed_ETC1_RGB8_texture +#define GL_OES_compressed_ETC1_RGB8_texture 1 +#endif + +/* GL_OES_depth24 */ +#ifndef GL_OES_depth24 +#define GL_OES_depth24 1 +#endif + +/* GL_OES_depth32 */ +#ifndef GL_OES_depth32 +#define GL_OES_depth32 1 +#endif + +/* GL_OES_draw_texture */ +#ifndef GL_OES_draw_texture +#define GL_OES_draw_texture 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_API void GL_APIENTRY glDrawTexsOES (GLshort x, GLshort y, GLshort z, GLshort width, GLshort height); +GL_API void GL_APIENTRY glDrawTexiOES (GLint x, GLint y, GLint z, GLint width, GLint height); +GL_API void GL_APIENTRY glDrawTexxOES (GLfixed x, GLfixed y, GLfixed z, GLfixed width, GLfixed height); +GL_API void GL_APIENTRY glDrawTexsvOES (const GLshort *coords); +GL_API void GL_APIENTRY glDrawTexivOES (const GLint *coords); +GL_API void GL_APIENTRY glDrawTexxvOES (const GLfixed *coords); +GL_API void GL_APIENTRY glDrawTexfOES (GLfloat x, GLfloat y, GLfloat z, GLfloat width, GLfloat height); +GL_API void GL_APIENTRY glDrawTexfvOES (const GLfloat *coords); +#endif +typedef void (GL_APIENTRYP PFNGLDRAWTEXSOESPROC) (GLshort x, GLshort y, GLshort z, GLshort width, GLshort height); +typedef void (GL_APIENTRYP PFNGLDRAWTEXIOESPROC) (GLint x, GLint y, GLint z, GLint width, GLint height); +typedef void (GL_APIENTRYP PFNGLDRAWTEXXOESPROC) (GLfixed x, GLfixed y, GLfixed z, GLfixed width, GLfixed height); +typedef void (GL_APIENTRYP PFNGLDRAWTEXSVOESPROC) (const GLshort *coords); +typedef void (GL_APIENTRYP PFNGLDRAWTEXIVOESPROC) (const GLint *coords); 
+typedef void (GL_APIENTRYP PFNGLDRAWTEXXVOESPROC) (const GLfixed *coords); +typedef void (GL_APIENTRYP PFNGLDRAWTEXFOESPROC) (GLfloat x, GLfloat y, GLfloat z, GLfloat width, GLfloat height); +typedef void (GL_APIENTRYP PFNGLDRAWTEXFVOESPROC) (const GLfloat *coords); +#endif + +/* GL_OES_EGL_image */ +#ifndef GL_OES_EGL_image +#define GL_OES_EGL_image 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_API void GL_APIENTRY glEGLImageTargetTexture2DOES (GLenum target, GLeglImageOES image); +GL_API void GL_APIENTRY glEGLImageTargetRenderbufferStorageOES (GLenum target, GLeglImageOES image); +#endif +typedef void (GL_APIENTRYP PFNGLEGLIMAGETARGETTEXTURE2DOESPROC) (GLenum target, GLeglImageOES image); +typedef void (GL_APIENTRYP PFNGLEGLIMAGETARGETRENDERBUFFERSTORAGEOESPROC) (GLenum target, GLeglImageOES image); +#endif + +/* GL_OES_EGL_image_external */ +#ifndef GL_OES_EGL_image_external +#define GL_OES_EGL_image_external 1 +/* glEGLImageTargetTexture2DOES defined in GL_OES_EGL_image already. */ +#endif + +/* GL_OES_element_index_uint */ +#ifndef GL_OES_element_index_uint +#define GL_OES_element_index_uint 1 +#endif + +/* GL_OES_extended_matrix_palette */ +#ifndef GL_OES_extended_matrix_palette +#define GL_OES_extended_matrix_palette 1 +#endif + +/* GL_OES_fbo_render_mipmap */ +#ifndef GL_OES_fbo_render_mipmap +#define GL_OES_fbo_render_mipmap 1 +#endif + +/* GL_OES_fixed_point */ +#ifndef GL_OES_fixed_point +#define GL_OES_fixed_point 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_API void GL_APIENTRY glAlphaFuncxOES (GLenum func, GLclampx ref); +GL_API void GL_APIENTRY glClearColorxOES (GLclampx red, GLclampx green, GLclampx blue, GLclampx alpha); +GL_API void GL_APIENTRY glClearDepthxOES (GLclampx depth); +GL_API void GL_APIENTRY glClipPlanexOES (GLenum plane, const GLfixed *equation); +GL_API void GL_APIENTRY glColor4xOES (GLfixed red, GLfixed green, GLfixed blue, GLfixed alpha); +GL_API void GL_APIENTRY glDepthRangexOES (GLclampx zNear, GLclampx zFar); +GL_API void GL_APIENTRY glFogxOES 
(GLenum pname, GLfixed param); +GL_API void GL_APIENTRY glFogxvOES (GLenum pname, const GLfixed *params); +GL_API void GL_APIENTRY glFrustumxOES (GLfixed left, GLfixed right, GLfixed bottom, GLfixed top, GLfixed zNear, GLfixed zFar); +GL_API void GL_APIENTRY glGetClipPlanexOES (GLenum pname, GLfixed eqn[4]); +GL_API void GL_APIENTRY glGetFixedvOES (GLenum pname, GLfixed *params); +GL_API void GL_APIENTRY glGetLightxvOES (GLenum light, GLenum pname, GLfixed *params); +GL_API void GL_APIENTRY glGetMaterialxvOES (GLenum face, GLenum pname, GLfixed *params); +GL_API void GL_APIENTRY glGetTexEnvxvOES (GLenum env, GLenum pname, GLfixed *params); +GL_API void GL_APIENTRY glGetTexParameterxvOES (GLenum target, GLenum pname, GLfixed *params); +GL_API void GL_APIENTRY glLightModelxOES (GLenum pname, GLfixed param); +GL_API void GL_APIENTRY glLightModelxvOES (GLenum pname, const GLfixed *params); +GL_API void GL_APIENTRY glLightxOES (GLenum light, GLenum pname, GLfixed param); +GL_API void GL_APIENTRY glLightxvOES (GLenum light, GLenum pname, const GLfixed *params); +GL_API void GL_APIENTRY glLineWidthxOES (GLfixed width); +GL_API void GL_APIENTRY glLoadMatrixxOES (const GLfixed *m); +GL_API void GL_APIENTRY glMaterialxOES (GLenum face, GLenum pname, GLfixed param); +GL_API void GL_APIENTRY glMaterialxvOES (GLenum face, GLenum pname, const GLfixed *params); +GL_API void GL_APIENTRY glMultMatrixxOES (const GLfixed *m); +GL_API void GL_APIENTRY glMultiTexCoord4xOES (GLenum target, GLfixed s, GLfixed t, GLfixed r, GLfixed q); +GL_API void GL_APIENTRY glNormal3xOES (GLfixed nx, GLfixed ny, GLfixed nz); +GL_API void GL_APIENTRY glOrthoxOES (GLfixed left, GLfixed right, GLfixed bottom, GLfixed top, GLfixed zNear, GLfixed zFar); +GL_API void GL_APIENTRY glPointParameterxOES (GLenum pname, GLfixed param); +GL_API void GL_APIENTRY glPointParameterxvOES (GLenum pname, const GLfixed *params); +GL_API void GL_APIENTRY glPointSizexOES (GLfixed size); +GL_API void GL_APIENTRY 
glPolygonOffsetxOES (GLfixed factor, GLfixed units); +GL_API void GL_APIENTRY glRotatexOES (GLfixed angle, GLfixed x, GLfixed y, GLfixed z); +GL_API void GL_APIENTRY glSampleCoveragexOES (GLclampx value, GLboolean invert); +GL_API void GL_APIENTRY glScalexOES (GLfixed x, GLfixed y, GLfixed z); +GL_API void GL_APIENTRY glTexEnvxOES (GLenum target, GLenum pname, GLfixed param); +GL_API void GL_APIENTRY glTexEnvxvOES (GLenum target, GLenum pname, const GLfixed *params); +GL_API void GL_APIENTRY glTexParameterxOES (GLenum target, GLenum pname, GLfixed param); +GL_API void GL_APIENTRY glTexParameterxvOES (GLenum target, GLenum pname, const GLfixed *params); +GL_API void GL_APIENTRY glTranslatexOES (GLfixed x, GLfixed y, GLfixed z); +#endif +typedef void (GL_APIENTRYP PFNGLALPHAFUNCXOESPROC) (GLenum func, GLclampx ref); +typedef void (GL_APIENTRYP PFNGLCLEARCOLORXOESPROC) (GLclampx red, GLclampx green, GLclampx blue, GLclampx alpha); +typedef void (GL_APIENTRYP PFNGLCLEARDEPTHXOESPROC) (GLclampx depth); +typedef void (GL_APIENTRYP PFNGLCLIPPLANEXOESPROC) (GLenum plane, const GLfixed *equation); +typedef void (GL_APIENTRYP PFNGLCOLOR4XOESPROC) (GLfixed red, GLfixed green, GLfixed blue, GLfixed alpha); +typedef void (GL_APIENTRYP PFNGLDEPTHRANGEXOESPROC) (GLclampx zNear, GLclampx zFar); +typedef void (GL_APIENTRYP PFNGLFOGXOESPROC) (GLenum pname, GLfixed param); +typedef void (GL_APIENTRYP PFNGLFOGXVOESPROC) (GLenum pname, const GLfixed *params); +typedef void (GL_APIENTRYP PFNGLFRUSTUMXOESPROC) (GLfixed left, GLfixed right, GLfixed bottom, GLfixed top, GLfixed zNear, GLfixed zFar); +typedef void (GL_APIENTRYP PFNGLGETCLIPPLANEXOESPROC) (GLenum pname, GLfixed eqn[4]); +typedef void (GL_APIENTRYP PFNGLGETFIXEDVOESPROC) (GLenum pname, GLfixed *params); +typedef void (GL_APIENTRYP PFNGLGETLIGHTXVOESPROC) (GLenum light, GLenum pname, GLfixed *params); +typedef void (GL_APIENTRYP PFNGLGETMATERIALXVOESPROC) (GLenum face, GLenum pname, GLfixed *params); +typedef void 
(GL_APIENTRYP PFNGLGETTEXENVXVOESPROC) (GLenum env, GLenum pname, GLfixed *params); +typedef void (GL_APIENTRYP PFNGLGETTEXPARAMETERXVOESPROC) (GLenum target, GLenum pname, GLfixed *params); +typedef void (GL_APIENTRYP PFNGLLIGHTMODELXOESPROC) (GLenum pname, GLfixed param); +typedef void (GL_APIENTRYP PFNGLLIGHTMODELXVOESPROC) (GLenum pname, const GLfixed *params); +typedef void (GL_APIENTRYP PFNGLLIGHTXOESPROC) (GLenum light, GLenum pname, GLfixed param); +typedef void (GL_APIENTRYP PFNGLLIGHTXVOESPROC) (GLenum light, GLenum pname, const GLfixed *params); +typedef void (GL_APIENTRYP PFNGLLINEWIDTHXOESPROC) (GLfixed width); +typedef void (GL_APIENTRYP PFNGLLOADMATRIXXOESPROC) (const GLfixed *m); +typedef void (GL_APIENTRYP PFNGLMATERIALXOESPROC) (GLenum face, GLenum pname, GLfixed param); +typedef void (GL_APIENTRYP PFNGLMATERIALXVOESPROC) (GLenum face, GLenum pname, const GLfixed *params); +typedef void (GL_APIENTRYP PFNGLMULTMATRIXXOESPROC) (const GLfixed *m); +typedef void (GL_APIENTRYP PFNGLMULTITEXCOORD4XOESPROC) (GLenum target, GLfixed s, GLfixed t, GLfixed r, GLfixed q); +typedef void (GL_APIENTRYP PFNGLNORMAL3XOESPROC) (GLfixed nx, GLfixed ny, GLfixed nz); +typedef void (GL_APIENTRYP PFNGLORTHOXOESPROC) (GLfixed left, GLfixed right, GLfixed bottom, GLfixed top, GLfixed zNear, GLfixed zFar); +typedef void (GL_APIENTRYP PFNGLPOINTPARAMETERXOESPROC) (GLenum pname, GLfixed param); +typedef void (GL_APIENTRYP PFNGLPOINTPARAMETERXVOESPROC) (GLenum pname, const GLfixed *params); +typedef void (GL_APIENTRYP PFNGLPOINTSIZEXOESPROC) (GLfixed size); +typedef void (GL_APIENTRYP PFNGLPOLYGONOFFSETXOESPROC) (GLfixed factor, GLfixed units); +typedef void (GL_APIENTRYP PFNGLROTATEXOESPROC) (GLfixed angle, GLfixed x, GLfixed y, GLfixed z); +typedef void (GL_APIENTRYP PFNGLSAMPLECOVERAGEXOESPROC) (GLclampx value, GLboolean invert); +typedef void (GL_APIENTRYP PFNGLSCALEXOESPROC) (GLfixed x, GLfixed y, GLfixed z); +typedef void (GL_APIENTRYP PFNGLTEXENVXOESPROC) (GLenum 
target, GLenum pname, GLfixed param); +typedef void (GL_APIENTRYP PFNGLTEXENVXVOESPROC) (GLenum target, GLenum pname, const GLfixed *params); +typedef void (GL_APIENTRYP PFNGLTEXPARAMETERXOESPROC) (GLenum target, GLenum pname, GLfixed param); +typedef void (GL_APIENTRYP PFNGLTEXPARAMETERXVOESPROC) (GLenum target, GLenum pname, const GLfixed *params); +typedef void (GL_APIENTRYP PFNGLTRANSLATEXOESPROC) (GLfixed x, GLfixed y, GLfixed z); +#endif + +/* GL_OES_framebuffer_object */ +#ifndef GL_OES_framebuffer_object +#define GL_OES_framebuffer_object 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_API GLboolean GL_APIENTRY glIsRenderbufferOES (GLuint renderbuffer); +GL_API void GL_APIENTRY glBindRenderbufferOES (GLenum target, GLuint renderbuffer); +GL_API void GL_APIENTRY glDeleteRenderbuffersOES (GLsizei n, const GLuint* renderbuffers); +GL_API void GL_APIENTRY glGenRenderbuffersOES (GLsizei n, GLuint* renderbuffers); +GL_API void GL_APIENTRY glRenderbufferStorageOES (GLenum target, GLenum internalformat, GLsizei width, GLsizei height); +GL_API void GL_APIENTRY glGetRenderbufferParameterivOES (GLenum target, GLenum pname, GLint* params); +GL_API GLboolean GL_APIENTRY glIsFramebufferOES (GLuint framebuffer); +GL_API void GL_APIENTRY glBindFramebufferOES (GLenum target, GLuint framebuffer); +GL_API void GL_APIENTRY glDeleteFramebuffersOES (GLsizei n, const GLuint* framebuffers); +GL_API void GL_APIENTRY glGenFramebuffersOES (GLsizei n, GLuint* framebuffers); +GL_API GLenum GL_APIENTRY glCheckFramebufferStatusOES (GLenum target); +GL_API void GL_APIENTRY glFramebufferRenderbufferOES (GLenum target, GLenum attachment, GLenum renderbuffertarget, GLuint renderbuffer); +GL_API void GL_APIENTRY glFramebufferTexture2DOES (GLenum target, GLenum attachment, GLenum textarget, GLuint texture, GLint level); +GL_API void GL_APIENTRY glGetFramebufferAttachmentParameterivOES (GLenum target, GLenum attachment, GLenum pname, GLint* params); +GL_API void GL_APIENTRY glGenerateMipmapOES (GLenum 
target); +#endif +typedef GLboolean (GL_APIENTRYP PFNGLISRENDERBUFFEROESPROC) (GLuint renderbuffer); +typedef void (GL_APIENTRYP PFNGLBINDRENDERBUFFEROESPROC) (GLenum target, GLuint renderbuffer); +typedef void (GL_APIENTRYP PFNGLDELETERENDERBUFFERSOESPROC) (GLsizei n, const GLuint* renderbuffers); +typedef void (GL_APIENTRYP PFNGLGENRENDERBUFFERSOESPROC) (GLsizei n, GLuint* renderbuffers); +typedef void (GL_APIENTRYP PFNGLRENDERBUFFERSTORAGEOESPROC) (GLenum target, GLenum internalformat, GLsizei width, GLsizei height); +typedef void (GL_APIENTRYP PFNGLGETRENDERBUFFERPARAMETERIVOESPROC) (GLenum target, GLenum pname, GLint* params); +typedef GLboolean (GL_APIENTRYP PFNGLISFRAMEBUFFEROESPROC) (GLuint framebuffer); +typedef void (GL_APIENTRYP PFNGLBINDFRAMEBUFFEROESPROC) (GLenum target, GLuint framebuffer); +typedef void (GL_APIENTRYP PFNGLDELETEFRAMEBUFFERSOESPROC) (GLsizei n, const GLuint* framebuffers); +typedef void (GL_APIENTRYP PFNGLGENFRAMEBUFFERSOESPROC) (GLsizei n, GLuint* framebuffers); +typedef GLenum (GL_APIENTRYP PFNGLCHECKFRAMEBUFFERSTATUSOESPROC) (GLenum target); +typedef void (GL_APIENTRYP PFNGLFRAMEBUFFERRENDERBUFFEROESPROC) (GLenum target, GLenum attachment, GLenum renderbuffertarget, GLuint renderbuffer); +typedef void (GL_APIENTRYP PFNGLFRAMEBUFFERTEXTURE2DOESPROC) (GLenum target, GLenum attachment, GLenum textarget, GLuint texture, GLint level); +typedef void (GL_APIENTRYP PFNGLGETFRAMEBUFFERATTACHMENTPARAMETERIVOESPROC) (GLenum target, GLenum attachment, GLenum pname, GLint* params); +typedef void (GL_APIENTRYP PFNGLGENERATEMIPMAPOESPROC) (GLenum target); +#endif + +/* GL_OES_mapbuffer */ +#ifndef GL_OES_mapbuffer +#define GL_OES_mapbuffer 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_API void* GL_APIENTRY glMapBufferOES (GLenum target, GLenum access); +GL_API GLboolean GL_APIENTRY glUnmapBufferOES (GLenum target); +GL_API void GL_APIENTRY glGetBufferPointervOES (GLenum target, GLenum pname, GLvoid ** params); +#endif +typedef void* (GL_APIENTRYP 
PFNGLMAPBUFFEROESPROC) (GLenum target, GLenum access); +typedef GLboolean (GL_APIENTRYP PFNGLUNMAPBUFFEROESPROC) (GLenum target); +typedef void (GL_APIENTRYP PFNGLGETBUFFERPOINTERVOESPROC) (GLenum target, GLenum pname, GLvoid ** params); +#endif + +/* GL_OES_matrix_get */ +#ifndef GL_OES_matrix_get +#define GL_OES_matrix_get 1 +#endif + +/* GL_OES_matrix_palette */ +#ifndef GL_OES_matrix_palette +#define GL_OES_matrix_palette 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_API void GL_APIENTRY glCurrentPaletteMatrixOES (GLuint matrixpaletteindex); +GL_API void GL_APIENTRY glLoadPaletteFromModelViewMatrixOES (void); +GL_API void GL_APIENTRY glMatrixIndexPointerOES (GLint size, GLenum type, GLsizei stride, const GLvoid *pointer); +GL_API void GL_APIENTRY glWeightPointerOES (GLint size, GLenum type, GLsizei stride, const GLvoid *pointer); +#endif +typedef void (GL_APIENTRYP PFNGLCURRENTPALETTEMATRIXOESPROC) (GLuint matrixpaletteindex); +typedef void (GL_APIENTRYP PFNGLLOADPALETTEFROMMODELVIEWMATRIXOESPROC) (void); +typedef void (GL_APIENTRYP PFNGLMATRIXINDEXPOINTEROESPROC) (GLint size, GLenum type, GLsizei stride, const GLvoid *pointer); +typedef void (GL_APIENTRYP PFNGLWEIGHTPOINTEROESPROC) (GLint size, GLenum type, GLsizei stride, const GLvoid *pointer); +#endif + +/* GL_OES_packed_depth_stencil */ +#ifndef GL_OES_packed_depth_stencil +#define GL_OES_packed_depth_stencil 1 +#endif + +/* GL_OES_required_internalformat */ +#ifndef GL_OES_required_internalformat +#define GL_OES_required_internalformat 1 +#endif + +/* GL_OES_query_matrix */ +#ifndef GL_OES_query_matrix +#define GL_OES_query_matrix 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_API GLbitfield GL_APIENTRY glQueryMatrixxOES (GLfixed mantissa[16], GLint exponent[16]); +#endif +typedef GLbitfield (GL_APIENTRYP PFNGLQUERYMATRIXXOESPROC) (GLfixed mantissa[16], GLint exponent[16]); +#endif + +/* GL_OES_rgb8_rgba8 */ +#ifndef GL_OES_rgb8_rgba8 +#define GL_OES_rgb8_rgba8 1 +#endif + +/* GL_OES_single_precision */ +#ifndef 
GL_OES_single_precision +#define GL_OES_single_precision 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_API void GL_APIENTRY glDepthRangefOES (GLclampf zNear, GLclampf zFar); +GL_API void GL_APIENTRY glFrustumfOES (GLfloat left, GLfloat right, GLfloat bottom, GLfloat top, GLfloat zNear, GLfloat zFar); +GL_API void GL_APIENTRY glOrthofOES (GLfloat left, GLfloat right, GLfloat bottom, GLfloat top, GLfloat zNear, GLfloat zFar); +GL_API void GL_APIENTRY glClipPlanefOES (GLenum plane, const GLfloat *equation); +GL_API void GL_APIENTRY glGetClipPlanefOES (GLenum pname, GLfloat eqn[4]); +GL_API void GL_APIENTRY glClearDepthfOES (GLclampf depth); +#endif +typedef void (GL_APIENTRYP PFNGLDEPTHRANGEFOESPROC) (GLclampf zNear, GLclampf zFar); +typedef void (GL_APIENTRYP PFNGLFRUSTUMFOESPROC) (GLfloat left, GLfloat right, GLfloat bottom, GLfloat top, GLfloat zNear, GLfloat zFar); +typedef void (GL_APIENTRYP PFNGLORTHOFOESPROC) (GLfloat left, GLfloat right, GLfloat bottom, GLfloat top, GLfloat zNear, GLfloat zFar); +typedef void (GL_APIENTRYP PFNGLCLIPPLANEFOESPROC) (GLenum plane, const GLfloat *equation); +typedef void (GL_APIENTRYP PFNGLGETCLIPPLANEFOESPROC) (GLenum pname, GLfloat eqn[4]); +typedef void (GL_APIENTRYP PFNGLCLEARDEPTHFOESPROC) (GLclampf depth); +#endif + +/* GL_OES_stencil1 */ +#ifndef GL_OES_stencil1 +#define GL_OES_stencil1 1 +#endif + +/* GL_OES_stencil4 */ +#ifndef GL_OES_stencil4 +#define GL_OES_stencil4 1 +#endif + +/* GL_OES_stencil8 */ +#ifndef GL_OES_stencil8 +#define GL_OES_stencil8 1 +#endif + +/* GL_OES_stencil_wrap */ +#ifndef GL_OES_stencil_wrap +#define GL_OES_stencil_wrap 1 +#endif + +/* GL_OES_texture_cube_map */ +#ifndef GL_OES_texture_cube_map +#define GL_OES_texture_cube_map 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_API void GL_APIENTRY glTexGenfOES (GLenum coord, GLenum pname, GLfloat param); +GL_API void GL_APIENTRY glTexGenfvOES (GLenum coord, GLenum pname, const GLfloat *params); +GL_API void GL_APIENTRY glTexGeniOES (GLenum coord, GLenum pname, GLint param); 
+GL_API void GL_APIENTRY glTexGenivOES (GLenum coord, GLenum pname, const GLint *params); +GL_API void GL_APIENTRY glTexGenxOES (GLenum coord, GLenum pname, GLfixed param); +GL_API void GL_APIENTRY glTexGenxvOES (GLenum coord, GLenum pname, const GLfixed *params); +GL_API void GL_APIENTRY glGetTexGenfvOES (GLenum coord, GLenum pname, GLfloat *params); +GL_API void GL_APIENTRY glGetTexGenivOES (GLenum coord, GLenum pname, GLint *params); +GL_API void GL_APIENTRY glGetTexGenxvOES (GLenum coord, GLenum pname, GLfixed *params); +#endif +typedef void (GL_APIENTRYP PFNGLTEXGENFOESPROC) (GLenum coord, GLenum pname, GLfloat param); +typedef void (GL_APIENTRYP PFNGLTEXGENFVOESPROC) (GLenum coord, GLenum pname, const GLfloat *params); +typedef void (GL_APIENTRYP PFNGLTEXGENIOESPROC) (GLenum coord, GLenum pname, GLint param); +typedef void (GL_APIENTRYP PFNGLTEXGENIVOESPROC) (GLenum coord, GLenum pname, const GLint *params); +typedef void (GL_APIENTRYP PFNGLTEXGENXOESPROC) (GLenum coord, GLenum pname, GLfixed param); +typedef void (GL_APIENTRYP PFNGLTEXGENXVOESPROC) (GLenum coord, GLenum pname, const GLfixed *params); +typedef void (GL_APIENTRYP PFNGLGETTEXGENFVOESPROC) (GLenum coord, GLenum pname, GLfloat *params); +typedef void (GL_APIENTRYP PFNGLGETTEXGENIVOESPROC) (GLenum coord, GLenum pname, GLint *params); +typedef void (GL_APIENTRYP PFNGLGETTEXGENXVOESPROC) (GLenum coord, GLenum pname, GLfixed *params); +#endif + +/* GL_OES_texture_env_crossbar */ +#ifndef GL_OES_texture_env_crossbar +#define GL_OES_texture_env_crossbar 1 +#endif + +/* GL_OES_texture_mirrored_repeat */ +#ifndef GL_OES_texture_mirrored_repeat +#define GL_OES_texture_mirrored_repeat 1 +#endif + +/* GL_OES_vertex_array_object */ +#ifndef GL_OES_vertex_array_object +#define GL_OES_vertex_array_object 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_API void GL_APIENTRY glBindVertexArrayOES (GLuint array); +GL_API void GL_APIENTRY glDeleteVertexArraysOES (GLsizei n, const GLuint *arrays); +GL_API void GL_APIENTRY 
glGenVertexArraysOES (GLsizei n, GLuint *arrays); +GL_API GLboolean GL_APIENTRY glIsVertexArrayOES (GLuint array); +#endif +typedef void (GL_APIENTRYP PFNGLBINDVERTEXARRAYOESPROC) (GLuint array); +typedef void (GL_APIENTRYP PFNGLDELETEVERTEXARRAYSOESPROC) (GLsizei n, const GLuint *arrays); +typedef void (GL_APIENTRYP PFNGLGENVERTEXARRAYSOESPROC) (GLsizei n, GLuint *arrays); +typedef GLboolean (GL_APIENTRYP PFNGLISVERTEXARRAYOESPROC) (GLuint array); +#endif + +/*------------------------------------------------------------------------* + * AMD extension functions + *------------------------------------------------------------------------*/ + +/* GL_AMD_compressed_3DC_texture */ +#ifndef GL_AMD_compressed_3DC_texture +#define GL_AMD_compressed_3DC_texture 1 +#endif + +/* GL_AMD_compressed_ATC_texture */ +#ifndef GL_AMD_compressed_ATC_texture +#define GL_AMD_compressed_ATC_texture 1 +#endif + +/*------------------------------------------------------------------------* + * APPLE extension functions + *------------------------------------------------------------------------*/ + +/* GL_APPLE_copy_texture_levels */ +#ifndef GL_APPLE_copy_texture_levels +#define GL_APPLE_copy_texture_levels 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_API void GL_APIENTRY glCopyTextureLevelsAPPLE (GLuint destinationTexture, GLuint sourceTexture, GLint sourceBaseLevel, GLsizei sourceLevelCount); +#endif +typedef void (GL_APIENTRYP PFNGLCOPYTEXTURELEVELSAPPLEPROC) (GLuint destinationTexture, GLuint sourceTexture, GLint sourceBaseLevel, GLsizei sourceLevelCount); +#endif + +/* GL_APPLE_framebuffer_multisample */ +#ifndef GL_APPLE_framebuffer_multisample +#define GL_APPLE_framebuffer_multisample 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_API void GL_APIENTRY glRenderbufferStorageMultisampleAPPLE (GLenum, GLsizei, GLenum, GLsizei, GLsizei); +GL_API void GL_APIENTRY glResolveMultisampleFramebufferAPPLE (void); +#endif /* GL_GLEXT_PROTOTYPES */ +typedef void (GL_APIENTRYP PFNGLRENDERBUFFERSTORAGEMULTISAMPLEAPPLEPROC) 
(GLenum target, GLsizei samples, GLenum internalformat, GLsizei width, GLsizei height); +typedef void (GL_APIENTRYP PFNGLRESOLVEMULTISAMPLEFRAMEBUFFERAPPLEPROC) (void); +#endif + +/* GL_APPLE_sync */ +#ifndef GL_APPLE_sync +#define GL_APPLE_sync 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_API GLsync GL_APIENTRY glFenceSyncAPPLE (GLenum condition, GLbitfield flags); +GL_API GLboolean GL_APIENTRY glIsSyncAPPLE (GLsync sync); +GL_API void GL_APIENTRY glDeleteSyncAPPLE (GLsync sync); +GL_API GLenum GL_APIENTRY glClientWaitSyncAPPLE (GLsync sync, GLbitfield flags, GLuint64 timeout); +GL_API void GL_APIENTRY glWaitSyncAPPLE (GLsync sync, GLbitfield flags, GLuint64 timeout); +GL_API void GL_APIENTRY glGetInteger64vAPPLE (GLenum pname, GLint64 *params); +GL_API void GL_APIENTRY glGetSyncivAPPLE (GLsync sync, GLenum pname, GLsizei bufSize, GLsizei *length, GLint *values); +#endif +typedef GLsync (GL_APIENTRYP PFNGLFENCESYNCAPPLEPROC) (GLenum condition, GLbitfield flags); +typedef GLboolean (GL_APIENTRYP PFNGLISSYNCAPPLEPROC) (GLsync sync); +typedef void (GL_APIENTRYP PFNGLDELETESYNCAPPLEPROC) (GLsync sync); +typedef GLenum (GL_APIENTRYP PFNGLCLIENTWAITSYNCAPPLEPROC) (GLsync sync, GLbitfield flags, GLuint64 timeout); +typedef void (GL_APIENTRYP PFNGLWAITSYNCAPPLEPROC) (GLsync sync, GLbitfield flags, GLuint64 timeout); +typedef void (GL_APIENTRYP PFNGLGETINTEGER64VAPPLEPROC) (GLenum pname, GLint64 *params); +typedef void (GL_APIENTRYP PFNGLGETSYNCIVAPPLEPROC) (GLsync sync, GLenum pname, GLsizei bufSize, GLsizei *length, GLint *values); +#endif + +/* GL_APPLE_texture_2D_limited_npot */ +#ifndef GL_APPLE_texture_2D_limited_npot +#define GL_APPLE_texture_2D_limited_npot 1 +#endif + +/* GL_APPLE_texture_format_BGRA8888 */ +#ifndef GL_APPLE_texture_format_BGRA8888 +#define GL_APPLE_texture_format_BGRA8888 1 +#endif + +/* GL_APPLE_texture_max_level */ +#ifndef GL_APPLE_texture_max_level +#define GL_APPLE_texture_max_level 1 +#endif + 
+/*------------------------------------------------------------------------* + * ARM extension functions + *------------------------------------------------------------------------*/ + +/* GL_ARM_rgba8 */ +#ifndef GL_ARM_rgba8 +#define GL_ARM_rgba8 1 +#endif + +/*------------------------------------------------------------------------* + * EXT extension functions + *------------------------------------------------------------------------*/ + +/* GL_EXT_blend_minmax */ +#ifndef GL_EXT_blend_minmax +#define GL_EXT_blend_minmax 1 +#endif + +/* GL_EXT_discard_framebuffer */ +#ifndef GL_EXT_discard_framebuffer +#define GL_EXT_discard_framebuffer 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_API void GL_APIENTRY glDiscardFramebufferEXT (GLenum target, GLsizei numAttachments, const GLenum *attachments); +#endif +typedef void (GL_APIENTRYP PFNGLDISCARDFRAMEBUFFEREXTPROC) (GLenum target, GLsizei numAttachments, const GLenum *attachments); +#endif + +/* GL_EXT_map_buffer_range */ +#ifndef GL_EXT_map_buffer_range +#define GL_EXT_map_buffer_range 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_API void GL_APIENTRY *glMapBufferRangeEXT (GLenum target, GLintptr offset, GLsizeiptr length, GLbitfield access); +GL_API void GL_APIENTRY glFlushMappedBufferRangeEXT (GLenum target, GLintptr offset, GLsizeiptr length); +#endif +typedef void* (GL_APIENTRYP PFNGLMAPBUFFERRANGEEXTPROC) (GLenum target, GLintptr offset, GLsizeiptr length, GLbitfield access); +typedef void (GL_APIENTRYP PFNGLFLUSHMAPPEDBUFFERRANGEEXTPROC) (GLenum target, GLintptr offset, GLsizeiptr length); +#endif + +/* GL_EXT_multisampled_render_to_texture */ +#ifndef GL_EXT_multisampled_render_to_texture +#define GL_EXT_multisampled_render_to_texture 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_API void GL_APIENTRY glRenderbufferStorageMultisampleEXT (GLenum, GLsizei, GLenum, GLsizei, GLsizei); +GL_API void GL_APIENTRY glFramebufferTexture2DMultisampleEXT (GLenum, GLenum, GLenum, GLuint, GLint, GLsizei); +#endif +typedef void (GL_APIENTRYP 
PFNGLRENDERBUFFERSTORAGEMULTISAMPLEEXTPROC) (GLenum target, GLsizei samples, GLenum internalformat, GLsizei width, GLsizei height); +typedef void (GL_APIENTRYP PFNGLFRAMEBUFFERTEXTURE2DMULTISAMPLEEXTPROC) (GLenum target, GLenum attachment, GLenum textarget, GLuint texture, GLint level, GLsizei samples); +#endif + +/* GL_EXT_multi_draw_arrays */ +#ifndef GL_EXT_multi_draw_arrays +#define GL_EXT_multi_draw_arrays 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_API void GL_APIENTRY glMultiDrawArraysEXT (GLenum, GLint *, GLsizei *, GLsizei); +GL_API void GL_APIENTRY glMultiDrawElementsEXT (GLenum, const GLsizei *, GLenum, const GLvoid* *, GLsizei); +#endif /* GL_GLEXT_PROTOTYPES */ +typedef void (GL_APIENTRYP PFNGLMULTIDRAWARRAYSEXTPROC) (GLenum mode, GLint *first, GLsizei *count, GLsizei primcount); +typedef void (GL_APIENTRYP PFNGLMULTIDRAWELEMENTSEXTPROC) (GLenum mode, const GLsizei *count, GLenum type, const GLvoid* *indices, GLsizei primcount); +#endif + +/* GL_EXT_read_format_bgra */ +#ifndef GL_EXT_read_format_bgra +#define GL_EXT_read_format_bgra 1 +#endif + +/* GL_EXT_robustness */ +#ifndef GL_EXT_robustness +#define GL_EXT_robustness 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_API GLenum GL_APIENTRY glGetGraphicsResetStatusEXT (void); +GL_API void GL_APIENTRY glReadnPixelsEXT (GLint x, GLint y, GLsizei width, GLsizei height, GLenum format, GLenum type, GLsizei bufSize, void *data); +GL_API void GL_APIENTRY glGetnUniformfvEXT (GLuint program, GLint location, GLsizei bufSize, float *params); +GL_API void GL_APIENTRY glGetnUniformivEXT (GLuint program, GLint location, GLsizei bufSize, GLint *params); +#endif +typedef GLenum (GL_APIENTRYP PFNGLGETGRAPHICSRESETSTATUSEXTPROC) (void); +typedef void (GL_APIENTRYP PFNGLREADNPIXELSEXTPROC) (GLint x, GLint y, GLsizei width, GLsizei height, GLenum format, GLenum type, GLsizei bufSize, void *data); +typedef void (GL_APIENTRYP PFNGLGETNUNIFORMFVEXTPROC) (GLuint program, GLint location, GLsizei bufSize, float *params); +typedef void (GL_APIENTRYP 
PFNGLGETNUNIFORMIVEXTPROC) (GLuint program, GLint location, GLsizei bufSize, GLint *params); +#endif + +/* GL_EXT_sRGB */ +#ifndef GL_EXT_sRGB +#define GL_EXT_sRGB 1 +#endif + +/* GL_EXT_texture_compression_dxt1 */ +#ifndef GL_EXT_texture_compression_dxt1 +#define GL_EXT_texture_compression_dxt1 1 +#endif + +/* GL_EXT_texture_filter_anisotropic */ +#ifndef GL_EXT_texture_filter_anisotropic +#define GL_EXT_texture_filter_anisotropic 1 +#endif + +/* GL_EXT_texture_format_BGRA8888 */ +#ifndef GL_EXT_texture_format_BGRA8888 +#define GL_EXT_texture_format_BGRA8888 1 +#endif + +/* GL_EXT_texture_lod_bias */ +#ifndef GL_EXT_texture_lod_bias +#define GL_EXT_texture_lod_bias 1 +#endif + +/* GL_EXT_texture_storage */ +#ifndef GL_EXT_texture_storage +#define GL_EXT_texture_storage 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_API void GL_APIENTRY glTexStorage1DEXT (GLenum target, GLsizei levels, GLenum internalformat, GLsizei width); +GL_API void GL_APIENTRY glTexStorage2DEXT (GLenum target, GLsizei levels, GLenum internalformat, GLsizei width, GLsizei height); +GL_API void GL_APIENTRY glTexStorage3DEXT (GLenum target, GLsizei levels, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth); +GL_API void GL_APIENTRY glTextureStorage1DEXT (GLuint texture, GLenum target, GLsizei levels, GLenum internalformat, GLsizei width); +GL_API void GL_APIENTRY glTextureStorage2DEXT (GLuint texture, GLenum target, GLsizei levels, GLenum internalformat, GLsizei width, GLsizei height); +GL_API void GL_APIENTRY glTextureStorage3DEXT (GLuint texture, GLenum target, GLsizei levels, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth); +#endif +typedef void (GL_APIENTRYP PFNGLTEXSTORAGE1DEXTPROC) (GLenum target, GLsizei levels, GLenum internalformat, GLsizei width); +typedef void (GL_APIENTRYP PFNGLTEXSTORAGE2DEXTPROC) (GLenum target, GLsizei levels, GLenum internalformat, GLsizei width, GLsizei height); +typedef void (GL_APIENTRYP PFNGLTEXSTORAGE3DEXTPROC) (GLenum target, 
GLsizei levels, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth); +typedef void (GL_APIENTRYP PFNGLTEXTURESTORAGE1DEXTPROC) (GLuint texture, GLenum target, GLsizei levels, GLenum internalformat, GLsizei width); +typedef void (GL_APIENTRYP PFNGLTEXTURESTORAGE2DEXTPROC) (GLuint texture, GLenum target, GLsizei levels, GLenum internalformat, GLsizei width, GLsizei height); +typedef void (GL_APIENTRYP PFNGLTEXTURESTORAGE3DEXTPROC) (GLuint texture, GLenum target, GLsizei levels, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth); +#endif + +/*------------------------------------------------------------------------* + * IMG extension functions + *------------------------------------------------------------------------*/ + +/* GL_IMG_read_format */ +#ifndef GL_IMG_read_format +#define GL_IMG_read_format 1 +#endif + +/* GL_IMG_texture_compression_pvrtc */ +#ifndef GL_IMG_texture_compression_pvrtc +#define GL_IMG_texture_compression_pvrtc 1 +#endif + +/* GL_IMG_texture_env_enhanced_fixed_function */ +#ifndef GL_IMG_texture_env_enhanced_fixed_function +#define GL_IMG_texture_env_enhanced_fixed_function 1 +#endif + +/* GL_IMG_user_clip_plane */ +#ifndef GL_IMG_user_clip_plane +#define GL_IMG_user_clip_plane 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_API void GL_APIENTRY glClipPlanefIMG (GLenum, const GLfloat *); +GL_API void GL_APIENTRY glClipPlanexIMG (GLenum, const GLfixed *); +#endif +typedef void (GL_APIENTRYP PFNGLCLIPPLANEFIMGPROC) (GLenum p, const GLfloat *eqn); +typedef void (GL_APIENTRYP PFNGLCLIPPLANEXIMGPROC) (GLenum p, const GLfixed *eqn); +#endif + +/* GL_IMG_multisampled_render_to_texture */ +#ifndef GL_IMG_multisampled_render_to_texture +#define GL_IMG_multisampled_render_to_texture 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_API void GL_APIENTRY glRenderbufferStorageMultisampleIMG (GLenum, GLsizei, GLenum, GLsizei, GLsizei); +GL_API void GL_APIENTRY glFramebufferTexture2DMultisampleIMG (GLenum, GLenum, GLenum, GLuint, GLint, GLsizei); +#endif 
+typedef void (GL_APIENTRYP PFNGLRENDERBUFFERSTORAGEMULTISAMPLEIMGPROC) (GLenum target, GLsizei samples, GLenum internalformat, GLsizei width, GLsizei height); +typedef void (GL_APIENTRYP PFNGLFRAMEBUFFERTEXTURE2DMULTISAMPLEIMGPROC) (GLenum target, GLenum attachment, GLenum textarget, GLuint texture, GLint level, GLsizei samples); +#endif + +/*------------------------------------------------------------------------* + * NV extension functions + *------------------------------------------------------------------------*/ + +/* NV_fence */ +#ifndef GL_NV_fence +#define GL_NV_fence 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_API void GL_APIENTRY glDeleteFencesNV (GLsizei, const GLuint *); +GL_API void GL_APIENTRY glGenFencesNV (GLsizei, GLuint *); +GL_API GLboolean GL_APIENTRY glIsFenceNV (GLuint); +GL_API GLboolean GL_APIENTRY glTestFenceNV (GLuint); +GL_API void GL_APIENTRY glGetFenceivNV (GLuint, GLenum, GLint *); +GL_API void GL_APIENTRY glFinishFenceNV (GLuint); +GL_API void GL_APIENTRY glSetFenceNV (GLuint, GLenum); +#endif +typedef void (GL_APIENTRYP PFNGLDELETEFENCESNVPROC) (GLsizei n, const GLuint *fences); +typedef void (GL_APIENTRYP PFNGLGENFENCESNVPROC) (GLsizei n, GLuint *fences); +typedef GLboolean (GL_APIENTRYP PFNGLISFENCENVPROC) (GLuint fence); +typedef GLboolean (GL_APIENTRYP PFNGLTESTFENCENVPROC) (GLuint fence); +typedef void (GL_APIENTRYP PFNGLGETFENCEIVNVPROC) (GLuint fence, GLenum pname, GLint *params); +typedef void (GL_APIENTRYP PFNGLFINISHFENCENVPROC) (GLuint fence); +typedef void (GL_APIENTRYP PFNGLSETFENCENVPROC) (GLuint fence, GLenum condition); +#endif + +/*------------------------------------------------------------------------* + * QCOM extension functions + *------------------------------------------------------------------------*/ + +/* GL_QCOM_driver_control */ +#ifndef GL_QCOM_driver_control +#define GL_QCOM_driver_control 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_API void GL_APIENTRY glGetDriverControlsQCOM (GLint *num, GLsizei size, GLuint 
*driverControls); +GL_API void GL_APIENTRY glGetDriverControlStringQCOM (GLuint driverControl, GLsizei bufSize, GLsizei *length, GLchar *driverControlString); +GL_API void GL_APIENTRY glEnableDriverControlQCOM (GLuint driverControl); +GL_API void GL_APIENTRY glDisableDriverControlQCOM (GLuint driverControl); +#endif +typedef void (GL_APIENTRYP PFNGLGETDRIVERCONTROLSQCOMPROC) (GLint *num, GLsizei size, GLuint *driverControls); +typedef void (GL_APIENTRYP PFNGLGETDRIVERCONTROLSTRINGQCOMPROC) (GLuint driverControl, GLsizei bufSize, GLsizei *length, GLchar *driverControlString); +typedef void (GL_APIENTRYP PFNGLENABLEDRIVERCONTROLQCOMPROC) (GLuint driverControl); +typedef void (GL_APIENTRYP PFNGLDISABLEDRIVERCONTROLQCOMPROC) (GLuint driverControl); +#endif + +/* GL_QCOM_extended_get */ +#ifndef GL_QCOM_extended_get +#define GL_QCOM_extended_get 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_API void GL_APIENTRY glExtGetTexturesQCOM (GLuint *textures, GLint maxTextures, GLint *numTextures); +GL_API void GL_APIENTRY glExtGetBuffersQCOM (GLuint *buffers, GLint maxBuffers, GLint *numBuffers); +GL_API void GL_APIENTRY glExtGetRenderbuffersQCOM (GLuint *renderbuffers, GLint maxRenderbuffers, GLint *numRenderbuffers); +GL_API void GL_APIENTRY glExtGetFramebuffersQCOM (GLuint *framebuffers, GLint maxFramebuffers, GLint *numFramebuffers); +GL_API void GL_APIENTRY glExtGetTexLevelParameterivQCOM (GLuint texture, GLenum face, GLint level, GLenum pname, GLint *params); +GL_API void GL_APIENTRY glExtTexObjectStateOverrideiQCOM (GLenum target, GLenum pname, GLint param); +GL_API void GL_APIENTRY glExtGetTexSubImageQCOM (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLenum format, GLenum type, GLvoid *texels); +GL_API void GL_APIENTRY glExtGetBufferPointervQCOM (GLenum target, GLvoid **params); +#endif +typedef void (GL_APIENTRYP PFNGLEXTGETTEXTURESQCOMPROC) (GLuint *textures, GLint maxTextures, GLint *numTextures); +typedef 
void (GL_APIENTRYP PFNGLEXTGETBUFFERSQCOMPROC) (GLuint *buffers, GLint maxBuffers, GLint *numBuffers); +typedef void (GL_APIENTRYP PFNGLEXTGETRENDERBUFFERSQCOMPROC) (GLuint *renderbuffers, GLint maxRenderbuffers, GLint *numRenderbuffers); +typedef void (GL_APIENTRYP PFNGLEXTGETFRAMEBUFFERSQCOMPROC) (GLuint *framebuffers, GLint maxFramebuffers, GLint *numFramebuffers); +typedef void (GL_APIENTRYP PFNGLEXTGETTEXLEVELPARAMETERIVQCOMPROC) (GLuint texture, GLenum face, GLint level, GLenum pname, GLint *params); +typedef void (GL_APIENTRYP PFNGLEXTTEXOBJECTSTATEOVERRIDEIQCOMPROC) (GLenum target, GLenum pname, GLint param); +typedef void (GL_APIENTRYP PFNGLEXTGETTEXSUBIMAGEQCOMPROC) (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLenum format, GLenum type, GLvoid *texels); +typedef void (GL_APIENTRYP PFNGLEXTGETBUFFERPOINTERVQCOMPROC) (GLenum target, GLvoid **params); +#endif + +/* GL_QCOM_extended_get2 */ +#ifndef GL_QCOM_extended_get2 +#define GL_QCOM_extended_get2 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_API void GL_APIENTRY glExtGetShadersQCOM (GLuint *shaders, GLint maxShaders, GLint *numShaders); +GL_API void GL_APIENTRY glExtGetProgramsQCOM (GLuint *programs, GLint maxPrograms, GLint *numPrograms); +GL_API GLboolean GL_APIENTRY glExtIsProgramBinaryQCOM (GLuint program); +GL_API void GL_APIENTRY glExtGetProgramBinarySourceQCOM (GLuint program, GLenum shadertype, GLchar *source, GLint *length); +#endif +typedef void (GL_APIENTRYP PFNGLEXTGETSHADERSQCOMPROC) (GLuint *shaders, GLint maxShaders, GLint *numShaders); +typedef void (GL_APIENTRYP PFNGLEXTGETPROGRAMSQCOMPROC) (GLuint *programs, GLint maxPrograms, GLint *numPrograms); +typedef GLboolean (GL_APIENTRYP PFNGLEXTISPROGRAMBINARYQCOMPROC) (GLuint program); +typedef void (GL_APIENTRYP PFNGLEXTGETPROGRAMBINARYSOURCEQCOMPROC) (GLuint program, GLenum shadertype, GLchar *source, GLint *length); +#endif + +/* GL_QCOM_perfmon_global_mode */ +#ifndef 
GL_QCOM_perfmon_global_mode +#define GL_QCOM_perfmon_global_mode 1 +#endif + +/* GL_QCOM_writeonly_rendering */ +#ifndef GL_QCOM_writeonly_rendering +#define GL_QCOM_writeonly_rendering 1 +#endif + +/* GL_QCOM_tiled_rendering */ +#ifndef GL_QCOM_tiled_rendering +#define GL_QCOM_tiled_rendering 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_API void GL_APIENTRY glStartTilingQCOM (GLuint x, GLuint y, GLuint width, GLuint height, GLbitfield preserveMask); +GL_API void GL_APIENTRY glEndTilingQCOM (GLbitfield preserveMask); +#endif +typedef void (GL_APIENTRYP PFNGLSTARTTILINGQCOMPROC) (GLuint x, GLuint y, GLuint width, GLuint height, GLbitfield preserveMask); +typedef void (GL_APIENTRYP PFNGLENDTILINGQCOMPROC) (GLbitfield preserveMask); +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* __glext_h_ */ + diff --git a/opengles-3.1/include/GLES/glplatform.h b/opengles-3.1/include/GLES/glplatform.h new file mode 100644 index 0000000000..2db6ee2ca6 --- /dev/null +++ b/opengles-3.1/include/GLES/glplatform.h @@ -0,0 +1,30 @@ +#ifndef __glplatform_h_ +#define __glplatform_h_ + +/* $Revision: 10601 $ on $Date:: 2010-03-04 22:15:27 -0800 #$ */ + +/* + * This document is licensed under the SGI Free Software B License Version + * 2.0. For details, see http://oss.sgi.com/projects/FreeB/ . + */ + +/* Platform-specific types and definitions for OpenGL ES 1.X gl.h + * + * Adopters may modify khrplatform.h and this file to suit their platform. + * You are encouraged to submit all modifications to the Khronos group so that + * they can be included in future versions of this file. Please submit changes + * by sending them to the public Khronos Bugzilla (http://khronos.org/bugzilla) + * by filing a bug against product "OpenGL-ES" component "Registry". 
+ */ + +#include + +#ifndef GL_API +#define GL_API KHRONOS_APICALL +#endif + +#ifndef GL_APIENTRY +#define GL_APIENTRY KHRONOS_APIENTRY +#endif + +#endif /* __glplatform_h_ */ diff --git a/opengles-3.1/include/GLES2/gl2.h b/opengles-3.1/include/GLES2/gl2.h new file mode 100644 index 0000000000..b7fe76ee1c --- /dev/null +++ b/opengles-3.1/include/GLES2/gl2.h @@ -0,0 +1,620 @@ +#ifndef __gl2_h_ +#define __gl2_h_ + +/* $Revision: 16803 $ on $Date:: 2012-02-02 09:49:18 -0800 #$ */ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * This document is licensed under the SGI Free Software B License Version + * 2.0. For details, see http://oss.sgi.com/projects/FreeB/ . + */ + +/*------------------------------------------------------------------------- + * Data type definitions + *-----------------------------------------------------------------------*/ + +typedef void GLvoid; +typedef char GLchar; +typedef unsigned int GLenum; +typedef unsigned char GLboolean; +typedef unsigned int GLbitfield; +typedef khronos_int8_t GLbyte; +typedef short GLshort; +typedef int GLint; +typedef int GLsizei; +typedef khronos_uint8_t GLubyte; +typedef unsigned short GLushort; +typedef unsigned int GLuint; +typedef khronos_float_t GLfloat; +typedef khronos_float_t GLclampf; +typedef khronos_int32_t GLfixed; + +/* GL types for handling large vertex buffer objects */ +typedef khronos_intptr_t GLintptr; +typedef khronos_ssize_t GLsizeiptr; + +/* OpenGL ES core versions */ +#define GL_ES_VERSION_2_0 1 + +/* ClearBufferMask */ +#define GL_DEPTH_BUFFER_BIT 0x00000100 +#define GL_STENCIL_BUFFER_BIT 0x00000400 +#define GL_COLOR_BUFFER_BIT 0x00004000 + +/* Boolean */ +#define GL_FALSE 0 +#define GL_TRUE 1 + +/* BeginMode */ +#define GL_POINTS 0x0000 +#define GL_LINES 0x0001 +#define GL_LINE_LOOP 0x0002 +#define GL_LINE_STRIP 0x0003 +#define GL_TRIANGLES 0x0004 +#define GL_TRIANGLE_STRIP 0x0005 +#define GL_TRIANGLE_FAN 0x0006 + +/* AlphaFunction (not supported in ES20) */ +/* GL_NEVER */ 
+/* GL_LESS */ +/* GL_EQUAL */ +/* GL_LEQUAL */ +/* GL_GREATER */ +/* GL_NOTEQUAL */ +/* GL_GEQUAL */ +/* GL_ALWAYS */ + +/* BlendingFactorDest */ +#define GL_ZERO 0 +#define GL_ONE 1 +#define GL_SRC_COLOR 0x0300 +#define GL_ONE_MINUS_SRC_COLOR 0x0301 +#define GL_SRC_ALPHA 0x0302 +#define GL_ONE_MINUS_SRC_ALPHA 0x0303 +#define GL_DST_ALPHA 0x0304 +#define GL_ONE_MINUS_DST_ALPHA 0x0305 + +/* BlendingFactorSrc */ +/* GL_ZERO */ +/* GL_ONE */ +#define GL_DST_COLOR 0x0306 +#define GL_ONE_MINUS_DST_COLOR 0x0307 +#define GL_SRC_ALPHA_SATURATE 0x0308 +/* GL_SRC_ALPHA */ +/* GL_ONE_MINUS_SRC_ALPHA */ +/* GL_DST_ALPHA */ +/* GL_ONE_MINUS_DST_ALPHA */ + +/* BlendEquationSeparate */ +#define GL_FUNC_ADD 0x8006 +#define GL_BLEND_EQUATION 0x8009 +#define GL_BLEND_EQUATION_RGB 0x8009 /* same as BLEND_EQUATION */ +#define GL_BLEND_EQUATION_ALPHA 0x883D + +/* BlendSubtract */ +#define GL_FUNC_SUBTRACT 0x800A +#define GL_FUNC_REVERSE_SUBTRACT 0x800B + +/* Separate Blend Functions */ +#define GL_BLEND_DST_RGB 0x80C8 +#define GL_BLEND_SRC_RGB 0x80C9 +#define GL_BLEND_DST_ALPHA 0x80CA +#define GL_BLEND_SRC_ALPHA 0x80CB +#define GL_CONSTANT_COLOR 0x8001 +#define GL_ONE_MINUS_CONSTANT_COLOR 0x8002 +#define GL_CONSTANT_ALPHA 0x8003 +#define GL_ONE_MINUS_CONSTANT_ALPHA 0x8004 +#define GL_BLEND_COLOR 0x8005 + +/* Buffer Objects */ +#define GL_ARRAY_BUFFER 0x8892 +#define GL_ELEMENT_ARRAY_BUFFER 0x8893 +#define GL_ARRAY_BUFFER_BINDING 0x8894 +#define GL_ELEMENT_ARRAY_BUFFER_BINDING 0x8895 + +#define GL_STREAM_DRAW 0x88E0 +#define GL_STATIC_DRAW 0x88E4 +#define GL_DYNAMIC_DRAW 0x88E8 + +#define GL_BUFFER_SIZE 0x8764 +#define GL_BUFFER_USAGE 0x8765 + +#define GL_CURRENT_VERTEX_ATTRIB 0x8626 + +/* CullFaceMode */ +#define GL_FRONT 0x0404 +#define GL_BACK 0x0405 +#define GL_FRONT_AND_BACK 0x0408 + +/* DepthFunction */ +/* GL_NEVER */ +/* GL_LESS */ +/* GL_EQUAL */ +/* GL_LEQUAL */ +/* GL_GREATER */ +/* GL_NOTEQUAL */ +/* GL_GEQUAL */ +/* GL_ALWAYS */ + +/* EnableCap */ +#define GL_TEXTURE_2D 
0x0DE1 +#define GL_CULL_FACE 0x0B44 +#define GL_BLEND 0x0BE2 +#define GL_DITHER 0x0BD0 +#define GL_STENCIL_TEST 0x0B90 +#define GL_DEPTH_TEST 0x0B71 +#define GL_SCISSOR_TEST 0x0C11 +#define GL_POLYGON_OFFSET_FILL 0x8037 +#define GL_SAMPLE_ALPHA_TO_COVERAGE 0x809E +#define GL_SAMPLE_COVERAGE 0x80A0 + +/* ErrorCode */ +#define GL_NO_ERROR 0 +#define GL_INVALID_ENUM 0x0500 +#define GL_INVALID_VALUE 0x0501 +#define GL_INVALID_OPERATION 0x0502 +#define GL_OUT_OF_MEMORY 0x0505 + +/* FrontFaceDirection */ +#define GL_CW 0x0900 +#define GL_CCW 0x0901 + +/* GetPName */ +#define GL_LINE_WIDTH 0x0B21 +#define GL_ALIASED_POINT_SIZE_RANGE 0x846D +#define GL_ALIASED_LINE_WIDTH_RANGE 0x846E +#define GL_CULL_FACE_MODE 0x0B45 +#define GL_FRONT_FACE 0x0B46 +#define GL_DEPTH_RANGE 0x0B70 +#define GL_DEPTH_WRITEMASK 0x0B72 +#define GL_DEPTH_CLEAR_VALUE 0x0B73 +#define GL_DEPTH_FUNC 0x0B74 +#define GL_STENCIL_CLEAR_VALUE 0x0B91 +#define GL_STENCIL_FUNC 0x0B92 +#define GL_STENCIL_FAIL 0x0B94 +#define GL_STENCIL_PASS_DEPTH_FAIL 0x0B95 +#define GL_STENCIL_PASS_DEPTH_PASS 0x0B96 +#define GL_STENCIL_REF 0x0B97 +#define GL_STENCIL_VALUE_MASK 0x0B93 +#define GL_STENCIL_WRITEMASK 0x0B98 +#define GL_STENCIL_BACK_FUNC 0x8800 +#define GL_STENCIL_BACK_FAIL 0x8801 +#define GL_STENCIL_BACK_PASS_DEPTH_FAIL 0x8802 +#define GL_STENCIL_BACK_PASS_DEPTH_PASS 0x8803 +#define GL_STENCIL_BACK_REF 0x8CA3 +#define GL_STENCIL_BACK_VALUE_MASK 0x8CA4 +#define GL_STENCIL_BACK_WRITEMASK 0x8CA5 +#define GL_VIEWPORT 0x0BA2 +#define GL_SCISSOR_BOX 0x0C10 +/* GL_SCISSOR_TEST */ +#define GL_COLOR_CLEAR_VALUE 0x0C22 +#define GL_COLOR_WRITEMASK 0x0C23 +#define GL_UNPACK_ALIGNMENT 0x0CF5 +#define GL_PACK_ALIGNMENT 0x0D05 +#define GL_MAX_TEXTURE_SIZE 0x0D33 +#define GL_MAX_VIEWPORT_DIMS 0x0D3A +#define GL_SUBPIXEL_BITS 0x0D50 +#define GL_RED_BITS 0x0D52 +#define GL_GREEN_BITS 0x0D53 +#define GL_BLUE_BITS 0x0D54 +#define GL_ALPHA_BITS 0x0D55 +#define GL_DEPTH_BITS 0x0D56 +#define GL_STENCIL_BITS 0x0D57 +#define 
GL_POLYGON_OFFSET_UNITS 0x2A00 +/* GL_POLYGON_OFFSET_FILL */ +#define GL_POLYGON_OFFSET_FACTOR 0x8038 +#define GL_TEXTURE_BINDING_2D 0x8069 +#define GL_SAMPLE_BUFFERS 0x80A8 +#define GL_SAMPLES 0x80A9 +#define GL_SAMPLE_COVERAGE_VALUE 0x80AA +#define GL_SAMPLE_COVERAGE_INVERT 0x80AB + +/* GetTextureParameter */ +/* GL_TEXTURE_MAG_FILTER */ +/* GL_TEXTURE_MIN_FILTER */ +/* GL_TEXTURE_WRAP_S */ +/* GL_TEXTURE_WRAP_T */ + +#define GL_NUM_COMPRESSED_TEXTURE_FORMATS 0x86A2 +#define GL_COMPRESSED_TEXTURE_FORMATS 0x86A3 + +/* HintMode */ +#define GL_DONT_CARE 0x1100 +#define GL_FASTEST 0x1101 +#define GL_NICEST 0x1102 + +/* HintTarget */ +#define GL_GENERATE_MIPMAP_HINT 0x8192 + +/* DataType */ +#define GL_BYTE 0x1400 +#define GL_UNSIGNED_BYTE 0x1401 +#define GL_SHORT 0x1402 +#define GL_UNSIGNED_SHORT 0x1403 +#define GL_INT 0x1404 +#define GL_UNSIGNED_INT 0x1405 +#define GL_FLOAT 0x1406 +#define GL_FIXED 0x140C + +/* PixelFormat */ +#define GL_DEPTH_COMPONENT 0x1902 +#define GL_ALPHA 0x1906 +#define GL_RGB 0x1907 +#define GL_RGBA 0x1908 +#define GL_LUMINANCE 0x1909 +#define GL_LUMINANCE_ALPHA 0x190A + +/* PixelType */ +/* GL_UNSIGNED_BYTE */ +#define GL_UNSIGNED_SHORT_4_4_4_4 0x8033 +#define GL_UNSIGNED_SHORT_5_5_5_1 0x8034 +#define GL_UNSIGNED_SHORT_5_6_5 0x8363 + +/* Shaders */ +#define GL_FRAGMENT_SHADER 0x8B30 +#define GL_VERTEX_SHADER 0x8B31 +#define GL_MAX_VERTEX_ATTRIBS 0x8869 +#define GL_MAX_VERTEX_UNIFORM_VECTORS 0x8DFB +#define GL_MAX_VARYING_VECTORS 0x8DFC +#define GL_MAX_COMBINED_TEXTURE_IMAGE_UNITS 0x8B4D +#define GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS 0x8B4C +#define GL_MAX_TEXTURE_IMAGE_UNITS 0x8872 +#define GL_MAX_FRAGMENT_UNIFORM_VECTORS 0x8DFD +#define GL_SHADER_TYPE 0x8B4F +#define GL_DELETE_STATUS 0x8B80 +#define GL_LINK_STATUS 0x8B82 +#define GL_VALIDATE_STATUS 0x8B83 +#define GL_ATTACHED_SHADERS 0x8B85 +#define GL_ACTIVE_UNIFORMS 0x8B86 +#define GL_ACTIVE_UNIFORM_MAX_LENGTH 0x8B87 +#define GL_ACTIVE_ATTRIBUTES 0x8B89 +#define 
GL_ACTIVE_ATTRIBUTE_MAX_LENGTH 0x8B8A +#define GL_SHADING_LANGUAGE_VERSION 0x8B8C +#define GL_CURRENT_PROGRAM 0x8B8D + +/* StencilFunction */ +#define GL_NEVER 0x0200 +#define GL_LESS 0x0201 +#define GL_EQUAL 0x0202 +#define GL_LEQUAL 0x0203 +#define GL_GREATER 0x0204 +#define GL_NOTEQUAL 0x0205 +#define GL_GEQUAL 0x0206 +#define GL_ALWAYS 0x0207 + +/* StencilOp */ +/* GL_ZERO */ +#define GL_KEEP 0x1E00 +#define GL_REPLACE 0x1E01 +#define GL_INCR 0x1E02 +#define GL_DECR 0x1E03 +#define GL_INVERT 0x150A +#define GL_INCR_WRAP 0x8507 +#define GL_DECR_WRAP 0x8508 + +/* StringName */ +#define GL_VENDOR 0x1F00 +#define GL_RENDERER 0x1F01 +#define GL_VERSION 0x1F02 +#define GL_EXTENSIONS 0x1F03 + +/* TextureMagFilter */ +#define GL_NEAREST 0x2600 +#define GL_LINEAR 0x2601 + +/* TextureMinFilter */ +/* GL_NEAREST */ +/* GL_LINEAR */ +#define GL_NEAREST_MIPMAP_NEAREST 0x2700 +#define GL_LINEAR_MIPMAP_NEAREST 0x2701 +#define GL_NEAREST_MIPMAP_LINEAR 0x2702 +#define GL_LINEAR_MIPMAP_LINEAR 0x2703 + +/* TextureParameterName */ +#define GL_TEXTURE_MAG_FILTER 0x2800 +#define GL_TEXTURE_MIN_FILTER 0x2801 +#define GL_TEXTURE_WRAP_S 0x2802 +#define GL_TEXTURE_WRAP_T 0x2803 + +/* TextureTarget */ +/* GL_TEXTURE_2D */ +#define GL_TEXTURE 0x1702 + +#define GL_TEXTURE_CUBE_MAP 0x8513 +#define GL_TEXTURE_BINDING_CUBE_MAP 0x8514 +#define GL_TEXTURE_CUBE_MAP_POSITIVE_X 0x8515 +#define GL_TEXTURE_CUBE_MAP_NEGATIVE_X 0x8516 +#define GL_TEXTURE_CUBE_MAP_POSITIVE_Y 0x8517 +#define GL_TEXTURE_CUBE_MAP_NEGATIVE_Y 0x8518 +#define GL_TEXTURE_CUBE_MAP_POSITIVE_Z 0x8519 +#define GL_TEXTURE_CUBE_MAP_NEGATIVE_Z 0x851A +#define GL_MAX_CUBE_MAP_TEXTURE_SIZE 0x851C + +/* TextureUnit */ +#define GL_TEXTURE0 0x84C0 +#define GL_TEXTURE1 0x84C1 +#define GL_TEXTURE2 0x84C2 +#define GL_TEXTURE3 0x84C3 +#define GL_TEXTURE4 0x84C4 +#define GL_TEXTURE5 0x84C5 +#define GL_TEXTURE6 0x84C6 +#define GL_TEXTURE7 0x84C7 +#define GL_TEXTURE8 0x84C8 +#define GL_TEXTURE9 0x84C9 +#define GL_TEXTURE10 0x84CA +#define 
GL_TEXTURE11 0x84CB +#define GL_TEXTURE12 0x84CC +#define GL_TEXTURE13 0x84CD +#define GL_TEXTURE14 0x84CE +#define GL_TEXTURE15 0x84CF +#define GL_TEXTURE16 0x84D0 +#define GL_TEXTURE17 0x84D1 +#define GL_TEXTURE18 0x84D2 +#define GL_TEXTURE19 0x84D3 +#define GL_TEXTURE20 0x84D4 +#define GL_TEXTURE21 0x84D5 +#define GL_TEXTURE22 0x84D6 +#define GL_TEXTURE23 0x84D7 +#define GL_TEXTURE24 0x84D8 +#define GL_TEXTURE25 0x84D9 +#define GL_TEXTURE26 0x84DA +#define GL_TEXTURE27 0x84DB +#define GL_TEXTURE28 0x84DC +#define GL_TEXTURE29 0x84DD +#define GL_TEXTURE30 0x84DE +#define GL_TEXTURE31 0x84DF +#define GL_ACTIVE_TEXTURE 0x84E0 + +/* TextureWrapMode */ +#define GL_REPEAT 0x2901 +#define GL_CLAMP_TO_EDGE 0x812F +#define GL_MIRRORED_REPEAT 0x8370 + +/* Uniform Types */ +#define GL_FLOAT_VEC2 0x8B50 +#define GL_FLOAT_VEC3 0x8B51 +#define GL_FLOAT_VEC4 0x8B52 +#define GL_INT_VEC2 0x8B53 +#define GL_INT_VEC3 0x8B54 +#define GL_INT_VEC4 0x8B55 +#define GL_BOOL 0x8B56 +#define GL_BOOL_VEC2 0x8B57 +#define GL_BOOL_VEC3 0x8B58 +#define GL_BOOL_VEC4 0x8B59 +#define GL_FLOAT_MAT2 0x8B5A +#define GL_FLOAT_MAT3 0x8B5B +#define GL_FLOAT_MAT4 0x8B5C +#define GL_SAMPLER_2D 0x8B5E +#define GL_SAMPLER_CUBE 0x8B60 + +/* Vertex Arrays */ +#define GL_VERTEX_ATTRIB_ARRAY_ENABLED 0x8622 +#define GL_VERTEX_ATTRIB_ARRAY_SIZE 0x8623 +#define GL_VERTEX_ATTRIB_ARRAY_STRIDE 0x8624 +#define GL_VERTEX_ATTRIB_ARRAY_TYPE 0x8625 +#define GL_VERTEX_ATTRIB_ARRAY_NORMALIZED 0x886A +#define GL_VERTEX_ATTRIB_ARRAY_POINTER 0x8645 +#define GL_VERTEX_ATTRIB_ARRAY_BUFFER_BINDING 0x889F + +/* Read Format */ +#define GL_IMPLEMENTATION_COLOR_READ_TYPE 0x8B9A +#define GL_IMPLEMENTATION_COLOR_READ_FORMAT 0x8B9B + +/* Shader Source */ +#define GL_COMPILE_STATUS 0x8B81 +#define GL_INFO_LOG_LENGTH 0x8B84 +#define GL_SHADER_SOURCE_LENGTH 0x8B88 +#define GL_SHADER_COMPILER 0x8DFA + +/* Shader Binary */ +#define GL_SHADER_BINARY_FORMATS 0x8DF8 +#define GL_NUM_SHADER_BINARY_FORMATS 0x8DF9 + +/* Shader Precision-Specified 
Types */ +#define GL_LOW_FLOAT 0x8DF0 +#define GL_MEDIUM_FLOAT 0x8DF1 +#define GL_HIGH_FLOAT 0x8DF2 +#define GL_LOW_INT 0x8DF3 +#define GL_MEDIUM_INT 0x8DF4 +#define GL_HIGH_INT 0x8DF5 + +/* Framebuffer Object. */ +#define GL_FRAMEBUFFER 0x8D40 +#define GL_RENDERBUFFER 0x8D41 + +#define GL_RGBA4 0x8056 +#define GL_RGB5_A1 0x8057 +#define GL_RGB565 0x8D62 +#define GL_DEPTH_COMPONENT16 0x81A5 +#define GL_STENCIL_INDEX8 0x8D48 + +#define GL_RENDERBUFFER_WIDTH 0x8D42 +#define GL_RENDERBUFFER_HEIGHT 0x8D43 +#define GL_RENDERBUFFER_INTERNAL_FORMAT 0x8D44 +#define GL_RENDERBUFFER_RED_SIZE 0x8D50 +#define GL_RENDERBUFFER_GREEN_SIZE 0x8D51 +#define GL_RENDERBUFFER_BLUE_SIZE 0x8D52 +#define GL_RENDERBUFFER_ALPHA_SIZE 0x8D53 +#define GL_RENDERBUFFER_DEPTH_SIZE 0x8D54 +#define GL_RENDERBUFFER_STENCIL_SIZE 0x8D55 + +#define GL_FRAMEBUFFER_ATTACHMENT_OBJECT_TYPE 0x8CD0 +#define GL_FRAMEBUFFER_ATTACHMENT_OBJECT_NAME 0x8CD1 +#define GL_FRAMEBUFFER_ATTACHMENT_TEXTURE_LEVEL 0x8CD2 +#define GL_FRAMEBUFFER_ATTACHMENT_TEXTURE_CUBE_MAP_FACE 0x8CD3 + +#define GL_COLOR_ATTACHMENT0 0x8CE0 +#define GL_DEPTH_ATTACHMENT 0x8D00 +#define GL_STENCIL_ATTACHMENT 0x8D20 + +#define GL_NONE 0 + +#define GL_FRAMEBUFFER_COMPLETE 0x8CD5 +#define GL_FRAMEBUFFER_INCOMPLETE_ATTACHMENT 0x8CD6 +#define GL_FRAMEBUFFER_INCOMPLETE_MISSING_ATTACHMENT 0x8CD7 +#define GL_FRAMEBUFFER_INCOMPLETE_DIMENSIONS 0x8CD9 +#define GL_FRAMEBUFFER_UNSUPPORTED 0x8CDD + +#define GL_FRAMEBUFFER_BINDING 0x8CA6 +#define GL_RENDERBUFFER_BINDING 0x8CA7 +#define GL_MAX_RENDERBUFFER_SIZE 0x84E8 + +#define GL_INVALID_FRAMEBUFFER_OPERATION 0x0506 + +/*------------------------------------------------------------------------- + * GL core functions. 
+ *-----------------------------------------------------------------------*/ + +GL_APICALL void GL_APIENTRY glActiveTexture (GLenum texture); +GL_APICALL void GL_APIENTRY glAttachShader (GLuint program, GLuint shader); +GL_APICALL void GL_APIENTRY glBindAttribLocation (GLuint program, GLuint index, const GLchar* name); +GL_APICALL void GL_APIENTRY glBindBuffer (GLenum target, GLuint buffer); +GL_APICALL void GL_APIENTRY glBindFramebuffer (GLenum target, GLuint framebuffer); +GL_APICALL void GL_APIENTRY glBindRenderbuffer (GLenum target, GLuint renderbuffer); +GL_APICALL void GL_APIENTRY glBindTexture (GLenum target, GLuint texture); +GL_APICALL void GL_APIENTRY glBlendColor (GLclampf red, GLclampf green, GLclampf blue, GLclampf alpha); +GL_APICALL void GL_APIENTRY glBlendEquation ( GLenum mode ); +GL_APICALL void GL_APIENTRY glBlendEquationSeparate (GLenum modeRGB, GLenum modeAlpha); +GL_APICALL void GL_APIENTRY glBlendFunc (GLenum sfactor, GLenum dfactor); +GL_APICALL void GL_APIENTRY glBlendFuncSeparate (GLenum srcRGB, GLenum dstRGB, GLenum srcAlpha, GLenum dstAlpha); +GL_APICALL void GL_APIENTRY glBufferData (GLenum target, GLsizeiptr size, const GLvoid* data, GLenum usage); +GL_APICALL void GL_APIENTRY glBufferSubData (GLenum target, GLintptr offset, GLsizeiptr size, const GLvoid* data); +GL_APICALL GLenum GL_APIENTRY glCheckFramebufferStatus (GLenum target); +GL_APICALL void GL_APIENTRY glClear (GLbitfield mask); +GL_APICALL void GL_APIENTRY glClearColor (GLclampf red, GLclampf green, GLclampf blue, GLclampf alpha); +GL_APICALL void GL_APIENTRY glClearDepthf (GLclampf depth); +GL_APICALL void GL_APIENTRY glClearStencil (GLint s); +GL_APICALL void GL_APIENTRY glColorMask (GLboolean red, GLboolean green, GLboolean blue, GLboolean alpha); +GL_APICALL void GL_APIENTRY glCompileShader (GLuint shader); +GL_APICALL void GL_APIENTRY glCompressedTexImage2D (GLenum target, GLint level, GLenum internalformat, GLsizei width, GLsizei height, GLint border, GLsizei 
imageSize, const GLvoid* data); +GL_APICALL void GL_APIENTRY glCompressedTexSubImage2D (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLsizei width, GLsizei height, GLenum format, GLsizei imageSize, const GLvoid* data); +GL_APICALL void GL_APIENTRY glCopyTexImage2D (GLenum target, GLint level, GLenum internalformat, GLint x, GLint y, GLsizei width, GLsizei height, GLint border); +GL_APICALL void GL_APIENTRY glCopyTexSubImage2D (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint x, GLint y, GLsizei width, GLsizei height); +GL_APICALL GLuint GL_APIENTRY glCreateProgram (void); +GL_APICALL GLuint GL_APIENTRY glCreateShader (GLenum type); +GL_APICALL void GL_APIENTRY glCullFace (GLenum mode); +GL_APICALL void GL_APIENTRY glDeleteBuffers (GLsizei n, const GLuint* buffers); +GL_APICALL void GL_APIENTRY glDeleteFramebuffers (GLsizei n, const GLuint* framebuffers); +GL_APICALL void GL_APIENTRY glDeleteProgram (GLuint program); +GL_APICALL void GL_APIENTRY glDeleteRenderbuffers (GLsizei n, const GLuint* renderbuffers); +GL_APICALL void GL_APIENTRY glDeleteShader (GLuint shader); +GL_APICALL void GL_APIENTRY glDeleteTextures (GLsizei n, const GLuint* textures); +GL_APICALL void GL_APIENTRY glDepthFunc (GLenum func); +GL_APICALL void GL_APIENTRY glDepthMask (GLboolean flag); +GL_APICALL void GL_APIENTRY glDepthRangef (GLclampf zNear, GLclampf zFar); +GL_APICALL void GL_APIENTRY glDetachShader (GLuint program, GLuint shader); +GL_APICALL void GL_APIENTRY glDisable (GLenum cap); +GL_APICALL void GL_APIENTRY glDisableVertexAttribArray (GLuint index); +GL_APICALL void GL_APIENTRY glDrawArrays (GLenum mode, GLint first, GLsizei count); +GL_APICALL void GL_APIENTRY glDrawElements (GLenum mode, GLsizei count, GLenum type, const GLvoid* indices); +GL_APICALL void GL_APIENTRY glEnable (GLenum cap); +GL_APICALL void GL_APIENTRY glEnableVertexAttribArray (GLuint index); +GL_APICALL void GL_APIENTRY glFinish (void); +GL_APICALL void GL_APIENTRY glFlush (void); 
+GL_APICALL void GL_APIENTRY glFramebufferRenderbuffer (GLenum target, GLenum attachment, GLenum renderbuffertarget, GLuint renderbuffer); +GL_APICALL void GL_APIENTRY glFramebufferTexture2D (GLenum target, GLenum attachment, GLenum textarget, GLuint texture, GLint level); +GL_APICALL void GL_APIENTRY glFrontFace (GLenum mode); +GL_APICALL void GL_APIENTRY glGenBuffers (GLsizei n, GLuint* buffers); +GL_APICALL void GL_APIENTRY glGenerateMipmap (GLenum target); +GL_APICALL void GL_APIENTRY glGenFramebuffers (GLsizei n, GLuint* framebuffers); +GL_APICALL void GL_APIENTRY glGenRenderbuffers (GLsizei n, GLuint* renderbuffers); +GL_APICALL void GL_APIENTRY glGenTextures (GLsizei n, GLuint* textures); +GL_APICALL void GL_APIENTRY glGetActiveAttrib (GLuint program, GLuint index, GLsizei bufsize, GLsizei* length, GLint* size, GLenum* type, GLchar* name); +GL_APICALL void GL_APIENTRY glGetActiveUniform (GLuint program, GLuint index, GLsizei bufsize, GLsizei* length, GLint* size, GLenum* type, GLchar* name); +GL_APICALL void GL_APIENTRY glGetAttachedShaders (GLuint program, GLsizei maxcount, GLsizei* count, GLuint* shaders); +GL_APICALL int GL_APIENTRY glGetAttribLocation (GLuint program, const GLchar* name); +GL_APICALL void GL_APIENTRY glGetBooleanv (GLenum pname, GLboolean* params); +GL_APICALL void GL_APIENTRY glGetBufferParameteriv (GLenum target, GLenum pname, GLint* params); +GL_APICALL GLenum GL_APIENTRY glGetError (void); +GL_APICALL void GL_APIENTRY glGetFloatv (GLenum pname, GLfloat* params); +GL_APICALL void GL_APIENTRY glGetFramebufferAttachmentParameteriv (GLenum target, GLenum attachment, GLenum pname, GLint* params); +GL_APICALL void GL_APIENTRY glGetIntegerv (GLenum pname, GLint* params); +GL_APICALL void GL_APIENTRY glGetProgramiv (GLuint program, GLenum pname, GLint* params); +GL_APICALL void GL_APIENTRY glGetProgramInfoLog (GLuint program, GLsizei bufsize, GLsizei* length, GLchar* infolog); +GL_APICALL void GL_APIENTRY glGetRenderbufferParameteriv (GLenum 
target, GLenum pname, GLint* params); +GL_APICALL void GL_APIENTRY glGetShaderiv (GLuint shader, GLenum pname, GLint* params); +GL_APICALL void GL_APIENTRY glGetShaderInfoLog (GLuint shader, GLsizei bufsize, GLsizei* length, GLchar* infolog); +GL_APICALL void GL_APIENTRY glGetShaderPrecisionFormat (GLenum shadertype, GLenum precisiontype, GLint* range, GLint* precision); +GL_APICALL void GL_APIENTRY glGetShaderSource (GLuint shader, GLsizei bufsize, GLsizei* length, GLchar* source); +GL_APICALL const GLubyte* GL_APIENTRY glGetString (GLenum name); +GL_APICALL void GL_APIENTRY glGetTexParameterfv (GLenum target, GLenum pname, GLfloat* params); +GL_APICALL void GL_APIENTRY glGetTexParameteriv (GLenum target, GLenum pname, GLint* params); +GL_APICALL void GL_APIENTRY glGetUniformfv (GLuint program, GLint location, GLfloat* params); +GL_APICALL void GL_APIENTRY glGetUniformiv (GLuint program, GLint location, GLint* params); +GL_APICALL int GL_APIENTRY glGetUniformLocation (GLuint program, const GLchar* name); +GL_APICALL void GL_APIENTRY glGetVertexAttribfv (GLuint index, GLenum pname, GLfloat* params); +GL_APICALL void GL_APIENTRY glGetVertexAttribiv (GLuint index, GLenum pname, GLint* params); +GL_APICALL void GL_APIENTRY glGetVertexAttribPointerv (GLuint index, GLenum pname, GLvoid** pointer); +GL_APICALL void GL_APIENTRY glHint (GLenum target, GLenum mode); +GL_APICALL GLboolean GL_APIENTRY glIsBuffer (GLuint buffer); +GL_APICALL GLboolean GL_APIENTRY glIsEnabled (GLenum cap); +GL_APICALL GLboolean GL_APIENTRY glIsFramebuffer (GLuint framebuffer); +GL_APICALL GLboolean GL_APIENTRY glIsProgram (GLuint program); +GL_APICALL GLboolean GL_APIENTRY glIsRenderbuffer (GLuint renderbuffer); +GL_APICALL GLboolean GL_APIENTRY glIsShader (GLuint shader); +GL_APICALL GLboolean GL_APIENTRY glIsTexture (GLuint texture); +GL_APICALL void GL_APIENTRY glLineWidth (GLfloat width); +GL_APICALL void GL_APIENTRY glLinkProgram (GLuint program); +GL_APICALL void GL_APIENTRY glPixelStorei 
(GLenum pname, GLint param); +GL_APICALL void GL_APIENTRY glPolygonOffset (GLfloat factor, GLfloat units); +GL_APICALL void GL_APIENTRY glReadPixels (GLint x, GLint y, GLsizei width, GLsizei height, GLenum format, GLenum type, GLvoid* pixels); +GL_APICALL void GL_APIENTRY glReleaseShaderCompiler (void); +GL_APICALL void GL_APIENTRY glRenderbufferStorage (GLenum target, GLenum internalformat, GLsizei width, GLsizei height); +GL_APICALL void GL_APIENTRY glSampleCoverage (GLclampf value, GLboolean invert); +GL_APICALL void GL_APIENTRY glScissor (GLint x, GLint y, GLsizei width, GLsizei height); +GL_APICALL void GL_APIENTRY glShaderBinary (GLsizei n, const GLuint* shaders, GLenum binaryformat, const GLvoid* binary, GLsizei length); +GL_APICALL void GL_APIENTRY glShaderSource (GLuint shader, GLsizei count, const GLchar* const* string, const GLint* length); +GL_APICALL void GL_APIENTRY glStencilFunc (GLenum func, GLint ref, GLuint mask); +GL_APICALL void GL_APIENTRY glStencilFuncSeparate (GLenum face, GLenum func, GLint ref, GLuint mask); +GL_APICALL void GL_APIENTRY glStencilMask (GLuint mask); +GL_APICALL void GL_APIENTRY glStencilMaskSeparate (GLenum face, GLuint mask); +GL_APICALL void GL_APIENTRY glStencilOp (GLenum fail, GLenum zfail, GLenum zpass); +GL_APICALL void GL_APIENTRY glStencilOpSeparate (GLenum face, GLenum fail, GLenum zfail, GLenum zpass); +GL_APICALL void GL_APIENTRY glTexImage2D (GLenum target, GLint level, GLint internalformat, GLsizei width, GLsizei height, GLint border, GLenum format, GLenum type, const GLvoid* pixels); +GL_APICALL void GL_APIENTRY glTexParameterf (GLenum target, GLenum pname, GLfloat param); +GL_APICALL void GL_APIENTRY glTexParameterfv (GLenum target, GLenum pname, const GLfloat* params); +GL_APICALL void GL_APIENTRY glTexParameteri (GLenum target, GLenum pname, GLint param); +GL_APICALL void GL_APIENTRY glTexParameteriv (GLenum target, GLenum pname, const GLint* params); +GL_APICALL void GL_APIENTRY glTexSubImage2D (GLenum 
target, GLint level, GLint xoffset, GLint yoffset, GLsizei width, GLsizei height, GLenum format, GLenum type, const GLvoid* pixels); +GL_APICALL void GL_APIENTRY glUniform1f (GLint location, GLfloat x); +GL_APICALL void GL_APIENTRY glUniform1fv (GLint location, GLsizei count, const GLfloat* v); +GL_APICALL void GL_APIENTRY glUniform1i (GLint location, GLint x); +GL_APICALL void GL_APIENTRY glUniform1iv (GLint location, GLsizei count, const GLint* v); +GL_APICALL void GL_APIENTRY glUniform2f (GLint location, GLfloat x, GLfloat y); +GL_APICALL void GL_APIENTRY glUniform2fv (GLint location, GLsizei count, const GLfloat* v); +GL_APICALL void GL_APIENTRY glUniform2i (GLint location, GLint x, GLint y); +GL_APICALL void GL_APIENTRY glUniform2iv (GLint location, GLsizei count, const GLint* v); +GL_APICALL void GL_APIENTRY glUniform3f (GLint location, GLfloat x, GLfloat y, GLfloat z); +GL_APICALL void GL_APIENTRY glUniform3fv (GLint location, GLsizei count, const GLfloat* v); +GL_APICALL void GL_APIENTRY glUniform3i (GLint location, GLint x, GLint y, GLint z); +GL_APICALL void GL_APIENTRY glUniform3iv (GLint location, GLsizei count, const GLint* v); +GL_APICALL void GL_APIENTRY glUniform4f (GLint location, GLfloat x, GLfloat y, GLfloat z, GLfloat w); +GL_APICALL void GL_APIENTRY glUniform4fv (GLint location, GLsizei count, const GLfloat* v); +GL_APICALL void GL_APIENTRY glUniform4i (GLint location, GLint x, GLint y, GLint z, GLint w); +GL_APICALL void GL_APIENTRY glUniform4iv (GLint location, GLsizei count, const GLint* v); +GL_APICALL void GL_APIENTRY glUniformMatrix2fv (GLint location, GLsizei count, GLboolean transpose, const GLfloat* value); +GL_APICALL void GL_APIENTRY glUniformMatrix3fv (GLint location, GLsizei count, GLboolean transpose, const GLfloat* value); +GL_APICALL void GL_APIENTRY glUniformMatrix4fv (GLint location, GLsizei count, GLboolean transpose, const GLfloat* value); +GL_APICALL void GL_APIENTRY glUseProgram (GLuint program); +GL_APICALL void 
GL_APIENTRY glValidateProgram (GLuint program); +GL_APICALL void GL_APIENTRY glVertexAttrib1f (GLuint indx, GLfloat x); +GL_APICALL void GL_APIENTRY glVertexAttrib1fv (GLuint indx, const GLfloat* values); +GL_APICALL void GL_APIENTRY glVertexAttrib2f (GLuint indx, GLfloat x, GLfloat y); +GL_APICALL void GL_APIENTRY glVertexAttrib2fv (GLuint indx, const GLfloat* values); +GL_APICALL void GL_APIENTRY glVertexAttrib3f (GLuint indx, GLfloat x, GLfloat y, GLfloat z); +GL_APICALL void GL_APIENTRY glVertexAttrib3fv (GLuint indx, const GLfloat* values); +GL_APICALL void GL_APIENTRY glVertexAttrib4f (GLuint indx, GLfloat x, GLfloat y, GLfloat z, GLfloat w); +GL_APICALL void GL_APIENTRY glVertexAttrib4fv (GLuint indx, const GLfloat* values); +GL_APICALL void GL_APIENTRY glVertexAttribPointer (GLuint indx, GLint size, GLenum type, GLboolean normalized, GLsizei stride, const GLvoid* ptr); +GL_APICALL void GL_APIENTRY glViewport (GLint x, GLint y, GLsizei width, GLsizei height); + +#ifdef __cplusplus +} +#endif + +#endif /* __gl2_h_ */ diff --git a/opengles-3.1/include/GLES2/gl2ext.h b/opengles-3.1/include/GLES2/gl2ext.h new file mode 100644 index 0000000000..f7e7a61369 --- /dev/null +++ b/opengles-3.1/include/GLES2/gl2ext.h @@ -0,0 +1,1809 @@ +#ifndef __gl2ext_h_ +#define __gl2ext_h_ + +/* $Revision: 19436 $ on $Date:: 2012-10-10 10:37:04 -0700 #$ */ + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * This document is licensed under the SGI Free Software B License Version + * 2.0. For details, see http://oss.sgi.com/projects/FreeB/ . 
+ */ + +#ifndef GL_APIENTRYP +# define GL_APIENTRYP GL_APIENTRY* +#endif + +/*------------------------------------------------------------------------* + * OES extension tokens + *------------------------------------------------------------------------*/ + +/* GL_OES_compressed_ETC1_RGB8_texture */ +#ifndef GL_OES_compressed_ETC1_RGB8_texture +#define GL_ETC1_RGB8_OES 0x8D64 +#endif + +/* GL_OES_compressed_paletted_texture */ +#ifndef GL_OES_compressed_paletted_texture +#define GL_PALETTE4_RGB8_OES 0x8B90 +#define GL_PALETTE4_RGBA8_OES 0x8B91 +#define GL_PALETTE4_R5_G6_B5_OES 0x8B92 +#define GL_PALETTE4_RGBA4_OES 0x8B93 +#define GL_PALETTE4_RGB5_A1_OES 0x8B94 +#define GL_PALETTE8_RGB8_OES 0x8B95 +#define GL_PALETTE8_RGBA8_OES 0x8B96 +#define GL_PALETTE8_R5_G6_B5_OES 0x8B97 +#define GL_PALETTE8_RGBA4_OES 0x8B98 +#define GL_PALETTE8_RGB5_A1_OES 0x8B99 +#endif + +/* GL_OES_depth24 */ +#ifndef GL_OES_depth24 +#define GL_DEPTH_COMPONENT24_OES 0x81A6 +#endif + +/* GL_OES_depth32 */ +#ifndef GL_OES_depth32 +#define GL_DEPTH_COMPONENT32_OES 0x81A7 +#endif + +/* GL_OES_depth_texture */ +/* No new tokens introduced by this extension. */ + +/* GL_OES_EGL_image */ +#ifndef GL_OES_EGL_image +typedef void* GLeglImageOES; +#endif + +/* GL_OES_EGL_image_external */ +#ifndef GL_OES_EGL_image_external +/* GLeglImageOES defined in GL_OES_EGL_image already. 
*/ +#define GL_TEXTURE_EXTERNAL_OES 0x8D65 +#define GL_SAMPLER_EXTERNAL_OES 0x8D66 +#define GL_TEXTURE_BINDING_EXTERNAL_OES 0x8D67 +#define GL_REQUIRED_TEXTURE_IMAGE_UNITS_OES 0x8D68 +#endif + +/* GL_OES_element_index_uint */ +#ifndef GL_OES_element_index_uint +#define GL_UNSIGNED_INT 0x1405 +#endif + +/* GL_OES_get_program_binary */ +#ifndef GL_OES_get_program_binary +#define GL_PROGRAM_BINARY_LENGTH_OES 0x8741 +#define GL_NUM_PROGRAM_BINARY_FORMATS_OES 0x87FE +#define GL_PROGRAM_BINARY_FORMATS_OES 0x87FF +#endif + +/* GL_OES_mapbuffer */ +#ifndef GL_OES_mapbuffer +#define GL_WRITE_ONLY_OES 0x88B9 +#define GL_BUFFER_ACCESS_OES 0x88BB +#define GL_BUFFER_MAPPED_OES 0x88BC +#define GL_BUFFER_MAP_POINTER_OES 0x88BD +#endif + +/* GL_OES_packed_depth_stencil */ +#ifndef GL_OES_packed_depth_stencil +#define GL_DEPTH_STENCIL_OES 0x84F9 +#define GL_UNSIGNED_INT_24_8_OES 0x84FA +#define GL_DEPTH24_STENCIL8_OES 0x88F0 +#endif + +/* GL_OES_required_internalformat */ +#ifndef GL_OES_required_internalformat +#define GL_ALPHA8_OES 0x803C +#define GL_DEPTH_COMPONENT16_OES 0x81A5 +/* reuse GL_DEPTH_COMPONENT24_OES */ +/* reuse GL_DEPTH24_STENCIL8_OES */ +/* reuse GL_DEPTH_COMPONENT32_OES */ +#define GL_LUMINANCE4_ALPHA4_OES 0x8043 +#define GL_LUMINANCE8_ALPHA8_OES 0x8045 +#define GL_LUMINANCE8_OES 0x8040 +#define GL_RGBA4_OES 0x8056 +#define GL_RGB5_A1_OES 0x8057 +#define GL_RGB565_OES 0x8D62 +/* reuse GL_RGB8_OES */ +/* reuse GL_RGBA8_OES */ +/* reuse GL_RGB10_EXT */ +/* reuse GL_RGB10_A2_EXT */ +#endif + +/* GL_OES_rgb8_rgba8 */ +#ifndef GL_OES_rgb8_rgba8 +#define GL_RGB8_OES 0x8051 +#define GL_RGBA8_OES 0x8058 +#endif + +/* GL_OES_standard_derivatives */ +#ifndef GL_OES_standard_derivatives +#define GL_FRAGMENT_SHADER_DERIVATIVE_HINT_OES 0x8B8B +#endif + +/* GL_OES_stencil1 */ +#ifndef GL_OES_stencil1 +#define GL_STENCIL_INDEX1_OES 0x8D46 +#endif + +/* GL_OES_stencil4 */ +#ifndef GL_OES_stencil4 +#define GL_STENCIL_INDEX4_OES 0x8D47 +#endif + +#ifndef GL_OES_surfaceless_context 
+#define GL_FRAMEBUFFER_UNDEFINED_OES 0x8219 +#endif + +/* GL_OES_texture_3D */ +#ifndef GL_OES_texture_3D +#define GL_TEXTURE_WRAP_R_OES 0x8072 +#define GL_TEXTURE_3D_OES 0x806F +#define GL_TEXTURE_BINDING_3D_OES 0x806A +#define GL_MAX_3D_TEXTURE_SIZE_OES 0x8073 +#define GL_SAMPLER_3D_OES 0x8B5F +#define GL_FRAMEBUFFER_ATTACHMENT_TEXTURE_3D_ZOFFSET_OES 0x8CD4 +#endif + +/* GL_OES_texture_float */ +/* No new tokens introduced by this extension. */ + +/* GL_OES_texture_float_linear */ +/* No new tokens introduced by this extension. */ + +/* GL_OES_texture_half_float */ +#ifndef GL_OES_texture_half_float +#define GL_HALF_FLOAT_OES 0x8D61 +#endif + +/* GL_OES_texture_half_float_linear */ +/* No new tokens introduced by this extension. */ + +/* GL_OES_texture_npot */ +/* No new tokens introduced by this extension. */ + +/* GL_OES_vertex_array_object */ +#ifndef GL_OES_vertex_array_object +#define GL_VERTEX_ARRAY_BINDING_OES 0x85B5 +#endif + +/* GL_OES_vertex_half_float */ +/* GL_HALF_FLOAT_OES defined in GL_OES_texture_half_float already. 
*/ + +/* GL_OES_vertex_type_10_10_10_2 */ +#ifndef GL_OES_vertex_type_10_10_10_2 +#define GL_UNSIGNED_INT_10_10_10_2_OES 0x8DF6 +#define GL_INT_10_10_10_2_OES 0x8DF7 +#endif + +/*------------------------------------------------------------------------* + * KHR extension tokens + *------------------------------------------------------------------------*/ + +#ifndef GL_KHR_debug +typedef void (GL_APIENTRYP GLDEBUGPROC)(GLenum source,GLenum type,GLuint id,GLenum severity,GLsizei length,const GLchar *message,GLvoid *userParam); +#define GL_DEBUG_OUTPUT_SYNCHRONOUS 0x8242 +#define GL_DEBUG_NEXT_LOGGED_MESSAGE_LENGTH 0x8243 +#define GL_DEBUG_CALLBACK_FUNCTION 0x8244 +#define GL_DEBUG_CALLBACK_USER_PARAM 0x8245 +#define GL_DEBUG_SOURCE_API 0x8246 +#define GL_DEBUG_SOURCE_WINDOW_SYSTEM 0x8247 +#define GL_DEBUG_SOURCE_SHADER_COMPILER 0x8248 +#define GL_DEBUG_SOURCE_THIRD_PARTY 0x8249 +#define GL_DEBUG_SOURCE_APPLICATION 0x824A +#define GL_DEBUG_SOURCE_OTHER 0x824B +#define GL_DEBUG_TYPE_ERROR 0x824C +#define GL_DEBUG_TYPE_DEPRECATED_BEHAVIOR 0x824D +#define GL_DEBUG_TYPE_UNDEFINED_BEHAVIOR 0x824E +#define GL_DEBUG_TYPE_PORTABILITY 0x824F +#define GL_DEBUG_TYPE_PERFORMANCE 0x8250 +#define GL_DEBUG_TYPE_OTHER 0x8251 +#define GL_DEBUG_TYPE_MARKER 0x8268 +#define GL_DEBUG_TYPE_PUSH_GROUP 0x8269 +#define GL_DEBUG_TYPE_POP_GROUP 0x826A +#define GL_DEBUG_SEVERITY_NOTIFICATION 0x826B +#define GL_MAX_DEBUG_GROUP_STACK_DEPTH 0x826C +#define GL_DEBUG_GROUP_STACK_DEPTH 0x826D +#define GL_BUFFER 0x82E0 +#define GL_SHADER 0x82E1 +#define GL_PROGRAM 0x82E2 +#define GL_QUERY 0x82E3 +/* PROGRAM_PIPELINE only in GL */ +#define GL_SAMPLER 0x82E6 +/* DISPLAY_LIST only in GL */ +#define GL_MAX_LABEL_LENGTH 0x82E8 +#define GL_MAX_DEBUG_MESSAGE_LENGTH 0x9143 +#define GL_MAX_DEBUG_LOGGED_MESSAGES 0x9144 +#define GL_DEBUG_LOGGED_MESSAGES 0x9145 +#define GL_DEBUG_SEVERITY_HIGH 0x9146 +#define GL_DEBUG_SEVERITY_MEDIUM 0x9147 +#define GL_DEBUG_SEVERITY_LOW 0x9148 +#define GL_DEBUG_OUTPUT 0x92E0 
+#define GL_CONTEXT_FLAG_DEBUG_BIT 0x00000002 +#define GL_STACK_OVERFLOW 0x0503 +#define GL_STACK_UNDERFLOW 0x0504 +#endif + +#ifndef GL_KHR_texture_compression_astc_ldr +#define GL_COMPRESSED_RGBA_ASTC_4x4_KHR 0x93B0 +#define GL_COMPRESSED_RGBA_ASTC_5x4_KHR 0x93B1 +#define GL_COMPRESSED_RGBA_ASTC_5x5_KHR 0x93B2 +#define GL_COMPRESSED_RGBA_ASTC_6x5_KHR 0x93B3 +#define GL_COMPRESSED_RGBA_ASTC_6x6_KHR 0x93B4 +#define GL_COMPRESSED_RGBA_ASTC_8x5_KHR 0x93B5 +#define GL_COMPRESSED_RGBA_ASTC_8x6_KHR 0x93B6 +#define GL_COMPRESSED_RGBA_ASTC_8x8_KHR 0x93B7 +#define GL_COMPRESSED_RGBA_ASTC_10x5_KHR 0x93B8 +#define GL_COMPRESSED_RGBA_ASTC_10x6_KHR 0x93B9 +#define GL_COMPRESSED_RGBA_ASTC_10x8_KHR 0x93BA +#define GL_COMPRESSED_RGBA_ASTC_10x10_KHR 0x93BB +#define GL_COMPRESSED_RGBA_ASTC_12x10_KHR 0x93BC +#define GL_COMPRESSED_RGBA_ASTC_12x12_KHR 0x93BD +#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR 0x93D0 +#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR 0x93D1 +#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR 0x93D2 +#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR 0x93D3 +#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR 0x93D4 +#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR 0x93D5 +#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR 0x93D6 +#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR 0x93D7 +#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x5_KHR 0x93D8 +#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x6_KHR 0x93D9 +#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR 0x93DA +#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR 0x93DB +#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x10_KHR 0x93DC +#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR 0x93DD +#endif + +/*------------------------------------------------------------------------* + * AMD extension tokens + *------------------------------------------------------------------------*/ + +/* GL_AMD_compressed_3DC_texture */ +#ifndef GL_AMD_compressed_3DC_texture +#define GL_3DC_X_AMD 0x87F9 +#define GL_3DC_XY_AMD 0x87FA +#endif + +/* 
GL_AMD_compressed_ATC_texture */ +#ifndef GL_AMD_compressed_ATC_texture +#define GL_ATC_RGB_AMD 0x8C92 +#define GL_ATC_RGBA_EXPLICIT_ALPHA_AMD 0x8C93 +#define GL_ATC_RGBA_INTERPOLATED_ALPHA_AMD 0x87EE +#endif + +/* GL_AMD_performance_monitor */ +#ifndef GL_AMD_performance_monitor +#define GL_COUNTER_TYPE_AMD 0x8BC0 +#define GL_COUNTER_RANGE_AMD 0x8BC1 +#define GL_UNSIGNED_INT64_AMD 0x8BC2 +#define GL_PERCENTAGE_AMD 0x8BC3 +#define GL_PERFMON_RESULT_AVAILABLE_AMD 0x8BC4 +#define GL_PERFMON_RESULT_SIZE_AMD 0x8BC5 +#define GL_PERFMON_RESULT_AMD 0x8BC6 +#endif + +/* GL_AMD_program_binary_Z400 */ +#ifndef GL_AMD_program_binary_Z400 +#define GL_Z400_BINARY_AMD 0x8740 +#endif + +/*------------------------------------------------------------------------* + * ANGLE extension tokens + *------------------------------------------------------------------------*/ + +/* GL_ANGLE_framebuffer_blit */ +#ifndef GL_ANGLE_framebuffer_blit +#define GL_READ_FRAMEBUFFER_ANGLE 0x8CA8 +#define GL_DRAW_FRAMEBUFFER_ANGLE 0x8CA9 +#define GL_DRAW_FRAMEBUFFER_BINDING_ANGLE 0x8CA6 +#define GL_READ_FRAMEBUFFER_BINDING_ANGLE 0x8CAA +#endif + +/* GL_ANGLE_framebuffer_multisample */ +#ifndef GL_ANGLE_framebuffer_multisample +#define GL_RENDERBUFFER_SAMPLES_ANGLE 0x8CAB +#define GL_FRAMEBUFFER_INCOMPLETE_MULTISAMPLE_ANGLE 0x8D56 +#define GL_MAX_SAMPLES_ANGLE 0x8D57 +#endif + +/* GL_ANGLE_instanced_arrays */ +#ifndef GL_ANGLE_instanced_arrays +#define GL_VERTEX_ATTRIB_ARRAY_DIVISOR_ANGLE 0x88FE +#endif + +/* GL_ANGLE_pack_reverse_row_order */ +#ifndef GL_ANGLE_pack_reverse_row_order +#define GL_PACK_REVERSE_ROW_ORDER_ANGLE 0x93A4 +#endif + +/* GL_ANGLE_texture_compression_dxt3 */ +#ifndef GL_ANGLE_texture_compression_dxt3 +#define GL_COMPRESSED_RGBA_S3TC_DXT3_ANGLE 0x83F2 +#endif + +/* GL_ANGLE_texture_compression_dxt5 */ +#ifndef GL_ANGLE_texture_compression_dxt5 +#define GL_COMPRESSED_RGBA_S3TC_DXT5_ANGLE 0x83F3 +#endif + +/* GL_ANGLE_texture_usage */ +#ifndef GL_ANGLE_texture_usage +#define 
GL_TEXTURE_USAGE_ANGLE 0x93A2 +#define GL_FRAMEBUFFER_ATTACHMENT_ANGLE 0x93A3 +#endif + +/* GL_ANGLE_translated_shader_source */ +#ifndef GL_ANGLE_translated_shader_source +#define GL_TRANSLATED_SHADER_SOURCE_LENGTH_ANGLE 0x93A0 +#endif + +/*------------------------------------------------------------------------* + * APPLE extension tokens + *------------------------------------------------------------------------*/ + +/* GL_APPLE_copy_texture_levels */ +/* No new tokens introduced by this extension. */ + +/* GL_APPLE_framebuffer_multisample */ +#ifndef GL_APPLE_framebuffer_multisample +#define GL_RENDERBUFFER_SAMPLES_APPLE 0x8CAB +#define GL_FRAMEBUFFER_INCOMPLETE_MULTISAMPLE_APPLE 0x8D56 +#define GL_MAX_SAMPLES_APPLE 0x8D57 +#define GL_READ_FRAMEBUFFER_APPLE 0x8CA8 +#define GL_DRAW_FRAMEBUFFER_APPLE 0x8CA9 +#define GL_DRAW_FRAMEBUFFER_BINDING_APPLE 0x8CA6 +#define GL_READ_FRAMEBUFFER_BINDING_APPLE 0x8CAA +#endif + +/* GL_APPLE_rgb_422 */ +#ifndef GL_APPLE_rgb_422 +#define GL_RGB_422_APPLE 0x8A1F +#define GL_UNSIGNED_SHORT_8_8_APPLE 0x85BA +#define GL_UNSIGNED_SHORT_8_8_REV_APPLE 0x85BB +#endif + +/* GL_APPLE_sync */ +#ifndef GL_APPLE_sync + +#ifndef __gl3_h_ +/* These types are defined with reference to + * in the Apple extension spec, but here we use the Khronos + * portable types in khrplatform.h, and assume those types + * are always defined. + * If any other extensions using these types are defined, + * the typedefs must move out of this block and be shared. 
+ */ +typedef khronos_int64_t GLint64; +typedef khronos_uint64_t GLuint64; +typedef struct __GLsync *GLsync; +#endif + +#define GL_SYNC_OBJECT_APPLE 0x8A53 +#define GL_MAX_SERVER_WAIT_TIMEOUT_APPLE 0x9111 +#define GL_OBJECT_TYPE_APPLE 0x9112 +#define GL_SYNC_CONDITION_APPLE 0x9113 +#define GL_SYNC_STATUS_APPLE 0x9114 +#define GL_SYNC_FLAGS_APPLE 0x9115 +#define GL_SYNC_FENCE_APPLE 0x9116 +#define GL_SYNC_GPU_COMMANDS_COMPLETE_APPLE 0x9117 +#define GL_UNSIGNALED_APPLE 0x9118 +#define GL_SIGNALED_APPLE 0x9119 +#define GL_ALREADY_SIGNALED_APPLE 0x911A +#define GL_TIMEOUT_EXPIRED_APPLE 0x911B +#define GL_CONDITION_SATISFIED_APPLE 0x911C +#define GL_WAIT_FAILED_APPLE 0x911D +#define GL_SYNC_FLUSH_COMMANDS_BIT_APPLE 0x00000001 +#define GL_TIMEOUT_IGNORED_APPLE 0xFFFFFFFFFFFFFFFFull +#endif + +/* GL_APPLE_texture_format_BGRA8888 */ +#ifndef GL_APPLE_texture_format_BGRA8888 +#define GL_BGRA_EXT 0x80E1 +#endif + +/* GL_APPLE_texture_max_level */ +#ifndef GL_APPLE_texture_max_level +#define GL_TEXTURE_MAX_LEVEL_APPLE 0x813D +#endif + +/*------------------------------------------------------------------------* + * ARM extension tokens + *------------------------------------------------------------------------*/ + +/* GL_ARM_mali_program_binary */ +#ifndef GL_ARM_mali_program_binary +#define GL_MALI_PROGRAM_BINARY_ARM 0x8F61 +#endif + +/* GL_ARM_mali_shader_binary */ +#ifndef GL_ARM_mali_shader_binary +#define GL_MALI_SHADER_BINARY_ARM 0x8F60 +#endif + +/* GL_ARM_rgba8 */ +/* No new tokens introduced by this extension. 
*/ + +/*------------------------------------------------------------------------* + * EXT extension tokens + *------------------------------------------------------------------------*/ + +/* GL_EXT_blend_minmax */ +#ifndef GL_EXT_blend_minmax +#define GL_MIN_EXT 0x8007 +#define GL_MAX_EXT 0x8008 +#endif + +/* GL_EXT_color_buffer_half_float */ +#ifndef GL_EXT_color_buffer_half_float +#define GL_RGBA16F_EXT 0x881A +#define GL_RGB16F_EXT 0x881B +#define GL_RG16F_EXT 0x822F +#define GL_R16F_EXT 0x822D +#define GL_FRAMEBUFFER_ATTACHMENT_COMPONENT_TYPE_EXT 0x8211 +#define GL_UNSIGNED_NORMALIZED_EXT 0x8C17 +#endif + +/* GL_EXT_debug_label */ +#ifndef GL_EXT_debug_label +#define GL_PROGRAM_PIPELINE_OBJECT_EXT 0x8A4F +#define GL_PROGRAM_OBJECT_EXT 0x8B40 +#define GL_SHADER_OBJECT_EXT 0x8B48 +#define GL_BUFFER_OBJECT_EXT 0x9151 +#define GL_QUERY_OBJECT_EXT 0x9153 +#define GL_VERTEX_ARRAY_OBJECT_EXT 0x9154 +#endif + +/* GL_EXT_debug_marker */ +/* No new tokens introduced by this extension. */ + +/* GL_EXT_discard_framebuffer */ +#ifndef GL_EXT_discard_framebuffer +#define GL_COLOR_EXT 0x1800 +#define GL_DEPTH_EXT 0x1801 +#define GL_STENCIL_EXT 0x1802 +#endif + +/* GL_EXT_map_buffer_range */ +#ifndef GL_EXT_map_buffer_range +#define GL_MAP_READ_BIT_EXT 0x0001 +#define GL_MAP_WRITE_BIT_EXT 0x0002 +#define GL_MAP_INVALIDATE_RANGE_BIT_EXT 0x0004 +#define GL_MAP_INVALIDATE_BUFFER_BIT_EXT 0x0008 +#define GL_MAP_FLUSH_EXPLICIT_BIT_EXT 0x0010 +#define GL_MAP_UNSYNCHRONIZED_BIT_EXT 0x0020 +#endif + +/* GL_EXT_multisampled_render_to_texture */ +#ifndef GL_EXT_multisampled_render_to_texture +#define GL_FRAMEBUFFER_ATTACHMENT_TEXTURE_SAMPLES_EXT 0x8D6C +/* reuse values from GL_EXT_framebuffer_multisample (desktop extension) */ +#define GL_RENDERBUFFER_SAMPLES_EXT 0x8CAB +#define GL_FRAMEBUFFER_INCOMPLETE_MULTISAMPLE_EXT 0x8D56 +#define GL_MAX_SAMPLES_EXT 0x8D57 +#endif + +/* GL_EXT_multiview_draw_buffers */ +#ifndef GL_EXT_multiview_draw_buffers +#define GL_COLOR_ATTACHMENT_EXT 0x90F0 
+#define GL_MULTIVIEW_EXT 0x90F1 +#define GL_DRAW_BUFFER_EXT 0x0C01 +#define GL_READ_BUFFER_EXT 0x0C02 +#define GL_MAX_MULTIVIEW_BUFFERS_EXT 0x90F2 +#endif + +/* GL_EXT_multi_draw_arrays */ +/* No new tokens introduced by this extension. */ + +/* GL_EXT_occlusion_query_boolean */ +#ifndef GL_EXT_occlusion_query_boolean +#define GL_ANY_SAMPLES_PASSED_EXT 0x8C2F +#define GL_ANY_SAMPLES_PASSED_CONSERVATIVE_EXT 0x8D6A +#define GL_CURRENT_QUERY_EXT 0x8865 +#define GL_QUERY_RESULT_EXT 0x8866 +#define GL_QUERY_RESULT_AVAILABLE_EXT 0x8867 +#endif + +/* GL_EXT_read_format_bgra */ +#ifndef GL_EXT_read_format_bgra +#define GL_BGRA_EXT 0x80E1 +#define GL_UNSIGNED_SHORT_4_4_4_4_REV_EXT 0x8365 +#define GL_UNSIGNED_SHORT_1_5_5_5_REV_EXT 0x8366 +#endif + +/* GL_EXT_robustness */ +#ifndef GL_EXT_robustness +/* reuse GL_NO_ERROR */ +#define GL_GUILTY_CONTEXT_RESET_EXT 0x8253 +#define GL_INNOCENT_CONTEXT_RESET_EXT 0x8254 +#define GL_UNKNOWN_CONTEXT_RESET_EXT 0x8255 +#define GL_CONTEXT_ROBUST_ACCESS_EXT 0x90F3 +#define GL_RESET_NOTIFICATION_STRATEGY_EXT 0x8256 +#define GL_LOSE_CONTEXT_ON_RESET_EXT 0x8252 +#define GL_NO_RESET_NOTIFICATION_EXT 0x8261 +#endif + +/* GL_EXT_separate_shader_objects */ +#ifndef GL_EXT_separate_shader_objects +#define GL_VERTEX_SHADER_BIT_EXT 0x00000001 +#define GL_FRAGMENT_SHADER_BIT_EXT 0x00000002 +#define GL_ALL_SHADER_BITS_EXT 0xFFFFFFFF +#define GL_PROGRAM_SEPARABLE_EXT 0x8258 +#define GL_ACTIVE_PROGRAM_EXT 0x8259 +#define GL_PROGRAM_PIPELINE_BINDING_EXT 0x825A +#endif + +/* GL_EXT_shader_framebuffer_fetch */ +#ifndef GL_EXT_shader_framebuffer_fetch +#define GL_FRAGMENT_SHADER_DISCARDS_SAMPLES_EXT 0x8A52 +#endif + +/* GL_EXT_shader_texture_lod */ +/* No new tokens introduced by this extension. 
*/ + +/* GL_EXT_shadow_samplers */ +#ifndef GL_EXT_shadow_samplers +#define GL_TEXTURE_COMPARE_MODE_EXT 0x884C +#define GL_TEXTURE_COMPARE_FUNC_EXT 0x884D +#define GL_COMPARE_REF_TO_TEXTURE_EXT 0x884E +#define GL_SAMPLER_2D_SHADOW_EXT 0x8B62 +#endif + +/* GL_EXT_sRGB */ +#ifndef GL_EXT_sRGB +#define GL_SRGB_EXT 0x8C40 +#define GL_SRGB_ALPHA_EXT 0x8C42 +#define GL_SRGB8_ALPHA8_EXT 0x8C43 +#define GL_FRAMEBUFFER_ATTACHMENT_COLOR_ENCODING_EXT 0x8210 +#endif + +/* GL_EXT_texture_compression_dxt1 */ +#ifndef GL_EXT_texture_compression_dxt1 +#define GL_COMPRESSED_RGB_S3TC_DXT1_EXT 0x83F0 +#define GL_COMPRESSED_RGBA_S3TC_DXT1_EXT 0x83F1 +#endif + +/* GL_EXT_texture_filter_anisotropic */ +#ifndef GL_EXT_texture_filter_anisotropic +#define GL_TEXTURE_MAX_ANISOTROPY_EXT 0x84FE +#define GL_MAX_TEXTURE_MAX_ANISOTROPY_EXT 0x84FF +#endif + +/* GL_EXT_texture_format_BGRA8888 */ +#ifndef GL_EXT_texture_format_BGRA8888 +#define GL_BGRA_EXT 0x80E1 +#endif + +/* GL_EXT_texture_rg */ +#ifndef GL_EXT_texture_rg +#define GL_RED_EXT 0x1903 +#define GL_RG_EXT 0x8227 +#define GL_R8_EXT 0x8229 +#define GL_RG8_EXT 0x822B +#endif + +/* GL_EXT_texture_storage */ +#ifndef GL_EXT_texture_storage +#define GL_TEXTURE_IMMUTABLE_FORMAT_EXT 0x912F +#define GL_ALPHA8_EXT 0x803C +#define GL_LUMINANCE8_EXT 0x8040 +#define GL_LUMINANCE8_ALPHA8_EXT 0x8045 +#define GL_RGBA32F_EXT 0x8814 +#define GL_RGB32F_EXT 0x8815 +#define GL_ALPHA32F_EXT 0x8816 +#define GL_LUMINANCE32F_EXT 0x8818 +#define GL_LUMINANCE_ALPHA32F_EXT 0x8819 +/* reuse GL_RGBA16F_EXT */ +/* reuse GL_RGB16F_EXT */ +#define GL_ALPHA16F_EXT 0x881C +#define GL_LUMINANCE16F_EXT 0x881E +#define GL_LUMINANCE_ALPHA16F_EXT 0x881F +#define GL_RGB10_A2_EXT 0x8059 +#define GL_RGB10_EXT 0x8052 +#define GL_BGRA8_EXT 0x93A1 +#define GL_R8_EXT 0x8229 +#define GL_RG8_EXT 0x822B +#define GL_R32F_EXT 0x822E +#define GL_RG32F_EXT 0x8230 +#define GL_R16F_EXT 0x822D +#define GL_RG16F_EXT 0x822F +#endif + +/* GL_EXT_texture_type_2_10_10_10_REV */ +#ifndef 
GL_EXT_texture_type_2_10_10_10_REV +#define GL_UNSIGNED_INT_2_10_10_10_REV_EXT 0x8368 +#endif + +/* GL_EXT_unpack_subimage */ +#ifndef GL_EXT_unpack_subimage +#define GL_UNPACK_ROW_LENGTH 0x0CF2 +#define GL_UNPACK_SKIP_ROWS 0x0CF3 +#define GL_UNPACK_SKIP_PIXELS 0x0CF4 +#endif + +/*------------------------------------------------------------------------* + * DMP extension tokens + *------------------------------------------------------------------------*/ + +/* GL_DMP_shader_binary */ +#ifndef GL_DMP_shader_binary +#define GL_SHADER_BINARY_DMP 0x9250 +#endif + +/*------------------------------------------------------------------------* + * FJ extension tokens + *------------------------------------------------------------------------*/ + +/* GL_FJ_shader_binary_GCCSO */ +#ifndef GL_FJ_shader_binary_GCCSO +#define GCCSO_SHADER_BINARY_FJ 0x9260 +#endif + +/*------------------------------------------------------------------------* + * IMG extension tokens + *------------------------------------------------------------------------*/ + +/* GL_IMG_program_binary */ +#ifndef GL_IMG_program_binary +#define GL_SGX_PROGRAM_BINARY_IMG 0x9130 +#endif + +/* GL_IMG_read_format */ +#ifndef GL_IMG_read_format +#define GL_BGRA_IMG 0x80E1 +#define GL_UNSIGNED_SHORT_4_4_4_4_REV_IMG 0x8365 +#endif + +/* GL_IMG_shader_binary */ +#ifndef GL_IMG_shader_binary +#define GL_SGX_BINARY_IMG 0x8C0A +#endif + +/* GL_IMG_texture_compression_pvrtc */ +#ifndef GL_IMG_texture_compression_pvrtc +#define GL_COMPRESSED_RGB_PVRTC_4BPPV1_IMG 0x8C00 +#define GL_COMPRESSED_RGB_PVRTC_2BPPV1_IMG 0x8C01 +#define GL_COMPRESSED_RGBA_PVRTC_4BPPV1_IMG 0x8C02 +#define GL_COMPRESSED_RGBA_PVRTC_2BPPV1_IMG 0x8C03 +#endif + +/* GL_IMG_multisampled_render_to_texture */ +#ifndef GL_IMG_multisampled_render_to_texture +#define GL_RENDERBUFFER_SAMPLES_IMG 0x9133 +#define GL_FRAMEBUFFER_INCOMPLETE_MULTISAMPLE_IMG 0x9134 +#define GL_MAX_SAMPLES_IMG 0x9135 +#define GL_TEXTURE_SAMPLES_IMG 0x9136 +#endif + 
+/*------------------------------------------------------------------------* + * NV extension tokens + *------------------------------------------------------------------------*/ + +/* GL_NV_coverage_sample */ +#ifndef GL_NV_coverage_sample +#define GL_COVERAGE_COMPONENT_NV 0x8ED0 +#define GL_COVERAGE_COMPONENT4_NV 0x8ED1 +#define GL_COVERAGE_ATTACHMENT_NV 0x8ED2 +#define GL_COVERAGE_BUFFERS_NV 0x8ED3 +#define GL_COVERAGE_SAMPLES_NV 0x8ED4 +#define GL_COVERAGE_ALL_FRAGMENTS_NV 0x8ED5 +#define GL_COVERAGE_EDGE_FRAGMENTS_NV 0x8ED6 +#define GL_COVERAGE_AUTOMATIC_NV 0x8ED7 +#define GL_COVERAGE_BUFFER_BIT_NV 0x8000 +#endif + +/* GL_NV_depth_nonlinear */ +#ifndef GL_NV_depth_nonlinear +#define GL_DEPTH_COMPONENT16_NONLINEAR_NV 0x8E2C +#endif + +/* GL_NV_draw_buffers */ +#ifndef GL_NV_draw_buffers +#define GL_MAX_DRAW_BUFFERS_NV 0x8824 +#define GL_DRAW_BUFFER0_NV 0x8825 +#define GL_DRAW_BUFFER1_NV 0x8826 +#define GL_DRAW_BUFFER2_NV 0x8827 +#define GL_DRAW_BUFFER3_NV 0x8828 +#define GL_DRAW_BUFFER4_NV 0x8829 +#define GL_DRAW_BUFFER5_NV 0x882A +#define GL_DRAW_BUFFER6_NV 0x882B +#define GL_DRAW_BUFFER7_NV 0x882C +#define GL_DRAW_BUFFER8_NV 0x882D +#define GL_DRAW_BUFFER9_NV 0x882E +#define GL_DRAW_BUFFER10_NV 0x882F +#define GL_DRAW_BUFFER11_NV 0x8830 +#define GL_DRAW_BUFFER12_NV 0x8831 +#define GL_DRAW_BUFFER13_NV 0x8832 +#define GL_DRAW_BUFFER14_NV 0x8833 +#define GL_DRAW_BUFFER15_NV 0x8834 +#define GL_COLOR_ATTACHMENT0_NV 0x8CE0 +#define GL_COLOR_ATTACHMENT1_NV 0x8CE1 +#define GL_COLOR_ATTACHMENT2_NV 0x8CE2 +#define GL_COLOR_ATTACHMENT3_NV 0x8CE3 +#define GL_COLOR_ATTACHMENT4_NV 0x8CE4 +#define GL_COLOR_ATTACHMENT5_NV 0x8CE5 +#define GL_COLOR_ATTACHMENT6_NV 0x8CE6 +#define GL_COLOR_ATTACHMENT7_NV 0x8CE7 +#define GL_COLOR_ATTACHMENT8_NV 0x8CE8 +#define GL_COLOR_ATTACHMENT9_NV 0x8CE9 +#define GL_COLOR_ATTACHMENT10_NV 0x8CEA +#define GL_COLOR_ATTACHMENT11_NV 0x8CEB +#define GL_COLOR_ATTACHMENT12_NV 0x8CEC +#define GL_COLOR_ATTACHMENT13_NV 0x8CED +#define 
GL_COLOR_ATTACHMENT14_NV 0x8CEE +#define GL_COLOR_ATTACHMENT15_NV 0x8CEF +#endif + +/* GL_NV_fbo_color_attachments */ +#ifndef GL_NV_fbo_color_attachments +#define GL_MAX_COLOR_ATTACHMENTS_NV 0x8CDF +/* GL_COLOR_ATTACHMENT{0-15}_NV defined in GL_NV_draw_buffers already. */ +#endif + +/* GL_NV_fence */ +#ifndef GL_NV_fence +#define GL_ALL_COMPLETED_NV 0x84F2 +#define GL_FENCE_STATUS_NV 0x84F3 +#define GL_FENCE_CONDITION_NV 0x84F4 +#endif + +/* GL_NV_read_buffer */ +#ifndef GL_NV_read_buffer +#define GL_READ_BUFFER_NV 0x0C02 +#endif + +/* GL_NV_read_buffer_front */ +/* No new tokens introduced by this extension. */ + +/* GL_NV_read_depth */ +/* No new tokens introduced by this extension. */ + +/* GL_NV_read_depth_stencil */ +/* No new tokens introduced by this extension. */ + +/* GL_NV_read_stencil */ +/* No new tokens introduced by this extension. */ + +/* GL_NV_texture_compression_s3tc_update */ +/* No new tokens introduced by this extension. */ + +/* GL_NV_texture_npot_2D_mipmap */ +/* No new tokens introduced by this extension. */ + +/*------------------------------------------------------------------------* + * QCOM extension tokens + *------------------------------------------------------------------------*/ + +/* GL_QCOM_alpha_test */ +#ifndef GL_QCOM_alpha_test +#define GL_ALPHA_TEST_QCOM 0x0BC0 +#define GL_ALPHA_TEST_FUNC_QCOM 0x0BC1 +#define GL_ALPHA_TEST_REF_QCOM 0x0BC2 +#endif + +/* GL_QCOM_binning_control */ +#ifndef GL_QCOM_binning_control +#define GL_BINNING_CONTROL_HINT_QCOM 0x8FB0 +#define GL_CPU_OPTIMIZED_QCOM 0x8FB1 +#define GL_GPU_OPTIMIZED_QCOM 0x8FB2 +#define GL_RENDER_DIRECT_TO_FRAMEBUFFER_QCOM 0x8FB3 +#endif + +/* GL_QCOM_driver_control */ +/* No new tokens introduced by this extension. 
*/ + +/* GL_QCOM_extended_get */ +#ifndef GL_QCOM_extended_get +#define GL_TEXTURE_WIDTH_QCOM 0x8BD2 +#define GL_TEXTURE_HEIGHT_QCOM 0x8BD3 +#define GL_TEXTURE_DEPTH_QCOM 0x8BD4 +#define GL_TEXTURE_INTERNAL_FORMAT_QCOM 0x8BD5 +#define GL_TEXTURE_FORMAT_QCOM 0x8BD6 +#define GL_TEXTURE_TYPE_QCOM 0x8BD7 +#define GL_TEXTURE_IMAGE_VALID_QCOM 0x8BD8 +#define GL_TEXTURE_NUM_LEVELS_QCOM 0x8BD9 +#define GL_TEXTURE_TARGET_QCOM 0x8BDA +#define GL_TEXTURE_OBJECT_VALID_QCOM 0x8BDB +#define GL_STATE_RESTORE 0x8BDC +#endif + +/* GL_QCOM_extended_get2 */ +/* No new tokens introduced by this extension. */ + +/* GL_QCOM_perfmon_global_mode */ +#ifndef GL_QCOM_perfmon_global_mode +#define GL_PERFMON_GLOBAL_MODE_QCOM 0x8FA0 +#endif + +/* GL_QCOM_writeonly_rendering */ +#ifndef GL_QCOM_writeonly_rendering +#define GL_WRITEONLY_RENDERING_QCOM 0x8823 +#endif + +/* GL_QCOM_tiled_rendering */ +#ifndef GL_QCOM_tiled_rendering +#define GL_COLOR_BUFFER_BIT0_QCOM 0x00000001 +#define GL_COLOR_BUFFER_BIT1_QCOM 0x00000002 +#define GL_COLOR_BUFFER_BIT2_QCOM 0x00000004 +#define GL_COLOR_BUFFER_BIT3_QCOM 0x00000008 +#define GL_COLOR_BUFFER_BIT4_QCOM 0x00000010 +#define GL_COLOR_BUFFER_BIT5_QCOM 0x00000020 +#define GL_COLOR_BUFFER_BIT6_QCOM 0x00000040 +#define GL_COLOR_BUFFER_BIT7_QCOM 0x00000080 +#define GL_DEPTH_BUFFER_BIT0_QCOM 0x00000100 +#define GL_DEPTH_BUFFER_BIT1_QCOM 0x00000200 +#define GL_DEPTH_BUFFER_BIT2_QCOM 0x00000400 +#define GL_DEPTH_BUFFER_BIT3_QCOM 0x00000800 +#define GL_DEPTH_BUFFER_BIT4_QCOM 0x00001000 +#define GL_DEPTH_BUFFER_BIT5_QCOM 0x00002000 +#define GL_DEPTH_BUFFER_BIT6_QCOM 0x00004000 +#define GL_DEPTH_BUFFER_BIT7_QCOM 0x00008000 +#define GL_STENCIL_BUFFER_BIT0_QCOM 0x00010000 +#define GL_STENCIL_BUFFER_BIT1_QCOM 0x00020000 +#define GL_STENCIL_BUFFER_BIT2_QCOM 0x00040000 +#define GL_STENCIL_BUFFER_BIT3_QCOM 0x00080000 +#define GL_STENCIL_BUFFER_BIT4_QCOM 0x00100000 +#define GL_STENCIL_BUFFER_BIT5_QCOM 0x00200000 +#define GL_STENCIL_BUFFER_BIT6_QCOM 0x00400000 +#define 
GL_STENCIL_BUFFER_BIT7_QCOM 0x00800000 +#define GL_MULTISAMPLE_BUFFER_BIT0_QCOM 0x01000000 +#define GL_MULTISAMPLE_BUFFER_BIT1_QCOM 0x02000000 +#define GL_MULTISAMPLE_BUFFER_BIT2_QCOM 0x04000000 +#define GL_MULTISAMPLE_BUFFER_BIT3_QCOM 0x08000000 +#define GL_MULTISAMPLE_BUFFER_BIT4_QCOM 0x10000000 +#define GL_MULTISAMPLE_BUFFER_BIT5_QCOM 0x20000000 +#define GL_MULTISAMPLE_BUFFER_BIT6_QCOM 0x40000000 +#define GL_MULTISAMPLE_BUFFER_BIT7_QCOM 0x80000000 +#endif + +/*------------------------------------------------------------------------* + * VIV extension tokens + *------------------------------------------------------------------------*/ + +/* GL_VIV_shader_binary */ +#ifndef GL_VIV_shader_binary +#define GL_SHADER_BINARY_VIV 0x8FC4 +#endif + +/*------------------------------------------------------------------------* + * End of extension tokens, start of corresponding extension functions + *------------------------------------------------------------------------*/ + +/*------------------------------------------------------------------------* + * OES extension functions + *------------------------------------------------------------------------*/ + +/* GL_OES_compressed_ETC1_RGB8_texture */ +#ifndef GL_OES_compressed_ETC1_RGB8_texture +#define GL_OES_compressed_ETC1_RGB8_texture 1 +#endif + +/* GL_OES_compressed_paletted_texture */ +#ifndef GL_OES_compressed_paletted_texture +#define GL_OES_compressed_paletted_texture 1 +#endif + +/* GL_OES_depth24 */ +#ifndef GL_OES_depth24 +#define GL_OES_depth24 1 +#endif + +/* GL_OES_depth32 */ +#ifndef GL_OES_depth32 +#define GL_OES_depth32 1 +#endif + +/* GL_OES_depth_texture */ +#ifndef GL_OES_depth_texture +#define GL_OES_depth_texture 1 +#endif + +/* GL_OES_EGL_image */ +#ifndef GL_OES_EGL_image +#define GL_OES_EGL_image 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_APICALL void GL_APIENTRY glEGLImageTargetTexture2DOES (GLenum target, GLeglImageOES image); +GL_APICALL void GL_APIENTRY glEGLImageTargetRenderbufferStorageOES (GLenum 
target, GLeglImageOES image); +#endif +typedef void (GL_APIENTRYP PFNGLEGLIMAGETARGETTEXTURE2DOESPROC) (GLenum target, GLeglImageOES image); +typedef void (GL_APIENTRYP PFNGLEGLIMAGETARGETRENDERBUFFERSTORAGEOESPROC) (GLenum target, GLeglImageOES image); +#endif + +/* GL_OES_EGL_image_external */ +#ifndef GL_OES_EGL_image_external +#define GL_OES_EGL_image_external 1 +/* glEGLImageTargetTexture2DOES defined in GL_OES_EGL_image already. */ +#endif + +/* GL_OES_element_index_uint */ +#ifndef GL_OES_element_index_uint +#define GL_OES_element_index_uint 1 +#endif + +/* GL_OES_fbo_render_mipmap */ +#ifndef GL_OES_fbo_render_mipmap +#define GL_OES_fbo_render_mipmap 1 +#endif + +/* GL_OES_fragment_precision_high */ +#ifndef GL_OES_fragment_precision_high +#define GL_OES_fragment_precision_high 1 +#endif + +/* GL_OES_get_program_binary */ +#ifndef GL_OES_get_program_binary +#define GL_OES_get_program_binary 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_APICALL void GL_APIENTRY glGetProgramBinaryOES (GLuint program, GLsizei bufSize, GLsizei *length, GLenum *binaryFormat, GLvoid *binary); +GL_APICALL void GL_APIENTRY glProgramBinaryOES (GLuint program, GLenum binaryFormat, const GLvoid *binary, GLint length); +#endif +typedef void (GL_APIENTRYP PFNGLGETPROGRAMBINARYOESPROC) (GLuint program, GLsizei bufSize, GLsizei *length, GLenum *binaryFormat, GLvoid *binary); +typedef void (GL_APIENTRYP PFNGLPROGRAMBINARYOESPROC) (GLuint program, GLenum binaryFormat, const GLvoid *binary, GLint length); +#endif + +/* GL_OES_mapbuffer */ +#ifndef GL_OES_mapbuffer +#define GL_OES_mapbuffer 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_APICALL void* GL_APIENTRY glMapBufferOES (GLenum target, GLenum access); +GL_APICALL GLboolean GL_APIENTRY glUnmapBufferOES (GLenum target); +GL_APICALL void GL_APIENTRY glGetBufferPointervOES (GLenum target, GLenum pname, GLvoid** params); +#endif +typedef void* (GL_APIENTRYP PFNGLMAPBUFFEROESPROC) (GLenum target, GLenum access); +typedef GLboolean (GL_APIENTRYP 
PFNGLUNMAPBUFFEROESPROC) (GLenum target); +typedef void (GL_APIENTRYP PFNGLGETBUFFERPOINTERVOESPROC) (GLenum target, GLenum pname, GLvoid** params); +#endif + +/* GL_OES_packed_depth_stencil */ +#ifndef GL_OES_packed_depth_stencil +#define GL_OES_packed_depth_stencil 1 +#endif + +/* GL_OES_required_internalformat */ +#ifndef GL_OES_required_internalformat +#define GL_OES_required_internalformat 1 +#endif + +/* GL_OES_rgb8_rgba8 */ +#ifndef GL_OES_rgb8_rgba8 +#define GL_OES_rgb8_rgba8 1 +#endif + +/* GL_OES_standard_derivatives */ +#ifndef GL_OES_standard_derivatives +#define GL_OES_standard_derivatives 1 +#endif + +/* GL_OES_stencil1 */ +#ifndef GL_OES_stencil1 +#define GL_OES_stencil1 1 +#endif + +/* GL_OES_stencil4 */ +#ifndef GL_OES_stencil4 +#define GL_OES_stencil4 1 +#endif + +#ifndef GL_OES_surfaceless_context +#define GL_OES_surfaceless_context 1 +#endif + +/* GL_OES_texture_3D */ +#ifndef GL_OES_texture_3D +#define GL_OES_texture_3D 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_APICALL void GL_APIENTRY glTexImage3DOES (GLenum target, GLint level, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth, GLint border, GLenum format, GLenum type, const GLvoid* pixels); +GL_APICALL void GL_APIENTRY glTexSubImage3DOES (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLenum format, GLenum type, const GLvoid* pixels); +GL_APICALL void GL_APIENTRY glCopyTexSubImage3DOES (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLint x, GLint y, GLsizei width, GLsizei height); +GL_APICALL void GL_APIENTRY glCompressedTexImage3DOES (GLenum target, GLint level, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth, GLint border, GLsizei imageSize, const GLvoid* data); +GL_APICALL void GL_APIENTRY glCompressedTexSubImage3DOES (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLenum format, GLsizei 
imageSize, const GLvoid* data); +GL_APICALL void GL_APIENTRY glFramebufferTexture3DOES (GLenum target, GLenum attachment, GLenum textarget, GLuint texture, GLint level, GLint zoffset); +#endif +typedef void (GL_APIENTRYP PFNGLTEXIMAGE3DOESPROC) (GLenum target, GLint level, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth, GLint border, GLenum format, GLenum type, const GLvoid* pixels); +typedef void (GL_APIENTRYP PFNGLTEXSUBIMAGE3DOESPROC) (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLenum format, GLenum type, const GLvoid* pixels); +typedef void (GL_APIENTRYP PFNGLCOPYTEXSUBIMAGE3DOESPROC) (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLint x, GLint y, GLsizei width, GLsizei height); +typedef void (GL_APIENTRYP PFNGLCOMPRESSEDTEXIMAGE3DOESPROC) (GLenum target, GLint level, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth, GLint border, GLsizei imageSize, const GLvoid* data); +typedef void (GL_APIENTRYP PFNGLCOMPRESSEDTEXSUBIMAGE3DOESPROC) (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLenum format, GLsizei imageSize, const GLvoid* data); +typedef void (GL_APIENTRYP PFNGLFRAMEBUFFERTEXTURE3DOES) (GLenum target, GLenum attachment, GLenum textarget, GLuint texture, GLint level, GLint zoffset); +#endif + +/* GL_OES_texture_float */ +#ifndef GL_OES_texture_float +#define GL_OES_texture_float 1 +#endif + +/* GL_OES_texture_float_linear */ +#ifndef GL_OES_texture_float_linear +#define GL_OES_texture_float_linear 1 +#endif + +/* GL_OES_texture_half_float */ +#ifndef GL_OES_texture_half_float +#define GL_OES_texture_half_float 1 +#endif + +/* GL_OES_texture_half_float_linear */ +#ifndef GL_OES_texture_half_float_linear +#define GL_OES_texture_half_float_linear 1 +#endif + +/* GL_OES_texture_npot */ +#ifndef GL_OES_texture_npot +#define GL_OES_texture_npot 1 
+#endif + +/* GL_OES_vertex_array_object */ +#ifndef GL_OES_vertex_array_object +#define GL_OES_vertex_array_object 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_APICALL void GL_APIENTRY glBindVertexArrayOES (GLuint array); +GL_APICALL void GL_APIENTRY glDeleteVertexArraysOES (GLsizei n, const GLuint *arrays); +GL_APICALL void GL_APIENTRY glGenVertexArraysOES (GLsizei n, GLuint *arrays); +GL_APICALL GLboolean GL_APIENTRY glIsVertexArrayOES (GLuint array); +#endif +typedef void (GL_APIENTRYP PFNGLBINDVERTEXARRAYOESPROC) (GLuint array); +typedef void (GL_APIENTRYP PFNGLDELETEVERTEXARRAYSOESPROC) (GLsizei n, const GLuint *arrays); +typedef void (GL_APIENTRYP PFNGLGENVERTEXARRAYSOESPROC) (GLsizei n, GLuint *arrays); +typedef GLboolean (GL_APIENTRYP PFNGLISVERTEXARRAYOESPROC) (GLuint array); +#endif + +/* GL_OES_vertex_half_float */ +#ifndef GL_OES_vertex_half_float +#define GL_OES_vertex_half_float 1 +#endif + +/* GL_OES_vertex_type_10_10_10_2 */ +#ifndef GL_OES_vertex_type_10_10_10_2 +#define GL_OES_vertex_type_10_10_10_2 1 +#endif + +/*------------------------------------------------------------------------* + * KHR extension functions + *------------------------------------------------------------------------*/ + +#ifndef GL_KHR_debug +#define GL_KHR_debug 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_APICALL void GL_APIENTRY glDebugMessageControl (GLenum source, GLenum type, GLenum severity, GLsizei count, const GLuint *ids, GLboolean enabled); +GL_APICALL void GL_APIENTRY glDebugMessageInsert (GLenum source, GLenum type, GLuint id, GLenum severity, GLsizei length, const GLchar *buf); +GL_APICALL void GL_APIENTRY glDebugMessageCallback (GLDEBUGPROC callback, const void *userParam); +GL_APICALL GLuint GL_APIENTRY glGetDebugMessageLog (GLuint count, GLsizei bufsize, GLenum *sources, GLenum *types, GLuint *ids, GLenum *severities, GLsizei *lengths, GLchar *messageLog); +GL_APICALL void GL_APIENTRY glPushDebugGroup (GLenum source, GLuint id, GLsizei length, const GLchar *message); +GL_APICALL 
void GL_APIENTRY glPopDebugGroup (void); +GL_APICALL void GL_APIENTRY glObjectLabel (GLenum identifier, GLuint name, GLsizei length, const GLchar *label); +GL_APICALL void GL_APIENTRY glGetObjectLabel (GLenum identifier, GLuint name, GLsizei bufSize, GLsizei *length, GLchar *label); +GL_APICALL void GL_APIENTRY glObjectPtrLabel (const void *ptr, GLsizei length, const GLchar *label); +GL_APICALL void GL_APIENTRY glGetObjectPtrLabel (const void *ptr, GLsizei bufSize, GLsizei *length, GLchar *label); +GL_APICALL void GL_APIENTRY glGetPointerv (GLenum pname, void **params); +#endif +typedef void (GL_APIENTRYP PFNGLDEBUGMESSAGECONTROLPROC) (GLenum source, GLenum type, GLenum severity, GLsizei count, const GLuint *ids, GLboolean enabled); +typedef void (GL_APIENTRYP PFNGLDEBUGMESSAGEINSERTPROC) (GLenum source, GLenum type, GLuint id, GLenum severity, GLsizei length, const GLchar *buf); +typedef void (GL_APIENTRYP PFNGLDEBUGMESSAGECALLBACKPROC) (GLDEBUGPROC callback, const void *userParam); +typedef GLuint (GL_APIENTRYP PFNGLGETDEBUGMESSAGELOGPROC) (GLuint count, GLsizei bufsize, GLenum *sources, GLenum *types, GLuint *ids, GLenum *severities, GLsizei *lengths, GLchar *messageLog); +typedef void (GL_APIENTRYP PFNGLPUSHDEBUGGROUPPROC) (GLenum source, GLuint id, GLsizei length, const GLchar *message); +typedef void (GL_APIENTRYP PFNGLPOPDEBUGGROUPPROC) (void); +typedef void (GL_APIENTRYP PFNGLOBJECTLABELPROC) (GLenum identifier, GLuint name, GLsizei length, const GLchar *label); +typedef void (GL_APIENTRYP PFNGLGETOBJECTLABELPROC) (GLenum identifier, GLuint name, GLsizei bufSize, GLsizei *length, GLchar *label); +typedef void (GL_APIENTRYP PFNGLOBJECTPTRLABELPROC) (const void *ptr, GLsizei length, const GLchar *label); +typedef void (GL_APIENTRYP PFNGLGETOBJECTPTRLABELPROC) (const void *ptr, GLsizei bufSize, GLsizei *length, GLchar *label); +typedef void (GL_APIENTRYP PFNGLGETPOINTERVPROC) (GLenum pname, void **params); +#endif + +#ifndef GL_KHR_texture_compression_astc_ldr 
+#define GL_KHR_texture_compression_astc_ldr 1 +#endif + + +/*------------------------------------------------------------------------* + * AMD extension functions + *------------------------------------------------------------------------*/ + +/* GL_AMD_compressed_3DC_texture */ +#ifndef GL_AMD_compressed_3DC_texture +#define GL_AMD_compressed_3DC_texture 1 +#endif + +/* GL_AMD_compressed_ATC_texture */ +#ifndef GL_AMD_compressed_ATC_texture +#define GL_AMD_compressed_ATC_texture 1 +#endif + +/* AMD_performance_monitor */ +#ifndef GL_AMD_performance_monitor +#define GL_AMD_performance_monitor 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_APICALL void GL_APIENTRY glGetPerfMonitorGroupsAMD (GLint *numGroups, GLsizei groupsSize, GLuint *groups); +GL_APICALL void GL_APIENTRY glGetPerfMonitorCountersAMD (GLuint group, GLint *numCounters, GLint *maxActiveCounters, GLsizei counterSize, GLuint *counters); +GL_APICALL void GL_APIENTRY glGetPerfMonitorGroupStringAMD (GLuint group, GLsizei bufSize, GLsizei *length, GLchar *groupString); +GL_APICALL void GL_APIENTRY glGetPerfMonitorCounterStringAMD (GLuint group, GLuint counter, GLsizei bufSize, GLsizei *length, GLchar *counterString); +GL_APICALL void GL_APIENTRY glGetPerfMonitorCounterInfoAMD (GLuint group, GLuint counter, GLenum pname, GLvoid *data); +GL_APICALL void GL_APIENTRY glGenPerfMonitorsAMD (GLsizei n, GLuint *monitors); +GL_APICALL void GL_APIENTRY glDeletePerfMonitorsAMD (GLsizei n, GLuint *monitors); +GL_APICALL void GL_APIENTRY glSelectPerfMonitorCountersAMD (GLuint monitor, GLboolean enable, GLuint group, GLint numCounters, GLuint *countersList); +GL_APICALL void GL_APIENTRY glBeginPerfMonitorAMD (GLuint monitor); +GL_APICALL void GL_APIENTRY glEndPerfMonitorAMD (GLuint monitor); +GL_APICALL void GL_APIENTRY glGetPerfMonitorCounterDataAMD (GLuint monitor, GLenum pname, GLsizei dataSize, GLuint *data, GLint *bytesWritten); +#endif +typedef void (GL_APIENTRYP PFNGLGETPERFMONITORGROUPSAMDPROC) (GLint *numGroups, GLsizei 
groupsSize, GLuint *groups); +typedef void (GL_APIENTRYP PFNGLGETPERFMONITORCOUNTERSAMDPROC) (GLuint group, GLint *numCounters, GLint *maxActiveCounters, GLsizei counterSize, GLuint *counters); +typedef void (GL_APIENTRYP PFNGLGETPERFMONITORGROUPSTRINGAMDPROC) (GLuint group, GLsizei bufSize, GLsizei *length, GLchar *groupString); +typedef void (GL_APIENTRYP PFNGLGETPERFMONITORCOUNTERSTRINGAMDPROC) (GLuint group, GLuint counter, GLsizei bufSize, GLsizei *length, GLchar *counterString); +typedef void (GL_APIENTRYP PFNGLGETPERFMONITORCOUNTERINFOAMDPROC) (GLuint group, GLuint counter, GLenum pname, GLvoid *data); +typedef void (GL_APIENTRYP PFNGLGENPERFMONITORSAMDPROC) (GLsizei n, GLuint *monitors); +typedef void (GL_APIENTRYP PFNGLDELETEPERFMONITORSAMDPROC) (GLsizei n, GLuint *monitors); +typedef void (GL_APIENTRYP PFNGLSELECTPERFMONITORCOUNTERSAMDPROC) (GLuint monitor, GLboolean enable, GLuint group, GLint numCounters, GLuint *countersList); +typedef void (GL_APIENTRYP PFNGLBEGINPERFMONITORAMDPROC) (GLuint monitor); +typedef void (GL_APIENTRYP PFNGLENDPERFMONITORAMDPROC) (GLuint monitor); +typedef void (GL_APIENTRYP PFNGLGETPERFMONITORCOUNTERDATAAMDPROC) (GLuint monitor, GLenum pname, GLsizei dataSize, GLuint *data, GLint *bytesWritten); +#endif + +/* GL_AMD_program_binary_Z400 */ +#ifndef GL_AMD_program_binary_Z400 +#define GL_AMD_program_binary_Z400 1 +#endif + +/*------------------------------------------------------------------------* + * ANGLE extension functions + *------------------------------------------------------------------------*/ + +/* GL_ANGLE_framebuffer_blit */ +#ifndef GL_ANGLE_framebuffer_blit +#define GL_ANGLE_framebuffer_blit 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_APICALL void GL_APIENTRY glBlitFramebufferANGLE (GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1, GLbitfield mask, GLenum filter); +#endif +typedef void (GL_APIENTRYP PFNGLBLITFRAMEBUFFERANGLEPROC) (GLint srcX0, GLint srcY0, GLint 
srcX1, GLint srcY1, GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1, GLbitfield mask, GLenum filter); +#endif + +/* GL_ANGLE_framebuffer_multisample */ +#ifndef GL_ANGLE_framebuffer_multisample +#define GL_ANGLE_framebuffer_multisample 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_APICALL void GL_APIENTRY glRenderbufferStorageMultisampleANGLE (GLenum target, GLsizei samples, GLenum internalformat, GLsizei width, GLsizei height); +#endif +typedef void (GL_APIENTRYP PFNGLRENDERBUFFERSTORAGEMULTISAMPLEANGLEPROC) (GLenum target, GLsizei samples, GLenum internalformat, GLsizei width, GLsizei height); +#endif + +#ifndef GL_ANGLE_instanced_arrays +#define GL_ANGLE_instanced_arrays 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_APICALL void GL_APIENTRY glDrawArraysInstancedANGLE (GLenum mode, GLint first, GLsizei count, GLsizei primcount); +GL_APICALL void GL_APIENTRY glDrawElementsInstancedANGLE (GLenum mode, GLsizei count, GLenum type, const void *indices, GLsizei primcount); +GL_APICALL void GL_APIENTRY glVertexAttribDivisorANGLE (GLuint index, GLuint divisor); +#endif +typedef void (GL_APIENTRYP PFLGLDRAWARRAYSINSTANCEDANGLEPROC) (GLenum mode, GLint first, GLsizei count, GLsizei primcount); +typedef void (GL_APIENTRYP PFLGLDRAWELEMENTSINSTANCEDANGLEPROC) (GLenum mode, GLsizei count, GLenum type, const void *indices, GLsizei primcount); +typedef void (GL_APIENTRYP PFLGLVERTEXATTRIBDIVISORANGLEPROC) (GLuint index, GLuint divisor); +#endif + +/* GL_ANGLE_pack_reverse_row_order */ +#ifndef GL_ANGLE_pack_reverse_row_order +#define GL_ANGLE_pack_reverse_row_order 1 +#endif + +/* GL_ANGLE_texture_compression_dxt3 */ +#ifndef GL_ANGLE_texture_compression_dxt3 +#define GL_ANGLE_texture_compression_dxt3 1 +#endif + +/* GL_ANGLE_texture_compression_dxt5 */ +#ifndef GL_ANGLE_texture_compression_dxt5 +#define GL_ANGLE_texture_compression_dxt5 1 +#endif + +/* GL_ANGLE_texture_usage */ +#ifndef GL_ANGLE_texture_usage +#define GL_ANGLE_texture_usage 1 +#endif + +#ifndef GL_ANGLE_translated_shader_source 
+#define GL_ANGLE_translated_shader_source 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_APICALL void GL_APIENTRY glGetTranslatedShaderSourceANGLE (GLuint shader, GLsizei bufsize, GLsizei *length, GLchar *source); +#endif +typedef void (GL_APIENTRYP PFLGLGETTRANSLATEDSHADERSOURCEANGLEPROC) (GLuint shader, GLsizei bufsize, GLsizei *length, GLchar *source); +#endif + +/*------------------------------------------------------------------------* + * APPLE extension functions + *------------------------------------------------------------------------*/ + +/* GL_APPLE_copy_texture_levels */ +#ifndef GL_APPLE_copy_texture_levels +#define GL_APPLE_copy_texture_levels 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_APICALL void GL_APIENTRY glCopyTextureLevelsAPPLE (GLuint destinationTexture, GLuint sourceTexture, GLint sourceBaseLevel, GLsizei sourceLevelCount); +#endif +typedef void (GL_APIENTRYP PFNGLCOPYTEXTURELEVELSAPPLEPROC) (GLuint destinationTexture, GLuint sourceTexture, GLint sourceBaseLevel, GLsizei sourceLevelCount); +#endif + +/* GL_APPLE_framebuffer_multisample */ +#ifndef GL_APPLE_framebuffer_multisample +#define GL_APPLE_framebuffer_multisample 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_APICALL void GL_APIENTRY glRenderbufferStorageMultisampleAPPLE (GLenum, GLsizei, GLenum, GLsizei, GLsizei); +GL_APICALL void GL_APIENTRY glResolveMultisampleFramebufferAPPLE (void); +#endif /* GL_GLEXT_PROTOTYPES */ +typedef void (GL_APIENTRYP PFNGLRENDERBUFFERSTORAGEMULTISAMPLEAPPLEPROC) (GLenum target, GLsizei samples, GLenum internalformat, GLsizei width, GLsizei height); +typedef void (GL_APIENTRYP PFNGLRESOLVEMULTISAMPLEFRAMEBUFFERAPPLEPROC) (void); +#endif + +/* GL_APPLE_rgb_422 */ +#ifndef GL_APPLE_rgb_422 +#define GL_APPLE_rgb_422 1 +#endif + +/* GL_APPLE_sync */ +#ifndef GL_APPLE_sync +#define GL_APPLE_sync 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_APICALL GLsync GL_APIENTRY glFenceSyncAPPLE (GLenum condition, GLbitfield flags); +GL_APICALL GLboolean GL_APIENTRY glIsSyncAPPLE (GLsync sync); +GL_APICALL void 
GL_APIENTRY glDeleteSyncAPPLE (GLsync sync); +GL_APICALL GLenum GL_APIENTRY glClientWaitSyncAPPLE (GLsync sync, GLbitfield flags, GLuint64 timeout); +GL_APICALL void GL_APIENTRY glWaitSyncAPPLE (GLsync sync, GLbitfield flags, GLuint64 timeout); +GL_APICALL void GL_APIENTRY glGetInteger64vAPPLE (GLenum pname, GLint64 *params); +GL_APICALL void GL_APIENTRY glGetSyncivAPPLE (GLsync sync, GLenum pname, GLsizei bufSize, GLsizei *length, GLint *values); +#endif +typedef GLsync (GL_APIENTRYP PFNGLFENCESYNCAPPLEPROC) (GLenum condition, GLbitfield flags); +typedef GLboolean (GL_APIENTRYP PFNGLISSYNCAPPLEPROC) (GLsync sync); +typedef void (GL_APIENTRYP PFNGLDELETESYNCAPPLEPROC) (GLsync sync); +typedef GLenum (GL_APIENTRYP PFNGLCLIENTWAITSYNCAPPLEPROC) (GLsync sync, GLbitfield flags, GLuint64 timeout); +typedef void (GL_APIENTRYP PFNGLWAITSYNCAPPLEPROC) (GLsync sync, GLbitfield flags, GLuint64 timeout); +typedef void (GL_APIENTRYP PFNGLGETINTEGER64VAPPLEPROC) (GLenum pname, GLint64 *params); +typedef void (GL_APIENTRYP PFNGLGETSYNCIVAPPLEPROC) (GLsync sync, GLenum pname, GLsizei bufSize, GLsizei *length, GLint *values); +#endif + +/* GL_APPLE_texture_format_BGRA8888 */ +#ifndef GL_APPLE_texture_format_BGRA8888 +#define GL_APPLE_texture_format_BGRA8888 1 +#endif + +/* GL_APPLE_texture_max_level */ +#ifndef GL_APPLE_texture_max_level +#define GL_APPLE_texture_max_level 1 +#endif + +/*------------------------------------------------------------------------* + * ARM extension functions + *------------------------------------------------------------------------*/ + +/* GL_ARM_mali_program_binary */ +#ifndef GL_ARM_mali_program_binary +#define GL_ARM_mali_program_binary 1 +#endif + +/* GL_ARM_mali_shader_binary */ +#ifndef GL_ARM_mali_shader_binary +#define GL_ARM_mali_shader_binary 1 +#endif + +/* GL_ARM_rgba8 */ +#ifndef GL_ARM_rgba8 +#define GL_ARM_rgba8 1 +#endif + +/*------------------------------------------------------------------------* + * EXT extension functions + 
*------------------------------------------------------------------------*/ + +/* GL_EXT_blend_minmax */ +#ifndef GL_EXT_blend_minmax +#define GL_EXT_blend_minmax 1 +#endif + +/* GL_EXT_color_buffer_half_float */ +#ifndef GL_EXT_color_buffer_half_float +#define GL_EXT_color_buffer_half_float 1 +#endif + +/* GL_EXT_debug_label */ +#ifndef GL_EXT_debug_label +#define GL_EXT_debug_label 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_APICALL void GL_APIENTRY glLabelObjectEXT (GLenum type, GLuint object, GLsizei length, const GLchar *label); +GL_APICALL void GL_APIENTRY glGetObjectLabelEXT (GLenum type, GLuint object, GLsizei bufSize, GLsizei *length, GLchar *label); +#endif +typedef void (GL_APIENTRYP PFNGLLABELOBJECTEXTPROC) (GLenum type, GLuint object, GLsizei length, const GLchar *label); +typedef void (GL_APIENTRYP PFNGLGETOBJECTLABELEXTPROC) (GLenum type, GLuint object, GLsizei bufSize, GLsizei *length, GLchar *label); +#endif + +/* GL_EXT_debug_marker */ +#ifndef GL_EXT_debug_marker +#define GL_EXT_debug_marker 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_APICALL void GL_APIENTRY glInsertEventMarkerEXT (GLsizei length, const GLchar *marker); +GL_APICALL void GL_APIENTRY glPushGroupMarkerEXT (GLsizei length, const GLchar *marker); +GL_APICALL void GL_APIENTRY glPopGroupMarkerEXT (void); +#endif +typedef void (GL_APIENTRYP PFNGLINSERTEVENTMARKEREXTPROC) (GLsizei length, const GLchar *marker); +typedef void (GL_APIENTRYP PFNGLPUSHGROUPMARKEREXTPROC) (GLsizei length, const GLchar *marker); +typedef void (GL_APIENTRYP PFNGLPOPGROUPMARKEREXTPROC) (void); +#endif + +/* GL_EXT_discard_framebuffer */ +#ifndef GL_EXT_discard_framebuffer +#define GL_EXT_discard_framebuffer 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_APICALL void GL_APIENTRY glDiscardFramebufferEXT (GLenum target, GLsizei numAttachments, const GLenum *attachments); +#endif +typedef void (GL_APIENTRYP PFNGLDISCARDFRAMEBUFFEREXTPROC) (GLenum target, GLsizei numAttachments, const GLenum *attachments); +#endif + +/* GL_EXT_map_buffer_range */ 
+#ifndef GL_EXT_map_buffer_range +#define GL_EXT_map_buffer_range 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_APICALL void* GL_APIENTRY glMapBufferRangeEXT (GLenum target, GLintptr offset, GLsizeiptr length, GLbitfield access); +GL_APICALL void GL_APIENTRY glFlushMappedBufferRangeEXT (GLenum target, GLintptr offset, GLsizeiptr length); +#endif +typedef void* (GL_APIENTRYP PFNGLMAPBUFFERRANGEEXTPROC) (GLenum target, GLintptr offset, GLsizeiptr length, GLbitfield access); +typedef void (GL_APIENTRYP PFNGLFLUSHMAPPEDBUFFERRANGEEXTPROC) (GLenum target, GLintptr offset, GLsizeiptr length); +#endif + +/* GL_EXT_multisampled_render_to_texture */ +#ifndef GL_EXT_multisampled_render_to_texture +#define GL_EXT_multisampled_render_to_texture 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_APICALL void GL_APIENTRY glRenderbufferStorageMultisampleEXT (GLenum, GLsizei, GLenum, GLsizei, GLsizei); +GL_APICALL void GL_APIENTRY glFramebufferTexture2DMultisampleEXT (GLenum, GLenum, GLenum, GLuint, GLint, GLsizei); +#endif +typedef void (GL_APIENTRYP PFNGLRENDERBUFFERSTORAGEMULTISAMPLEEXTPROC) (GLenum target, GLsizei samples, GLenum internalformat, GLsizei width, GLsizei height); +typedef void (GL_APIENTRYP PFNGLFRAMEBUFFERTEXTURE2DMULTISAMPLEEXTPROC) (GLenum target, GLenum attachment, GLenum textarget, GLuint texture, GLint level, GLsizei samples); +#endif + +/* GL_EXT_multiview_draw_buffers */ +#ifndef GL_EXT_multiview_draw_buffers +#define GL_EXT_multiview_draw_buffers 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_APICALL void GL_APIENTRY glReadBufferIndexedEXT (GLenum src, GLint index); +GL_APICALL void GL_APIENTRY glDrawBuffersIndexedEXT (GLint n, const GLenum *location, const GLint *indices); +GL_APICALL void GL_APIENTRY glGetIntegeri_vEXT (GLenum target, GLuint index, GLint *data); +#endif +typedef void (GL_APIENTRYP PFNGLREADBUFFERINDEXEDEXTPROC) (GLenum src, GLint index); +typedef void (GL_APIENTRYP PFNGLDRAWBUFFERSINDEXEDEXTPROC) (GLint n, const GLenum *location, const GLint *indices); +typedef void 
(GL_APIENTRYP PFNGLGETINTEGERI_VEXTPROC) (GLenum target, GLuint index, GLint *data); +#endif + +#ifndef GL_EXT_multi_draw_arrays +#define GL_EXT_multi_draw_arrays 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_APICALL void GL_APIENTRY glMultiDrawArraysEXT (GLenum, GLint *, GLsizei *, GLsizei); +GL_APICALL void GL_APIENTRY glMultiDrawElementsEXT (GLenum, const GLsizei *, GLenum, const GLvoid* *, GLsizei); +#endif /* GL_GLEXT_PROTOTYPES */ +typedef void (GL_APIENTRYP PFNGLMULTIDRAWARRAYSEXTPROC) (GLenum mode, GLint *first, GLsizei *count, GLsizei primcount); +typedef void (GL_APIENTRYP PFNGLMULTIDRAWELEMENTSEXTPROC) (GLenum mode, const GLsizei *count, GLenum type, const GLvoid* *indices, GLsizei primcount); +#endif + +/* GL_EXT_occlusion_query_boolean */ +#ifndef GL_EXT_occlusion_query_boolean +#define GL_EXT_occlusion_query_boolean 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_APICALL void GL_APIENTRY glGenQueriesEXT (GLsizei n, GLuint *ids); +GL_APICALL void GL_APIENTRY glDeleteQueriesEXT (GLsizei n, const GLuint *ids); +GL_APICALL GLboolean GL_APIENTRY glIsQueryEXT (GLuint id); +GL_APICALL void GL_APIENTRY glBeginQueryEXT (GLenum target, GLuint id); +GL_APICALL void GL_APIENTRY glEndQueryEXT (GLenum target); +GL_APICALL void GL_APIENTRY glGetQueryivEXT (GLenum target, GLenum pname, GLint *params); +GL_APICALL void GL_APIENTRY glGetQueryObjectuivEXT (GLuint id, GLenum pname, GLuint *params); +#endif +typedef void (GL_APIENTRYP PFNGLGENQUERIESEXTPROC) (GLsizei n, GLuint *ids); +typedef void (GL_APIENTRYP PFNGLDELETEQUERIESEXTPROC) (GLsizei n, const GLuint *ids); +typedef GLboolean (GL_APIENTRYP PFNGLISQUERYEXTPROC) (GLuint id); +typedef void (GL_APIENTRYP PFNGLBEGINQUERYEXTPROC) (GLenum target, GLuint id); +typedef void (GL_APIENTRYP PFNGLENDQUERYEXTPROC) (GLenum target); +typedef void (GL_APIENTRYP PFNGLGETQUERYIVEXTPROC) (GLenum target, GLenum pname, GLint *params); +typedef void (GL_APIENTRYP PFNGLGETQUERYOBJECTUIVEXTPROC) (GLuint id, GLenum pname, GLuint *params); +#endif + +/* 
GL_EXT_read_format_bgra */ +#ifndef GL_EXT_read_format_bgra +#define GL_EXT_read_format_bgra 1 +#endif + +/* GL_EXT_robustness */ +#ifndef GL_EXT_robustness +#define GL_EXT_robustness 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_APICALL GLenum GL_APIENTRY glGetGraphicsResetStatusEXT (void); +GL_APICALL void GL_APIENTRY glReadnPixelsEXT (GLint x, GLint y, GLsizei width, GLsizei height, GLenum format, GLenum type, GLsizei bufSize, void *data); +GL_APICALL void GL_APIENTRY glGetnUniformfvEXT (GLuint program, GLint location, GLsizei bufSize, float *params); +GL_APICALL void GL_APIENTRY glGetnUniformivEXT (GLuint program, GLint location, GLsizei bufSize, GLint *params); +#endif +typedef GLenum (GL_APIENTRYP PFNGLGETGRAPHICSRESETSTATUSEXTPROC) (void); +typedef void (GL_APIENTRYP PFNGLREADNPIXELSEXTPROC) (GLint x, GLint y, GLsizei width, GLsizei height, GLenum format, GLenum type, GLsizei bufSize, void *data); +typedef void (GL_APIENTRYP PFNGLGETNUNIFORMFVEXTPROC) (GLuint program, GLint location, GLsizei bufSize, float *params); +typedef void (GL_APIENTRYP PFNGLGETNUNIFORMIVEXTPROC) (GLuint program, GLint location, GLsizei bufSize, GLint *params); +#endif + +/* GL_EXT_separate_shader_objects */ +#ifndef GL_EXT_separate_shader_objects +#define GL_EXT_separate_shader_objects 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_APICALL void GL_APIENTRY glUseProgramStagesEXT (GLuint pipeline, GLbitfield stages, GLuint program); +GL_APICALL void GL_APIENTRY glActiveShaderProgramEXT (GLuint pipeline, GLuint program); +GL_APICALL GLuint GL_APIENTRY glCreateShaderProgramvEXT (GLenum type, GLsizei count, const GLchar **strings); +GL_APICALL void GL_APIENTRY glBindProgramPipelineEXT (GLuint pipeline); +GL_APICALL void GL_APIENTRY glDeleteProgramPipelinesEXT (GLsizei n, const GLuint *pipelines); +GL_APICALL void GL_APIENTRY glGenProgramPipelinesEXT (GLsizei n, GLuint *pipelines); +GL_APICALL GLboolean GL_APIENTRY glIsProgramPipelineEXT (GLuint pipeline); +GL_APICALL void GL_APIENTRY glProgramParameteriEXT 
(GLuint program, GLenum pname, GLint value); +GL_APICALL void GL_APIENTRY glGetProgramPipelineivEXT (GLuint pipeline, GLenum pname, GLint *params); +GL_APICALL void GL_APIENTRY glProgramUniform1iEXT (GLuint program, GLint location, GLint x); +GL_APICALL void GL_APIENTRY glProgramUniform2iEXT (GLuint program, GLint location, GLint x, GLint y); +GL_APICALL void GL_APIENTRY glProgramUniform3iEXT (GLuint program, GLint location, GLint x, GLint y, GLint z); +GL_APICALL void GL_APIENTRY glProgramUniform4iEXT (GLuint program, GLint location, GLint x, GLint y, GLint z, GLint w); +GL_APICALL void GL_APIENTRY glProgramUniform1fEXT (GLuint program, GLint location, GLfloat x); +GL_APICALL void GL_APIENTRY glProgramUniform2fEXT (GLuint program, GLint location, GLfloat x, GLfloat y); +GL_APICALL void GL_APIENTRY glProgramUniform3fEXT (GLuint program, GLint location, GLfloat x, GLfloat y, GLfloat z); +GL_APICALL void GL_APIENTRY glProgramUniform4fEXT (GLuint program, GLint location, GLfloat x, GLfloat y, GLfloat z, GLfloat w); +GL_APICALL void GL_APIENTRY glProgramUniform1ivEXT (GLuint program, GLint location, GLsizei count, const GLint *value); +GL_APICALL void GL_APIENTRY glProgramUniform2ivEXT (GLuint program, GLint location, GLsizei count, const GLint *value); +GL_APICALL void GL_APIENTRY glProgramUniform3ivEXT (GLuint program, GLint location, GLsizei count, const GLint *value); +GL_APICALL void GL_APIENTRY glProgramUniform4ivEXT (GLuint program, GLint location, GLsizei count, const GLint *value); +GL_APICALL void GL_APIENTRY glProgramUniform1fvEXT (GLuint program, GLint location, GLsizei count, const GLfloat *value); +GL_APICALL void GL_APIENTRY glProgramUniform2fvEXT (GLuint program, GLint location, GLsizei count, const GLfloat *value); +GL_APICALL void GL_APIENTRY glProgramUniform3fvEXT (GLuint program, GLint location, GLsizei count, const GLfloat *value); +GL_APICALL void GL_APIENTRY glProgramUniform4fvEXT (GLuint program, GLint location, GLsizei count, const GLfloat 
*value); +GL_APICALL void GL_APIENTRY glProgramUniformMatrix2fvEXT (GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLfloat *value); +GL_APICALL void GL_APIENTRY glProgramUniformMatrix3fvEXT (GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLfloat *value); +GL_APICALL void GL_APIENTRY glProgramUniformMatrix4fvEXT (GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLfloat *value); +GL_APICALL void GL_APIENTRY glValidateProgramPipelineEXT (GLuint pipeline); +GL_APICALL void GL_APIENTRY glGetProgramPipelineInfoLogEXT (GLuint pipeline, GLsizei bufSize, GLsizei *length, GLchar *infoLog); +#endif +typedef void (GL_APIENTRYP PFNGLUSEPROGRAMSTAGESEXTPROC) (GLuint pipeline, GLbitfield stages, GLuint program); +typedef void (GL_APIENTRYP PFNGLACTIVESHADERPROGRAMEXTPROC) (GLuint pipeline, GLuint program); +typedef GLuint (GL_APIENTRYP PFNGLCREATESHADERPROGRAMVEXTPROC) (GLenum type, GLsizei count, const GLchar **strings); +typedef void (GL_APIENTRYP PFNGLBINDPROGRAMPIPELINEEXTPROC) (GLuint pipeline); +typedef void (GL_APIENTRYP PFNGLDELETEPROGRAMPIPELINESEXTPROC) (GLsizei n, const GLuint *pipelines); +typedef void (GL_APIENTRYP PFNGLGENPROGRAMPIPELINESEXTPROC) (GLsizei n, GLuint *pipelines); +typedef GLboolean (GL_APIENTRYP PFNGLISPROGRAMPIPELINEEXTPROC) (GLuint pipeline); +typedef void (GL_APIENTRYP PFNGLPROGRAMPARAMETERIEXTPROC) (GLuint program, GLenum pname, GLint value); +typedef void (GL_APIENTRYP PFNGLGETPROGRAMPIPELINEIVEXTPROC) (GLuint pipeline, GLenum pname, GLint *params); +typedef void (GL_APIENTRYP PFNGLPROGRAMUNIFORM1IEXTPROC) (GLuint program, GLint location, GLint x); +typedef void (GL_APIENTRYP PFNGLPROGRAMUNIFORM2IEXTPROC) (GLuint program, GLint location, GLint x, GLint y); +typedef void (GL_APIENTRYP PFNGLPROGRAMUNIFORM3IEXTPROC) (GLuint program, GLint location, GLint x, GLint y, GLint z); +typedef void (GL_APIENTRYP PFNGLPROGRAMUNIFORM4IEXTPROC) (GLuint program, GLint location, GLint 
x, GLint y, GLint z, GLint w); +typedef void (GL_APIENTRYP PFNGLPROGRAMUNIFORM1FEXTPROC) (GLuint program, GLint location, GLfloat x); +typedef void (GL_APIENTRYP PFNGLPROGRAMUNIFORM2FEXTPROC) (GLuint program, GLint location, GLfloat x, GLfloat y); +typedef void (GL_APIENTRYP PFNGLPROGRAMUNIFORM3FEXTPROC) (GLuint program, GLint location, GLfloat x, GLfloat y, GLfloat z); +typedef void (GL_APIENTRYP PFNGLPROGRAMUNIFORM4FEXTPROC) (GLuint program, GLint location, GLfloat x, GLfloat y, GLfloat z, GLfloat w); +typedef void (GL_APIENTRYP PFNGLPROGRAMUNIFORM1IVEXTPROC) (GLuint program, GLint location, GLsizei count, const GLint *value); +typedef void (GL_APIENTRYP PFNGLPROGRAMUNIFORM2IVEXTPROC) (GLuint program, GLint location, GLsizei count, const GLint *value); +typedef void (GL_APIENTRYP PFNGLPROGRAMUNIFORM3IVEXTPROC) (GLuint program, GLint location, GLsizei count, const GLint *value); +typedef void (GL_APIENTRYP PFNGLPROGRAMUNIFORM4IVEXTPROC) (GLuint program, GLint location, GLsizei count, const GLint *value); +typedef void (GL_APIENTRYP PFNGLPROGRAMUNIFORM1FVEXTPROC) (GLuint program, GLint location, GLsizei count, const GLfloat *value); +typedef void (GL_APIENTRYP PFNGLPROGRAMUNIFORM2FVEXTPROC) (GLuint program, GLint location, GLsizei count, const GLfloat *value); +typedef void (GL_APIENTRYP PFNGLPROGRAMUNIFORM3FVEXTPROC) (GLuint program, GLint location, GLsizei count, const GLfloat *value); +typedef void (GL_APIENTRYP PFNGLPROGRAMUNIFORM4FVEXTPROC) (GLuint program, GLint location, GLsizei count, const GLfloat *value); +typedef void (GL_APIENTRYP PFNGLPROGRAMUNIFORMMATRIX2FVEXTPROC) (GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLfloat *value); +typedef void (GL_APIENTRYP PFNGLPROGRAMUNIFORMMATRIX3FVEXTPROC) (GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLfloat *value); +typedef void (GL_APIENTRYP PFNGLPROGRAMUNIFORMMATRIX4FVEXTPROC) (GLuint program, GLint location, GLsizei count, GLboolean transpose, const 
GLfloat *value); +typedef void (GL_APIENTRYP PFNGLVALIDATEPROGRAMPIPELINEEXTPROC) (GLuint pipeline); +typedef void (GL_APIENTRYP PFNGLGETPROGRAMPIPELINEINFOLOGEXTPROC) (GLuint pipeline, GLsizei bufSize, GLsizei *length, GLchar *infoLog); +#endif + +/* GL_EXT_shader_framebuffer_fetch */ +#ifndef GL_EXT_shader_framebuffer_fetch +#define GL_EXT_shader_framebuffer_fetch 1 +#endif + +/* GL_EXT_shader_texture_lod */ +#ifndef GL_EXT_shader_texture_lod +#define GL_EXT_shader_texture_lod 1 +#endif + +/* GL_EXT_shadow_samplers */ +#ifndef GL_EXT_shadow_samplers +#define GL_EXT_shadow_samplers 1 +#endif + +/* GL_EXT_sRGB */ +#ifndef GL_EXT_sRGB +#define GL_EXT_sRGB 1 +#endif + +/* GL_EXT_texture_compression_dxt1 */ +#ifndef GL_EXT_texture_compression_dxt1 +#define GL_EXT_texture_compression_dxt1 1 +#endif + +/* GL_EXT_texture_filter_anisotropic */ +#ifndef GL_EXT_texture_filter_anisotropic +#define GL_EXT_texture_filter_anisotropic 1 +#endif + +/* GL_EXT_texture_format_BGRA8888 */ +#ifndef GL_EXT_texture_format_BGRA8888 +#define GL_EXT_texture_format_BGRA8888 1 +#endif + +/* GL_EXT_texture_rg */ +#ifndef GL_EXT_texture_rg +#define GL_EXT_texture_rg 1 +#endif + +/* GL_EXT_texture_storage */ +#ifndef GL_EXT_texture_storage +#define GL_EXT_texture_storage 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_APICALL void GL_APIENTRY glTexStorage1DEXT (GLenum target, GLsizei levels, GLenum internalformat, GLsizei width); +GL_APICALL void GL_APIENTRY glTexStorage2DEXT (GLenum target, GLsizei levels, GLenum internalformat, GLsizei width, GLsizei height); +GL_APICALL void GL_APIENTRY glTexStorage3DEXT (GLenum target, GLsizei levels, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth); +GL_APICALL void GL_APIENTRY glTextureStorage1DEXT (GLuint texture, GLenum target, GLsizei levels, GLenum internalformat, GLsizei width); +GL_APICALL void GL_APIENTRY glTextureStorage2DEXT (GLuint texture, GLenum target, GLsizei levels, GLenum internalformat, GLsizei width, GLsizei height); +GL_APICALL 
void GL_APIENTRY glTextureStorage3DEXT (GLuint texture, GLenum target, GLsizei levels, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth); +#endif +typedef void (GL_APIENTRYP PFNGLTEXSTORAGE1DEXTPROC) (GLenum target, GLsizei levels, GLenum internalformat, GLsizei width); +typedef void (GL_APIENTRYP PFNGLTEXSTORAGE2DEXTPROC) (GLenum target, GLsizei levels, GLenum internalformat, GLsizei width, GLsizei height); +typedef void (GL_APIENTRYP PFNGLTEXSTORAGE3DEXTPROC) (GLenum target, GLsizei levels, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth); +typedef void (GL_APIENTRYP PFNGLTEXTURESTORAGE1DEXTPROC) (GLuint texture, GLenum target, GLsizei levels, GLenum internalformat, GLsizei width); +typedef void (GL_APIENTRYP PFNGLTEXTURESTORAGE2DEXTPROC) (GLuint texture, GLenum target, GLsizei levels, GLenum internalformat, GLsizei width, GLsizei height); +typedef void (GL_APIENTRYP PFNGLTEXTURESTORAGE3DEXTPROC) (GLuint texture, GLenum target, GLsizei levels, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth); +#endif + +/* GL_EXT_texture_type_2_10_10_10_REV */ +#ifndef GL_EXT_texture_type_2_10_10_10_REV +#define GL_EXT_texture_type_2_10_10_10_REV 1 +#endif + +/* GL_EXT_unpack_subimage */ +#ifndef GL_EXT_unpack_subimage +#define GL_EXT_unpack_subimage 1 +#endif + +/*------------------------------------------------------------------------* + * DMP extension functions + *------------------------------------------------------------------------*/ + +/* GL_DMP_shader_binary */ +#ifndef GL_DMP_shader_binary +#define GL_DMP_shader_binary 1 +#endif + +/*------------------------------------------------------------------------* + * FJ extension functions + *------------------------------------------------------------------------*/ + +/* GL_FJ_shader_binary_GCCSO */ +#ifndef GL_FJ_shader_binary_GCCSO +#define GL_FJ_shader_binary_GCCSO 1 +#endif + +/*------------------------------------------------------------------------* + * IMG 
extension functions + *------------------------------------------------------------------------*/ + +/* GL_IMG_program_binary */ +#ifndef GL_IMG_program_binary +#define GL_IMG_program_binary 1 +#endif + +/* GL_IMG_read_format */ +#ifndef GL_IMG_read_format +#define GL_IMG_read_format 1 +#endif + +/* GL_IMG_shader_binary */ +#ifndef GL_IMG_shader_binary +#define GL_IMG_shader_binary 1 +#endif + +/* GL_IMG_texture_compression_pvrtc */ +#ifndef GL_IMG_texture_compression_pvrtc +#define GL_IMG_texture_compression_pvrtc 1 +#endif + +/* GL_IMG_multisampled_render_to_texture */ +#ifndef GL_IMG_multisampled_render_to_texture +#define GL_IMG_multisampled_render_to_texture 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_APICALL void GL_APIENTRY glRenderbufferStorageMultisampleIMG (GLenum, GLsizei, GLenum, GLsizei, GLsizei); +GL_APICALL void GL_APIENTRY glFramebufferTexture2DMultisampleIMG (GLenum, GLenum, GLenum, GLuint, GLint, GLsizei); +#endif +typedef void (GL_APIENTRYP PFNGLRENDERBUFFERSTORAGEMULTISAMPLEIMGPROC) (GLenum target, GLsizei samples, GLenum internalformat, GLsizei width, GLsizei height); +typedef void (GL_APIENTRYP PFNGLFRAMEBUFFERTEXTURE2DMULTISAMPLEIMGPROC) (GLenum target, GLenum attachment, GLenum textarget, GLuint texture, GLint level, GLsizei samples); +#endif + +/*------------------------------------------------------------------------* + * NV extension functions + *------------------------------------------------------------------------*/ + +/* GL_NV_coverage_sample */ +#ifndef GL_NV_coverage_sample +#define GL_NV_coverage_sample 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_APICALL void GL_APIENTRY glCoverageMaskNV (GLboolean mask); +GL_APICALL void GL_APIENTRY glCoverageOperationNV (GLenum operation); +#endif +typedef void (GL_APIENTRYP PFNGLCOVERAGEMASKNVPROC) (GLboolean mask); +typedef void (GL_APIENTRYP PFNGLCOVERAGEOPERATIONNVPROC) (GLenum operation); +#endif + +/* GL_NV_depth_nonlinear */ +#ifndef GL_NV_depth_nonlinear +#define GL_NV_depth_nonlinear 1 +#endif + +/* 
GL_NV_draw_buffers */ +#ifndef GL_NV_draw_buffers +#define GL_NV_draw_buffers 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_APICALL void GL_APIENTRY glDrawBuffersNV (GLsizei n, const GLenum *bufs); +#endif +typedef void (GL_APIENTRYP PFNGLDRAWBUFFERSNVPROC) (GLsizei n, const GLenum *bufs); +#endif + +/* GL_NV_fbo_color_attachments */ +#ifndef GL_NV_fbo_color_attachments +#define GL_NV_fbo_color_attachments 1 +#endif + +/* GL_NV_fence */ +#ifndef GL_NV_fence +#define GL_NV_fence 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_APICALL void GL_APIENTRY glDeleteFencesNV (GLsizei, const GLuint *); +GL_APICALL void GL_APIENTRY glGenFencesNV (GLsizei, GLuint *); +GL_APICALL GLboolean GL_APIENTRY glIsFenceNV (GLuint); +GL_APICALL GLboolean GL_APIENTRY glTestFenceNV (GLuint); +GL_APICALL void GL_APIENTRY glGetFenceivNV (GLuint, GLenum, GLint *); +GL_APICALL void GL_APIENTRY glFinishFenceNV (GLuint); +GL_APICALL void GL_APIENTRY glSetFenceNV (GLuint, GLenum); +#endif +typedef void (GL_APIENTRYP PFNGLDELETEFENCESNVPROC) (GLsizei n, const GLuint *fences); +typedef void (GL_APIENTRYP PFNGLGENFENCESNVPROC) (GLsizei n, GLuint *fences); +typedef GLboolean (GL_APIENTRYP PFNGLISFENCENVPROC) (GLuint fence); +typedef GLboolean (GL_APIENTRYP PFNGLTESTFENCENVPROC) (GLuint fence); +typedef void (GL_APIENTRYP PFNGLGETFENCEIVNVPROC) (GLuint fence, GLenum pname, GLint *params); +typedef void (GL_APIENTRYP PFNGLFINISHFENCENVPROC) (GLuint fence); +typedef void (GL_APIENTRYP PFNGLSETFENCENVPROC) (GLuint fence, GLenum condition); +#endif + +/* GL_NV_read_buffer */ +#ifndef GL_NV_read_buffer +#define GL_NV_read_buffer 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_APICALL void GL_APIENTRY glReadBufferNV (GLenum mode); +#endif +typedef void (GL_APIENTRYP PFNGLREADBUFFERNVPROC) (GLenum mode); +#endif + +/* GL_NV_read_buffer_front */ +#ifndef GL_NV_read_buffer_front +#define GL_NV_read_buffer_front 1 +#endif + +/* GL_NV_read_depth */ +#ifndef GL_NV_read_depth +#define GL_NV_read_depth 1 +#endif + +/* GL_NV_read_depth_stencil */ +#ifndef 
GL_NV_read_depth_stencil +#define GL_NV_read_depth_stencil 1 +#endif + +/* GL_NV_read_stencil */ +#ifndef GL_NV_read_stencil +#define GL_NV_read_stencil 1 +#endif + +/* GL_NV_texture_compression_s3tc_update */ +#ifndef GL_NV_texture_compression_s3tc_update +#define GL_NV_texture_compression_s3tc_update 1 +#endif + +/* GL_NV_texture_npot_2D_mipmap */ +#ifndef GL_NV_texture_npot_2D_mipmap +#define GL_NV_texture_npot_2D_mipmap 1 +#endif + +/*------------------------------------------------------------------------* + * QCOM extension functions + *------------------------------------------------------------------------*/ + +/* GL_QCOM_alpha_test */ +#ifndef GL_QCOM_alpha_test +#define GL_QCOM_alpha_test 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_APICALL void GL_APIENTRY glAlphaFuncQCOM (GLenum func, GLclampf ref); +#endif +typedef void (GL_APIENTRYP PFNGLALPHAFUNCQCOMPROC) (GLenum func, GLclampf ref); +#endif + +/* GL_QCOM_binning_control */ +#ifndef GL_QCOM_binning_control +#define GL_QCOM_binning_control 1 +#endif + +/* GL_QCOM_driver_control */ +#ifndef GL_QCOM_driver_control +#define GL_QCOM_driver_control 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_APICALL void GL_APIENTRY glGetDriverControlsQCOM (GLint *num, GLsizei size, GLuint *driverControls); +GL_APICALL void GL_APIENTRY glGetDriverControlStringQCOM (GLuint driverControl, GLsizei bufSize, GLsizei *length, GLchar *driverControlString); +GL_APICALL void GL_APIENTRY glEnableDriverControlQCOM (GLuint driverControl); +GL_APICALL void GL_APIENTRY glDisableDriverControlQCOM (GLuint driverControl); +#endif +typedef void (GL_APIENTRYP PFNGLGETDRIVERCONTROLSQCOMPROC) (GLint *num, GLsizei size, GLuint *driverControls); +typedef void (GL_APIENTRYP PFNGLGETDRIVERCONTROLSTRINGQCOMPROC) (GLuint driverControl, GLsizei bufSize, GLsizei *length, GLchar *driverControlString); +typedef void (GL_APIENTRYP PFNGLENABLEDRIVERCONTROLQCOMPROC) (GLuint driverControl); +typedef void (GL_APIENTRYP PFNGLDISABLEDRIVERCONTROLQCOMPROC) (GLuint driverControl); 
+#endif + +/* GL_QCOM_extended_get */ +#ifndef GL_QCOM_extended_get +#define GL_QCOM_extended_get 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_APICALL void GL_APIENTRY glExtGetTexturesQCOM (GLuint *textures, GLint maxTextures, GLint *numTextures); +GL_APICALL void GL_APIENTRY glExtGetBuffersQCOM (GLuint *buffers, GLint maxBuffers, GLint *numBuffers); +GL_APICALL void GL_APIENTRY glExtGetRenderbuffersQCOM (GLuint *renderbuffers, GLint maxRenderbuffers, GLint *numRenderbuffers); +GL_APICALL void GL_APIENTRY glExtGetFramebuffersQCOM (GLuint *framebuffers, GLint maxFramebuffers, GLint *numFramebuffers); +GL_APICALL void GL_APIENTRY glExtGetTexLevelParameterivQCOM (GLuint texture, GLenum face, GLint level, GLenum pname, GLint *params); +GL_APICALL void GL_APIENTRY glExtTexObjectStateOverrideiQCOM (GLenum target, GLenum pname, GLint param); +GL_APICALL void GL_APIENTRY glExtGetTexSubImageQCOM (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLenum format, GLenum type, GLvoid *texels); +GL_APICALL void GL_APIENTRY glExtGetBufferPointervQCOM (GLenum target, GLvoid **params); +#endif +typedef void (GL_APIENTRYP PFNGLEXTGETTEXTURESQCOMPROC) (GLuint *textures, GLint maxTextures, GLint *numTextures); +typedef void (GL_APIENTRYP PFNGLEXTGETBUFFERSQCOMPROC) (GLuint *buffers, GLint maxBuffers, GLint *numBuffers); +typedef void (GL_APIENTRYP PFNGLEXTGETRENDERBUFFERSQCOMPROC) (GLuint *renderbuffers, GLint maxRenderbuffers, GLint *numRenderbuffers); +typedef void (GL_APIENTRYP PFNGLEXTGETFRAMEBUFFERSQCOMPROC) (GLuint *framebuffers, GLint maxFramebuffers, GLint *numFramebuffers); +typedef void (GL_APIENTRYP PFNGLEXTGETTEXLEVELPARAMETERIVQCOMPROC) (GLuint texture, GLenum face, GLint level, GLenum pname, GLint *params); +typedef void (GL_APIENTRYP PFNGLEXTTEXOBJECTSTATEOVERRIDEIQCOMPROC) (GLenum target, GLenum pname, GLint param); +typedef void (GL_APIENTRYP PFNGLEXTGETTEXSUBIMAGEQCOMPROC) (GLenum target, GLint level, GLint 
xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLenum format, GLenum type, GLvoid *texels); +typedef void (GL_APIENTRYP PFNGLEXTGETBUFFERPOINTERVQCOMPROC) (GLenum target, GLvoid **params); +#endif + +/* GL_QCOM_extended_get2 */ +#ifndef GL_QCOM_extended_get2 +#define GL_QCOM_extended_get2 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_APICALL void GL_APIENTRY glExtGetShadersQCOM (GLuint *shaders, GLint maxShaders, GLint *numShaders); +GL_APICALL void GL_APIENTRY glExtGetProgramsQCOM (GLuint *programs, GLint maxPrograms, GLint *numPrograms); +GL_APICALL GLboolean GL_APIENTRY glExtIsProgramBinaryQCOM (GLuint program); +GL_APICALL void GL_APIENTRY glExtGetProgramBinarySourceQCOM (GLuint program, GLenum shadertype, GLchar *source, GLint *length); +#endif +typedef void (GL_APIENTRYP PFNGLEXTGETSHADERSQCOMPROC) (GLuint *shaders, GLint maxShaders, GLint *numShaders); +typedef void (GL_APIENTRYP PFNGLEXTGETPROGRAMSQCOMPROC) (GLuint *programs, GLint maxPrograms, GLint *numPrograms); +typedef GLboolean (GL_APIENTRYP PFNGLEXTISPROGRAMBINARYQCOMPROC) (GLuint program); +typedef void (GL_APIENTRYP PFNGLEXTGETPROGRAMBINARYSOURCEQCOMPROC) (GLuint program, GLenum shadertype, GLchar *source, GLint *length); +#endif + +/* GL_QCOM_perfmon_global_mode */ +#ifndef GL_QCOM_perfmon_global_mode +#define GL_QCOM_perfmon_global_mode 1 +#endif + +/* GL_QCOM_writeonly_rendering */ +#ifndef GL_QCOM_writeonly_rendering +#define GL_QCOM_writeonly_rendering 1 +#endif + +/* GL_QCOM_tiled_rendering */ +#ifndef GL_QCOM_tiled_rendering +#define GL_QCOM_tiled_rendering 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_APICALL void GL_APIENTRY glStartTilingQCOM (GLuint x, GLuint y, GLuint width, GLuint height, GLbitfield preserveMask); +GL_APICALL void GL_APIENTRY glEndTilingQCOM (GLbitfield preserveMask); +#endif +typedef void (GL_APIENTRYP PFNGLSTARTTILINGQCOMPROC) (GLuint x, GLuint y, GLuint width, GLuint height, GLbitfield preserveMask); +typedef void (GL_APIENTRYP PFNGLENDTILINGQCOMPROC) 
(GLbitfield preserveMask); +#endif + +/*------------------------------------------------------------------------* + * VIV extension tokens + *------------------------------------------------------------------------*/ + +/* GL_VIV_shader_binary */ +#ifndef GL_VIV_shader_binary +#define GL_VIV_shader_binary 1 +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* __gl2ext_h_ */ diff --git a/opengles-3.1/include/GLES2/gl2platform.h b/opengles-3.1/include/GLES2/gl2platform.h new file mode 100644 index 0000000000..c9fa3c4d64 --- /dev/null +++ b/opengles-3.1/include/GLES2/gl2platform.h @@ -0,0 +1,30 @@ +#ifndef __gl2platform_h_ +#define __gl2platform_h_ + +/* $Revision: 10602 $ on $Date:: 2010-03-04 22:35:34 -0800 #$ */ + +/* + * This document is licensed under the SGI Free Software B License Version + * 2.0. For details, see http://oss.sgi.com/projects/FreeB/ . + */ + +/* Platform-specific types and definitions for OpenGL ES 2.X gl2.h + * + * Adopters may modify khrplatform.h and this file to suit their platform. + * You are encouraged to submit all modifications to the Khronos group so that + * they can be included in future versions of this file. Please submit changes + * by sending them to the public Khronos Bugzilla (http://khronos.org/bugzilla) + * by filing a bug against product "OpenGL-ES" component "Registry". + */ + +#include + +#ifndef GL_APICALL +#define GL_APICALL KHRONOS_APICALL +#endif + +#ifndef GL_APIENTRY +#define GL_APIENTRY KHRONOS_APIENTRY +#endif + +#endif /* __gl2platform_h_ */ diff --git a/opengles-3.1/include/GLES3/gl3.h b/opengles-3.1/include/GLES3/gl3.h new file mode 100644 index 0000000000..9c79862c0d --- /dev/null +++ b/opengles-3.1/include/GLES3/gl3.h @@ -0,0 +1,1061 @@ +#ifndef __gl3_h_ +#define __gl3_h_ + +/* + * gl3.h last updated on $Date: 2013-02-12 14:37:24 -0800 (Tue, 12 Feb 2013) $ + */ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/* +** Copyright (c) 2007-2013 The Khronos Group Inc. 
+** +** Permission is hereby granted, free of charge, to any person obtaining a +** copy of this software and/or associated documentation files (the +** "Materials"), to deal in the Materials without restriction, including +** without limitation the rights to use, copy, modify, merge, publish, +** distribute, sublicense, and/or sell copies of the Materials, and to +** permit persons to whom the Materials are furnished to do so, subject to +** the following conditions: +** +** The above copyright notice and this permission notice shall be included +** in all copies or substantial portions of the Materials. +** +** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +** MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. 
+*/ + +/*------------------------------------------------------------------------- + * Data type definitions + *-----------------------------------------------------------------------*/ + +/* OpenGL ES 2.0 */ + +typedef void GLvoid; +typedef char GLchar; +typedef unsigned int GLenum; +typedef unsigned char GLboolean; +typedef unsigned int GLbitfield; +typedef khronos_int8_t GLbyte; +typedef short GLshort; +typedef int GLint; +typedef int GLsizei; +typedef khronos_uint8_t GLubyte; +typedef unsigned short GLushort; +typedef unsigned int GLuint; +typedef khronos_float_t GLfloat; +typedef khronos_float_t GLclampf; +typedef khronos_int32_t GLfixed; +typedef khronos_intptr_t GLintptr; +typedef khronos_ssize_t GLsizeiptr; + +/* OpenGL ES 3.0 */ + +typedef unsigned short GLhalf; +typedef khronos_int64_t GLint64; +typedef khronos_uint64_t GLuint64; +typedef struct __GLsync *GLsync; + +/*------------------------------------------------------------------------- + * Token definitions + *-----------------------------------------------------------------------*/ + +/* OpenGL ES core versions */ +#define GL_ES_VERSION_3_0 1 +#define GL_ES_VERSION_2_0 1 + +/* OpenGL ES 2.0 */ + +/* ClearBufferMask */ +#define GL_DEPTH_BUFFER_BIT 0x00000100 +#define GL_STENCIL_BUFFER_BIT 0x00000400 +#define GL_COLOR_BUFFER_BIT 0x00004000 + +/* Boolean */ +#define GL_FALSE 0 +#define GL_TRUE 1 + +/* BeginMode */ +#define GL_POINTS 0x0000 +#define GL_LINES 0x0001 +#define GL_LINE_LOOP 0x0002 +#define GL_LINE_STRIP 0x0003 +#define GL_TRIANGLES 0x0004 +#define GL_TRIANGLE_STRIP 0x0005 +#define GL_TRIANGLE_FAN 0x0006 + +/* BlendingFactorDest */ +#define GL_ZERO 0 +#define GL_ONE 1 +#define GL_SRC_COLOR 0x0300 +#define GL_ONE_MINUS_SRC_COLOR 0x0301 +#define GL_SRC_ALPHA 0x0302 +#define GL_ONE_MINUS_SRC_ALPHA 0x0303 +#define GL_DST_ALPHA 0x0304 +#define GL_ONE_MINUS_DST_ALPHA 0x0305 + +/* BlendingFactorSrc */ +/* GL_ZERO */ +/* GL_ONE */ +#define GL_DST_COLOR 0x0306 +#define GL_ONE_MINUS_DST_COLOR 0x0307 
+#define GL_SRC_ALPHA_SATURATE 0x0308 +/* GL_SRC_ALPHA */ +/* GL_ONE_MINUS_SRC_ALPHA */ +/* GL_DST_ALPHA */ +/* GL_ONE_MINUS_DST_ALPHA */ + +/* BlendEquationSeparate */ +#define GL_FUNC_ADD 0x8006 +#define GL_BLEND_EQUATION 0x8009 +#define GL_BLEND_EQUATION_RGB 0x8009 /* same as BLEND_EQUATION */ +#define GL_BLEND_EQUATION_ALPHA 0x883D + +/* BlendSubtract */ +#define GL_FUNC_SUBTRACT 0x800A +#define GL_FUNC_REVERSE_SUBTRACT 0x800B + +/* Separate Blend Functions */ +#define GL_BLEND_DST_RGB 0x80C8 +#define GL_BLEND_SRC_RGB 0x80C9 +#define GL_BLEND_DST_ALPHA 0x80CA +#define GL_BLEND_SRC_ALPHA 0x80CB +#define GL_CONSTANT_COLOR 0x8001 +#define GL_ONE_MINUS_CONSTANT_COLOR 0x8002 +#define GL_CONSTANT_ALPHA 0x8003 +#define GL_ONE_MINUS_CONSTANT_ALPHA 0x8004 +#define GL_BLEND_COLOR 0x8005 + +/* Buffer Objects */ +#define GL_ARRAY_BUFFER 0x8892 +#define GL_ELEMENT_ARRAY_BUFFER 0x8893 +#define GL_ARRAY_BUFFER_BINDING 0x8894 +#define GL_ELEMENT_ARRAY_BUFFER_BINDING 0x8895 + +#define GL_STREAM_DRAW 0x88E0 +#define GL_STATIC_DRAW 0x88E4 +#define GL_DYNAMIC_DRAW 0x88E8 + +#define GL_BUFFER_SIZE 0x8764 +#define GL_BUFFER_USAGE 0x8765 + +#define GL_CURRENT_VERTEX_ATTRIB 0x8626 + +/* CullFaceMode */ +#define GL_FRONT 0x0404 +#define GL_BACK 0x0405 +#define GL_FRONT_AND_BACK 0x0408 + +/* DepthFunction */ +/* GL_NEVER */ +/* GL_LESS */ +/* GL_EQUAL */ +/* GL_LEQUAL */ +/* GL_GREATER */ +/* GL_NOTEQUAL */ +/* GL_GEQUAL */ +/* GL_ALWAYS */ + +/* EnableCap */ +#define GL_TEXTURE_2D 0x0DE1 +#define GL_CULL_FACE 0x0B44 +#define GL_BLEND 0x0BE2 +#define GL_DITHER 0x0BD0 +#define GL_STENCIL_TEST 0x0B90 +#define GL_DEPTH_TEST 0x0B71 +#define GL_SCISSOR_TEST 0x0C11 +#define GL_POLYGON_OFFSET_FILL 0x8037 +#define GL_SAMPLE_ALPHA_TO_COVERAGE 0x809E +#define GL_SAMPLE_COVERAGE 0x80A0 + +/* ErrorCode */ +#define GL_NO_ERROR 0 +#define GL_INVALID_ENUM 0x0500 +#define GL_INVALID_VALUE 0x0501 +#define GL_INVALID_OPERATION 0x0502 +#define GL_OUT_OF_MEMORY 0x0505 + +/* FrontFaceDirection */ +#define 
GL_CW 0x0900 +#define GL_CCW 0x0901 + +/* GetPName */ +#define GL_LINE_WIDTH 0x0B21 +#define GL_ALIASED_POINT_SIZE_RANGE 0x846D +#define GL_ALIASED_LINE_WIDTH_RANGE 0x846E +#define GL_CULL_FACE_MODE 0x0B45 +#define GL_FRONT_FACE 0x0B46 +#define GL_DEPTH_RANGE 0x0B70 +#define GL_DEPTH_WRITEMASK 0x0B72 +#define GL_DEPTH_CLEAR_VALUE 0x0B73 +#define GL_DEPTH_FUNC 0x0B74 +#define GL_STENCIL_CLEAR_VALUE 0x0B91 +#define GL_STENCIL_FUNC 0x0B92 +#define GL_STENCIL_FAIL 0x0B94 +#define GL_STENCIL_PASS_DEPTH_FAIL 0x0B95 +#define GL_STENCIL_PASS_DEPTH_PASS 0x0B96 +#define GL_STENCIL_REF 0x0B97 +#define GL_STENCIL_VALUE_MASK 0x0B93 +#define GL_STENCIL_WRITEMASK 0x0B98 +#define GL_STENCIL_BACK_FUNC 0x8800 +#define GL_STENCIL_BACK_FAIL 0x8801 +#define GL_STENCIL_BACK_PASS_DEPTH_FAIL 0x8802 +#define GL_STENCIL_BACK_PASS_DEPTH_PASS 0x8803 +#define GL_STENCIL_BACK_REF 0x8CA3 +#define GL_STENCIL_BACK_VALUE_MASK 0x8CA4 +#define GL_STENCIL_BACK_WRITEMASK 0x8CA5 +#define GL_VIEWPORT 0x0BA2 +#define GL_SCISSOR_BOX 0x0C10 +/* GL_SCISSOR_TEST */ +#define GL_COLOR_CLEAR_VALUE 0x0C22 +#define GL_COLOR_WRITEMASK 0x0C23 +#define GL_UNPACK_ALIGNMENT 0x0CF5 +#define GL_PACK_ALIGNMENT 0x0D05 +#define GL_MAX_TEXTURE_SIZE 0x0D33 +#define GL_MAX_VIEWPORT_DIMS 0x0D3A +#define GL_SUBPIXEL_BITS 0x0D50 +#define GL_RED_BITS 0x0D52 +#define GL_GREEN_BITS 0x0D53 +#define GL_BLUE_BITS 0x0D54 +#define GL_ALPHA_BITS 0x0D55 +#define GL_DEPTH_BITS 0x0D56 +#define GL_STENCIL_BITS 0x0D57 +#define GL_POLYGON_OFFSET_UNITS 0x2A00 +/* GL_POLYGON_OFFSET_FILL */ +#define GL_POLYGON_OFFSET_FACTOR 0x8038 +#define GL_TEXTURE_BINDING_2D 0x8069 +#define GL_SAMPLE_BUFFERS 0x80A8 +#define GL_SAMPLES 0x80A9 +#define GL_SAMPLE_COVERAGE_VALUE 0x80AA +#define GL_SAMPLE_COVERAGE_INVERT 0x80AB + +/* GetTextureParameter */ +/* GL_TEXTURE_MAG_FILTER */ +/* GL_TEXTURE_MIN_FILTER */ +/* GL_TEXTURE_WRAP_S */ +/* GL_TEXTURE_WRAP_T */ + +#define GL_NUM_COMPRESSED_TEXTURE_FORMATS 0x86A2 +#define GL_COMPRESSED_TEXTURE_FORMATS 0x86A3 + +/* 
HintMode */ +#define GL_DONT_CARE 0x1100 +#define GL_FASTEST 0x1101 +#define GL_NICEST 0x1102 + +/* HintTarget */ +#define GL_GENERATE_MIPMAP_HINT 0x8192 + +/* DataType */ +#define GL_BYTE 0x1400 +#define GL_UNSIGNED_BYTE 0x1401 +#define GL_SHORT 0x1402 +#define GL_UNSIGNED_SHORT 0x1403 +#define GL_INT 0x1404 +#define GL_UNSIGNED_INT 0x1405 +#define GL_FLOAT 0x1406 +#define GL_FIXED 0x140C + +/* PixelFormat */ +#define GL_DEPTH_COMPONENT 0x1902 +#define GL_ALPHA 0x1906 +#define GL_RGB 0x1907 +#define GL_RGBA 0x1908 +#define GL_LUMINANCE 0x1909 +#define GL_LUMINANCE_ALPHA 0x190A + +/* PixelType */ +/* GL_UNSIGNED_BYTE */ +#define GL_UNSIGNED_SHORT_4_4_4_4 0x8033 +#define GL_UNSIGNED_SHORT_5_5_5_1 0x8034 +#define GL_UNSIGNED_SHORT_5_6_5 0x8363 + +/* Shaders */ +#define GL_FRAGMENT_SHADER 0x8B30 +#define GL_VERTEX_SHADER 0x8B31 +#define GL_MAX_VERTEX_ATTRIBS 0x8869 +#define GL_MAX_VERTEX_UNIFORM_VECTORS 0x8DFB +#define GL_MAX_VARYING_VECTORS 0x8DFC +#define GL_MAX_COMBINED_TEXTURE_IMAGE_UNITS 0x8B4D +#define GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS 0x8B4C +#define GL_MAX_TEXTURE_IMAGE_UNITS 0x8872 +#define GL_MAX_FRAGMENT_UNIFORM_VECTORS 0x8DFD +#define GL_SHADER_TYPE 0x8B4F +#define GL_DELETE_STATUS 0x8B80 +#define GL_LINK_STATUS 0x8B82 +#define GL_VALIDATE_STATUS 0x8B83 +#define GL_ATTACHED_SHADERS 0x8B85 +#define GL_ACTIVE_UNIFORMS 0x8B86 +#define GL_ACTIVE_UNIFORM_MAX_LENGTH 0x8B87 +#define GL_ACTIVE_ATTRIBUTES 0x8B89 +#define GL_ACTIVE_ATTRIBUTE_MAX_LENGTH 0x8B8A +#define GL_SHADING_LANGUAGE_VERSION 0x8B8C +#define GL_CURRENT_PROGRAM 0x8B8D + +/* StencilFunction */ +#define GL_NEVER 0x0200 +#define GL_LESS 0x0201 +#define GL_EQUAL 0x0202 +#define GL_LEQUAL 0x0203 +#define GL_GREATER 0x0204 +#define GL_NOTEQUAL 0x0205 +#define GL_GEQUAL 0x0206 +#define GL_ALWAYS 0x0207 + +/* StencilOp */ +/* GL_ZERO */ +#define GL_KEEP 0x1E00 +#define GL_REPLACE 0x1E01 +#define GL_INCR 0x1E02 +#define GL_DECR 0x1E03 +#define GL_INVERT 0x150A +#define GL_INCR_WRAP 0x8507 +#define 
GL_DECR_WRAP 0x8508 + +/* StringName */ +#define GL_VENDOR 0x1F00 +#define GL_RENDERER 0x1F01 +#define GL_VERSION 0x1F02 +#define GL_EXTENSIONS 0x1F03 + +/* TextureMagFilter */ +#define GL_NEAREST 0x2600 +#define GL_LINEAR 0x2601 + +/* TextureMinFilter */ +/* GL_NEAREST */ +/* GL_LINEAR */ +#define GL_NEAREST_MIPMAP_NEAREST 0x2700 +#define GL_LINEAR_MIPMAP_NEAREST 0x2701 +#define GL_NEAREST_MIPMAP_LINEAR 0x2702 +#define GL_LINEAR_MIPMAP_LINEAR 0x2703 + +/* TextureParameterName */ +#define GL_TEXTURE_MAG_FILTER 0x2800 +#define GL_TEXTURE_MIN_FILTER 0x2801 +#define GL_TEXTURE_WRAP_S 0x2802 +#define GL_TEXTURE_WRAP_T 0x2803 + +/* TextureTarget */ +/* GL_TEXTURE_2D */ +#define GL_TEXTURE 0x1702 + +#define GL_TEXTURE_CUBE_MAP 0x8513 +#define GL_TEXTURE_BINDING_CUBE_MAP 0x8514 +#define GL_TEXTURE_CUBE_MAP_POSITIVE_X 0x8515 +#define GL_TEXTURE_CUBE_MAP_NEGATIVE_X 0x8516 +#define GL_TEXTURE_CUBE_MAP_POSITIVE_Y 0x8517 +#define GL_TEXTURE_CUBE_MAP_NEGATIVE_Y 0x8518 +#define GL_TEXTURE_CUBE_MAP_POSITIVE_Z 0x8519 +#define GL_TEXTURE_CUBE_MAP_NEGATIVE_Z 0x851A +#define GL_MAX_CUBE_MAP_TEXTURE_SIZE 0x851C + +/* TextureUnit */ +#define GL_TEXTURE0 0x84C0 +#define GL_TEXTURE1 0x84C1 +#define GL_TEXTURE2 0x84C2 +#define GL_TEXTURE3 0x84C3 +#define GL_TEXTURE4 0x84C4 +#define GL_TEXTURE5 0x84C5 +#define GL_TEXTURE6 0x84C6 +#define GL_TEXTURE7 0x84C7 +#define GL_TEXTURE8 0x84C8 +#define GL_TEXTURE9 0x84C9 +#define GL_TEXTURE10 0x84CA +#define GL_TEXTURE11 0x84CB +#define GL_TEXTURE12 0x84CC +#define GL_TEXTURE13 0x84CD +#define GL_TEXTURE14 0x84CE +#define GL_TEXTURE15 0x84CF +#define GL_TEXTURE16 0x84D0 +#define GL_TEXTURE17 0x84D1 +#define GL_TEXTURE18 0x84D2 +#define GL_TEXTURE19 0x84D3 +#define GL_TEXTURE20 0x84D4 +#define GL_TEXTURE21 0x84D5 +#define GL_TEXTURE22 0x84D6 +#define GL_TEXTURE23 0x84D7 +#define GL_TEXTURE24 0x84D8 +#define GL_TEXTURE25 0x84D9 +#define GL_TEXTURE26 0x84DA +#define GL_TEXTURE27 0x84DB +#define GL_TEXTURE28 0x84DC +#define GL_TEXTURE29 0x84DD +#define 
GL_TEXTURE30 0x84DE +#define GL_TEXTURE31 0x84DF +#define GL_ACTIVE_TEXTURE 0x84E0 + +/* TextureWrapMode */ +#define GL_REPEAT 0x2901 +#define GL_CLAMP_TO_EDGE 0x812F +#define GL_MIRRORED_REPEAT 0x8370 + +/* Uniform Types */ +#define GL_FLOAT_VEC2 0x8B50 +#define GL_FLOAT_VEC3 0x8B51 +#define GL_FLOAT_VEC4 0x8B52 +#define GL_INT_VEC2 0x8B53 +#define GL_INT_VEC3 0x8B54 +#define GL_INT_VEC4 0x8B55 +#define GL_BOOL 0x8B56 +#define GL_BOOL_VEC2 0x8B57 +#define GL_BOOL_VEC3 0x8B58 +#define GL_BOOL_VEC4 0x8B59 +#define GL_FLOAT_MAT2 0x8B5A +#define GL_FLOAT_MAT3 0x8B5B +#define GL_FLOAT_MAT4 0x8B5C +#define GL_SAMPLER_2D 0x8B5E +#define GL_SAMPLER_CUBE 0x8B60 + +/* Vertex Arrays */ +#define GL_VERTEX_ATTRIB_ARRAY_ENABLED 0x8622 +#define GL_VERTEX_ATTRIB_ARRAY_SIZE 0x8623 +#define GL_VERTEX_ATTRIB_ARRAY_STRIDE 0x8624 +#define GL_VERTEX_ATTRIB_ARRAY_TYPE 0x8625 +#define GL_VERTEX_ATTRIB_ARRAY_NORMALIZED 0x886A +#define GL_VERTEX_ATTRIB_ARRAY_POINTER 0x8645 +#define GL_VERTEX_ATTRIB_ARRAY_BUFFER_BINDING 0x889F + +/* Read Format */ +#define GL_IMPLEMENTATION_COLOR_READ_TYPE 0x8B9A +#define GL_IMPLEMENTATION_COLOR_READ_FORMAT 0x8B9B + +/* Shader Source */ +#define GL_COMPILE_STATUS 0x8B81 +#define GL_INFO_LOG_LENGTH 0x8B84 +#define GL_SHADER_SOURCE_LENGTH 0x8B88 +#define GL_SHADER_COMPILER 0x8DFA + +/* Shader Binary */ +#define GL_SHADER_BINARY_FORMATS 0x8DF8 +#define GL_NUM_SHADER_BINARY_FORMATS 0x8DF9 + +/* Shader Precision-Specified Types */ +#define GL_LOW_FLOAT 0x8DF0 +#define GL_MEDIUM_FLOAT 0x8DF1 +#define GL_HIGH_FLOAT 0x8DF2 +#define GL_LOW_INT 0x8DF3 +#define GL_MEDIUM_INT 0x8DF4 +#define GL_HIGH_INT 0x8DF5 + +/* Framebuffer Object. 
*/ +#define GL_FRAMEBUFFER 0x8D40 +#define GL_RENDERBUFFER 0x8D41 + +#define GL_RGBA4 0x8056 +#define GL_RGB5_A1 0x8057 +#define GL_RGB565 0x8D62 +#define GL_DEPTH_COMPONENT16 0x81A5 +#define GL_STENCIL_INDEX8 0x8D48 + +#define GL_RENDERBUFFER_WIDTH 0x8D42 +#define GL_RENDERBUFFER_HEIGHT 0x8D43 +#define GL_RENDERBUFFER_INTERNAL_FORMAT 0x8D44 +#define GL_RENDERBUFFER_RED_SIZE 0x8D50 +#define GL_RENDERBUFFER_GREEN_SIZE 0x8D51 +#define GL_RENDERBUFFER_BLUE_SIZE 0x8D52 +#define GL_RENDERBUFFER_ALPHA_SIZE 0x8D53 +#define GL_RENDERBUFFER_DEPTH_SIZE 0x8D54 +#define GL_RENDERBUFFER_STENCIL_SIZE 0x8D55 + +#define GL_FRAMEBUFFER_ATTACHMENT_OBJECT_TYPE 0x8CD0 +#define GL_FRAMEBUFFER_ATTACHMENT_OBJECT_NAME 0x8CD1 +#define GL_FRAMEBUFFER_ATTACHMENT_TEXTURE_LEVEL 0x8CD2 +#define GL_FRAMEBUFFER_ATTACHMENT_TEXTURE_CUBE_MAP_FACE 0x8CD3 + +#define GL_COLOR_ATTACHMENT0 0x8CE0 +#define GL_DEPTH_ATTACHMENT 0x8D00 +#define GL_STENCIL_ATTACHMENT 0x8D20 + +#define GL_NONE 0 + +#define GL_FRAMEBUFFER_COMPLETE 0x8CD5 +#define GL_FRAMEBUFFER_INCOMPLETE_ATTACHMENT 0x8CD6 +#define GL_FRAMEBUFFER_INCOMPLETE_MISSING_ATTACHMENT 0x8CD7 +#define GL_FRAMEBUFFER_INCOMPLETE_DIMENSIONS 0x8CD9 +#define GL_FRAMEBUFFER_UNSUPPORTED 0x8CDD + +#define GL_FRAMEBUFFER_BINDING 0x8CA6 +#define GL_RENDERBUFFER_BINDING 0x8CA7 +#define GL_MAX_RENDERBUFFER_SIZE 0x84E8 + +#define GL_INVALID_FRAMEBUFFER_OPERATION 0x0506 + +/* OpenGL ES 3.0 */ + +#define GL_READ_BUFFER 0x0C02 +#define GL_UNPACK_ROW_LENGTH 0x0CF2 +#define GL_UNPACK_SKIP_ROWS 0x0CF3 +#define GL_UNPACK_SKIP_PIXELS 0x0CF4 +#define GL_PACK_ROW_LENGTH 0x0D02 +#define GL_PACK_SKIP_ROWS 0x0D03 +#define GL_PACK_SKIP_PIXELS 0x0D04 +#define GL_COLOR 0x1800 +#define GL_DEPTH 0x1801 +#define GL_STENCIL 0x1802 +#define GL_RED 0x1903 +#define GL_RGB8 0x8051 +#define GL_RGBA8 0x8058 +#define GL_RGB10_A2 0x8059 +#define GL_TEXTURE_BINDING_3D 0x806A +#define GL_UNPACK_SKIP_IMAGES 0x806D +#define GL_UNPACK_IMAGE_HEIGHT 0x806E +#define GL_TEXTURE_3D 0x806F +#define 
GL_TEXTURE_WRAP_R 0x8072 +#define GL_MAX_3D_TEXTURE_SIZE 0x8073 +#define GL_UNSIGNED_INT_2_10_10_10_REV 0x8368 +#define GL_MAX_ELEMENTS_VERTICES 0x80E8 +#define GL_MAX_ELEMENTS_INDICES 0x80E9 +#define GL_TEXTURE_MIN_LOD 0x813A +#define GL_TEXTURE_MAX_LOD 0x813B +#define GL_TEXTURE_BASE_LEVEL 0x813C +#define GL_TEXTURE_MAX_LEVEL 0x813D +#define GL_MIN 0x8007 +#define GL_MAX 0x8008 +#define GL_DEPTH_COMPONENT24 0x81A6 +#define GL_MAX_TEXTURE_LOD_BIAS 0x84FD +#define GL_TEXTURE_COMPARE_MODE 0x884C +#define GL_TEXTURE_COMPARE_FUNC 0x884D +#define GL_CURRENT_QUERY 0x8865 +#define GL_QUERY_RESULT 0x8866 +#define GL_QUERY_RESULT_AVAILABLE 0x8867 +#define GL_BUFFER_MAPPED 0x88BC +#define GL_BUFFER_MAP_POINTER 0x88BD +#define GL_STREAM_READ 0x88E1 +#define GL_STREAM_COPY 0x88E2 +#define GL_STATIC_READ 0x88E5 +#define GL_STATIC_COPY 0x88E6 +#define GL_DYNAMIC_READ 0x88E9 +#define GL_DYNAMIC_COPY 0x88EA +#define GL_MAX_DRAW_BUFFERS 0x8824 +#define GL_DRAW_BUFFER0 0x8825 +#define GL_DRAW_BUFFER1 0x8826 +#define GL_DRAW_BUFFER2 0x8827 +#define GL_DRAW_BUFFER3 0x8828 +#define GL_DRAW_BUFFER4 0x8829 +#define GL_DRAW_BUFFER5 0x882A +#define GL_DRAW_BUFFER6 0x882B +#define GL_DRAW_BUFFER7 0x882C +#define GL_DRAW_BUFFER8 0x882D +#define GL_DRAW_BUFFER9 0x882E +#define GL_DRAW_BUFFER10 0x882F +#define GL_DRAW_BUFFER11 0x8830 +#define GL_DRAW_BUFFER12 0x8831 +#define GL_DRAW_BUFFER13 0x8832 +#define GL_DRAW_BUFFER14 0x8833 +#define GL_DRAW_BUFFER15 0x8834 +#define GL_MAX_FRAGMENT_UNIFORM_COMPONENTS 0x8B49 +#define GL_MAX_VERTEX_UNIFORM_COMPONENTS 0x8B4A +#define GL_SAMPLER_3D 0x8B5F +#define GL_SAMPLER_2D_SHADOW 0x8B62 +#define GL_FRAGMENT_SHADER_DERIVATIVE_HINT 0x8B8B +#define GL_PIXEL_PACK_BUFFER 0x88EB +#define GL_PIXEL_UNPACK_BUFFER 0x88EC +#define GL_PIXEL_PACK_BUFFER_BINDING 0x88ED +#define GL_PIXEL_UNPACK_BUFFER_BINDING 0x88EF +#define GL_FLOAT_MAT2x3 0x8B65 +#define GL_FLOAT_MAT2x4 0x8B66 +#define GL_FLOAT_MAT3x2 0x8B67 +#define GL_FLOAT_MAT3x4 0x8B68 +#define GL_FLOAT_MAT4x2 
0x8B69 +#define GL_FLOAT_MAT4x3 0x8B6A +#define GL_SRGB 0x8C40 +#define GL_SRGB8 0x8C41 +#define GL_SRGB8_ALPHA8 0x8C43 +#define GL_COMPARE_REF_TO_TEXTURE 0x884E +#define GL_MAJOR_VERSION 0x821B +#define GL_MINOR_VERSION 0x821C +#define GL_NUM_EXTENSIONS 0x821D +#define GL_RGBA32F 0x8814 +#define GL_RGB32F 0x8815 +#define GL_RGBA16F 0x881A +#define GL_RGB16F 0x881B +#define GL_VERTEX_ATTRIB_ARRAY_INTEGER 0x88FD +#define GL_MAX_ARRAY_TEXTURE_LAYERS 0x88FF +#define GL_MIN_PROGRAM_TEXEL_OFFSET 0x8904 +#define GL_MAX_PROGRAM_TEXEL_OFFSET 0x8905 +#define GL_MAX_VARYING_COMPONENTS 0x8B4B +#define GL_TEXTURE_2D_ARRAY 0x8C1A +#define GL_TEXTURE_BINDING_2D_ARRAY 0x8C1D +#define GL_R11F_G11F_B10F 0x8C3A +#define GL_UNSIGNED_INT_10F_11F_11F_REV 0x8C3B +#define GL_RGB9_E5 0x8C3D +#define GL_UNSIGNED_INT_5_9_9_9_REV 0x8C3E +#define GL_TRANSFORM_FEEDBACK_VARYING_MAX_LENGTH 0x8C76 +#define GL_TRANSFORM_FEEDBACK_BUFFER_MODE 0x8C7F +#define GL_MAX_TRANSFORM_FEEDBACK_SEPARATE_COMPONENTS 0x8C80 +#define GL_TRANSFORM_FEEDBACK_VARYINGS 0x8C83 +#define GL_TRANSFORM_FEEDBACK_BUFFER_START 0x8C84 +#define GL_TRANSFORM_FEEDBACK_BUFFER_SIZE 0x8C85 +#define GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN 0x8C88 +#define GL_RASTERIZER_DISCARD 0x8C89 +#define GL_MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS 0x8C8A +#define GL_MAX_TRANSFORM_FEEDBACK_SEPARATE_ATTRIBS 0x8C8B +#define GL_INTERLEAVED_ATTRIBS 0x8C8C +#define GL_SEPARATE_ATTRIBS 0x8C8D +#define GL_TRANSFORM_FEEDBACK_BUFFER 0x8C8E +#define GL_TRANSFORM_FEEDBACK_BUFFER_BINDING 0x8C8F +#define GL_RGBA32UI 0x8D70 +#define GL_RGB32UI 0x8D71 +#define GL_RGBA16UI 0x8D76 +#define GL_RGB16UI 0x8D77 +#define GL_RGBA8UI 0x8D7C +#define GL_RGB8UI 0x8D7D +#define GL_RGBA32I 0x8D82 +#define GL_RGB32I 0x8D83 +#define GL_RGBA16I 0x8D88 +#define GL_RGB16I 0x8D89 +#define GL_RGBA8I 0x8D8E +#define GL_RGB8I 0x8D8F +#define GL_RED_INTEGER 0x8D94 +#define GL_RGB_INTEGER 0x8D98 +#define GL_RGBA_INTEGER 0x8D99 +#define GL_SAMPLER_2D_ARRAY 0x8DC1 +#define 
GL_SAMPLER_2D_ARRAY_SHADOW 0x8DC4 +#define GL_SAMPLER_CUBE_SHADOW 0x8DC5 +#define GL_UNSIGNED_INT_VEC2 0x8DC6 +#define GL_UNSIGNED_INT_VEC3 0x8DC7 +#define GL_UNSIGNED_INT_VEC4 0x8DC8 +#define GL_INT_SAMPLER_2D 0x8DCA +#define GL_INT_SAMPLER_3D 0x8DCB +#define GL_INT_SAMPLER_CUBE 0x8DCC +#define GL_INT_SAMPLER_2D_ARRAY 0x8DCF +#define GL_UNSIGNED_INT_SAMPLER_2D 0x8DD2 +#define GL_UNSIGNED_INT_SAMPLER_3D 0x8DD3 +#define GL_UNSIGNED_INT_SAMPLER_CUBE 0x8DD4 +#define GL_UNSIGNED_INT_SAMPLER_2D_ARRAY 0x8DD7 +#define GL_BUFFER_ACCESS_FLAGS 0x911F +#define GL_BUFFER_MAP_LENGTH 0x9120 +#define GL_BUFFER_MAP_OFFSET 0x9121 +#define GL_DEPTH_COMPONENT32F 0x8CAC +#define GL_DEPTH32F_STENCIL8 0x8CAD +#define GL_FLOAT_32_UNSIGNED_INT_24_8_REV 0x8DAD +#define GL_FRAMEBUFFER_ATTACHMENT_COLOR_ENCODING 0x8210 +#define GL_FRAMEBUFFER_ATTACHMENT_COMPONENT_TYPE 0x8211 +#define GL_FRAMEBUFFER_ATTACHMENT_RED_SIZE 0x8212 +#define GL_FRAMEBUFFER_ATTACHMENT_GREEN_SIZE 0x8213 +#define GL_FRAMEBUFFER_ATTACHMENT_BLUE_SIZE 0x8214 +#define GL_FRAMEBUFFER_ATTACHMENT_ALPHA_SIZE 0x8215 +#define GL_FRAMEBUFFER_ATTACHMENT_DEPTH_SIZE 0x8216 +#define GL_FRAMEBUFFER_ATTACHMENT_STENCIL_SIZE 0x8217 +#define GL_FRAMEBUFFER_DEFAULT 0x8218 +#define GL_FRAMEBUFFER_UNDEFINED 0x8219 +#define GL_DEPTH_STENCIL_ATTACHMENT 0x821A +#define GL_DEPTH_STENCIL 0x84F9 +#define GL_UNSIGNED_INT_24_8 0x84FA +#define GL_DEPTH24_STENCIL8 0x88F0 +#define GL_UNSIGNED_NORMALIZED 0x8C17 +#define GL_DRAW_FRAMEBUFFER_BINDING GL_FRAMEBUFFER_BINDING +#define GL_READ_FRAMEBUFFER 0x8CA8 +#define GL_DRAW_FRAMEBUFFER 0x8CA9 +#define GL_READ_FRAMEBUFFER_BINDING 0x8CAA +#define GL_RENDERBUFFER_SAMPLES 0x8CAB +#define GL_FRAMEBUFFER_ATTACHMENT_TEXTURE_LAYER 0x8CD4 +#define GL_MAX_COLOR_ATTACHMENTS 0x8CDF +#define GL_COLOR_ATTACHMENT1 0x8CE1 +#define GL_COLOR_ATTACHMENT2 0x8CE2 +#define GL_COLOR_ATTACHMENT3 0x8CE3 +#define GL_COLOR_ATTACHMENT4 0x8CE4 +#define GL_COLOR_ATTACHMENT5 0x8CE5 +#define GL_COLOR_ATTACHMENT6 0x8CE6 +#define 
GL_COLOR_ATTACHMENT7 0x8CE7 +#define GL_COLOR_ATTACHMENT8 0x8CE8 +#define GL_COLOR_ATTACHMENT9 0x8CE9 +#define GL_COLOR_ATTACHMENT10 0x8CEA +#define GL_COLOR_ATTACHMENT11 0x8CEB +#define GL_COLOR_ATTACHMENT12 0x8CEC +#define GL_COLOR_ATTACHMENT13 0x8CED +#define GL_COLOR_ATTACHMENT14 0x8CEE +#define GL_COLOR_ATTACHMENT15 0x8CEF +#define GL_FRAMEBUFFER_INCOMPLETE_MULTISAMPLE 0x8D56 +#define GL_MAX_SAMPLES 0x8D57 +#define GL_HALF_FLOAT 0x140B +#define GL_MAP_READ_BIT 0x0001 +#define GL_MAP_WRITE_BIT 0x0002 +#define GL_MAP_INVALIDATE_RANGE_BIT 0x0004 +#define GL_MAP_INVALIDATE_BUFFER_BIT 0x0008 +#define GL_MAP_FLUSH_EXPLICIT_BIT 0x0010 +#define GL_MAP_UNSYNCHRONIZED_BIT 0x0020 +#define GL_RG 0x8227 +#define GL_RG_INTEGER 0x8228 +#define GL_R8 0x8229 +#define GL_RG8 0x822B +#define GL_R16F 0x822D +#define GL_R32F 0x822E +#define GL_RG16F 0x822F +#define GL_RG32F 0x8230 +#define GL_R8I 0x8231 +#define GL_R8UI 0x8232 +#define GL_R16I 0x8233 +#define GL_R16UI 0x8234 +#define GL_R32I 0x8235 +#define GL_R32UI 0x8236 +#define GL_RG8I 0x8237 +#define GL_RG8UI 0x8238 +#define GL_RG16I 0x8239 +#define GL_RG16UI 0x823A +#define GL_RG32I 0x823B +#define GL_RG32UI 0x823C +#define GL_VERTEX_ARRAY_BINDING 0x85B5 +#define GL_R8_SNORM 0x8F94 +#define GL_RG8_SNORM 0x8F95 +#define GL_RGB8_SNORM 0x8F96 +#define GL_RGBA8_SNORM 0x8F97 +#define GL_SIGNED_NORMALIZED 0x8F9C +#define GL_PRIMITIVE_RESTART_FIXED_INDEX 0x8D69 +#define GL_COPY_READ_BUFFER 0x8F36 +#define GL_COPY_WRITE_BUFFER 0x8F37 +#define GL_COPY_READ_BUFFER_BINDING GL_COPY_READ_BUFFER +#define GL_COPY_WRITE_BUFFER_BINDING GL_COPY_WRITE_BUFFER +#define GL_UNIFORM_BUFFER 0x8A11 +#define GL_UNIFORM_BUFFER_BINDING 0x8A28 +#define GL_UNIFORM_BUFFER_START 0x8A29 +#define GL_UNIFORM_BUFFER_SIZE 0x8A2A +#define GL_MAX_VERTEX_UNIFORM_BLOCKS 0x8A2B +#define GL_MAX_FRAGMENT_UNIFORM_BLOCKS 0x8A2D +#define GL_MAX_COMBINED_UNIFORM_BLOCKS 0x8A2E +#define GL_MAX_UNIFORM_BUFFER_BINDINGS 0x8A2F +#define GL_MAX_UNIFORM_BLOCK_SIZE 0x8A30 +#define 
GL_MAX_COMBINED_VERTEX_UNIFORM_COMPONENTS 0x8A31 +#define GL_MAX_COMBINED_FRAGMENT_UNIFORM_COMPONENTS 0x8A33 +#define GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT 0x8A34 +#define GL_ACTIVE_UNIFORM_BLOCK_MAX_NAME_LENGTH 0x8A35 +#define GL_ACTIVE_UNIFORM_BLOCKS 0x8A36 +#define GL_UNIFORM_TYPE 0x8A37 +#define GL_UNIFORM_SIZE 0x8A38 +#define GL_UNIFORM_NAME_LENGTH 0x8A39 +#define GL_UNIFORM_BLOCK_INDEX 0x8A3A +#define GL_UNIFORM_OFFSET 0x8A3B +#define GL_UNIFORM_ARRAY_STRIDE 0x8A3C +#define GL_UNIFORM_MATRIX_STRIDE 0x8A3D +#define GL_UNIFORM_IS_ROW_MAJOR 0x8A3E +#define GL_UNIFORM_BLOCK_BINDING 0x8A3F +#define GL_UNIFORM_BLOCK_DATA_SIZE 0x8A40 +#define GL_UNIFORM_BLOCK_NAME_LENGTH 0x8A41 +#define GL_UNIFORM_BLOCK_ACTIVE_UNIFORMS 0x8A42 +#define GL_UNIFORM_BLOCK_ACTIVE_UNIFORM_INDICES 0x8A43 +#define GL_UNIFORM_BLOCK_REFERENCED_BY_VERTEX_SHADER 0x8A44 +#define GL_UNIFORM_BLOCK_REFERENCED_BY_FRAGMENT_SHADER 0x8A46 +#define GL_INVALID_INDEX 0xFFFFFFFFu +#define GL_MAX_VERTEX_OUTPUT_COMPONENTS 0x9122 +#define GL_MAX_FRAGMENT_INPUT_COMPONENTS 0x9125 +#define GL_MAX_SERVER_WAIT_TIMEOUT 0x9111 +#define GL_OBJECT_TYPE 0x9112 +#define GL_SYNC_CONDITION 0x9113 +#define GL_SYNC_STATUS 0x9114 +#define GL_SYNC_FLAGS 0x9115 +#define GL_SYNC_FENCE 0x9116 +#define GL_SYNC_GPU_COMMANDS_COMPLETE 0x9117 +#define GL_UNSIGNALED 0x9118 +#define GL_SIGNALED 0x9119 +#define GL_ALREADY_SIGNALED 0x911A +#define GL_TIMEOUT_EXPIRED 0x911B +#define GL_CONDITION_SATISFIED 0x911C +#define GL_WAIT_FAILED 0x911D +#define GL_SYNC_FLUSH_COMMANDS_BIT 0x00000001 +#define GL_TIMEOUT_IGNORED 0xFFFFFFFFFFFFFFFFull +#define GL_VERTEX_ATTRIB_ARRAY_DIVISOR 0x88FE +#define GL_ANY_SAMPLES_PASSED 0x8C2F +#define GL_ANY_SAMPLES_PASSED_CONSERVATIVE 0x8D6A +#define GL_SAMPLER_BINDING 0x8919 +#define GL_RGB10_A2UI 0x906F +#define GL_TEXTURE_SWIZZLE_R 0x8E42 +#define GL_TEXTURE_SWIZZLE_G 0x8E43 +#define GL_TEXTURE_SWIZZLE_B 0x8E44 +#define GL_TEXTURE_SWIZZLE_A 0x8E45 +#define GL_GREEN 0x1904 +#define GL_BLUE 0x1905 +#define 
GL_INT_2_10_10_10_REV 0x8D9F +#define GL_TRANSFORM_FEEDBACK 0x8E22 +#define GL_TRANSFORM_FEEDBACK_PAUSED 0x8E23 +#define GL_TRANSFORM_FEEDBACK_ACTIVE 0x8E24 +#define GL_TRANSFORM_FEEDBACK_BINDING 0x8E25 +#define GL_PROGRAM_BINARY_RETRIEVABLE_HINT 0x8257 +#define GL_PROGRAM_BINARY_LENGTH 0x8741 +#define GL_NUM_PROGRAM_BINARY_FORMATS 0x87FE +#define GL_PROGRAM_BINARY_FORMATS 0x87FF +#define GL_COMPRESSED_R11_EAC 0x9270 +#define GL_COMPRESSED_SIGNED_R11_EAC 0x9271 +#define GL_COMPRESSED_RG11_EAC 0x9272 +#define GL_COMPRESSED_SIGNED_RG11_EAC 0x9273 +#define GL_COMPRESSED_RGB8_ETC2 0x9274 +#define GL_COMPRESSED_SRGB8_ETC2 0x9275 +#define GL_COMPRESSED_RGB8_PUNCHTHROUGH_ALPHA1_ETC2 0x9276 +#define GL_COMPRESSED_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2 0x9277 +#define GL_COMPRESSED_RGBA8_ETC2_EAC 0x9278 +#define GL_COMPRESSED_SRGB8_ALPHA8_ETC2_EAC 0x9279 +#define GL_TEXTURE_IMMUTABLE_FORMAT 0x912F +#define GL_MAX_ELEMENT_INDEX 0x8D6B +#define GL_NUM_SAMPLE_COUNTS 0x9380 +#define GL_TEXTURE_IMMUTABLE_LEVELS 0x82DF + +/*------------------------------------------------------------------------- + * Entrypoint definitions + *-----------------------------------------------------------------------*/ + +/* OpenGL ES 2.0 */ + +GL_APICALL void GL_APIENTRY glActiveTexture (GLenum texture); +GL_APICALL void GL_APIENTRY glAttachShader (GLuint program, GLuint shader); +GL_APICALL void GL_APIENTRY glBindAttribLocation (GLuint program, GLuint index, const GLchar* name); +GL_APICALL void GL_APIENTRY glBindBuffer (GLenum target, GLuint buffer); +GL_APICALL void GL_APIENTRY glBindFramebuffer (GLenum target, GLuint framebuffer); +GL_APICALL void GL_APIENTRY glBindRenderbuffer (GLenum target, GLuint renderbuffer); +GL_APICALL void GL_APIENTRY glBindTexture (GLenum target, GLuint texture); +GL_APICALL void GL_APIENTRY glBlendColor (GLfloat red, GLfloat green, GLfloat blue, GLfloat alpha); +GL_APICALL void GL_APIENTRY glBlendEquation (GLenum mode); +GL_APICALL void GL_APIENTRY glBlendEquationSeparate 
(GLenum modeRGB, GLenum modeAlpha); +GL_APICALL void GL_APIENTRY glBlendFunc (GLenum sfactor, GLenum dfactor); +GL_APICALL void GL_APIENTRY glBlendFuncSeparate (GLenum srcRGB, GLenum dstRGB, GLenum srcAlpha, GLenum dstAlpha); +GL_APICALL void GL_APIENTRY glBufferData (GLenum target, GLsizeiptr size, const GLvoid* data, GLenum usage); +GL_APICALL void GL_APIENTRY glBufferSubData (GLenum target, GLintptr offset, GLsizeiptr size, const GLvoid* data); +GL_APICALL GLenum GL_APIENTRY glCheckFramebufferStatus (GLenum target); +GL_APICALL void GL_APIENTRY glClear (GLbitfield mask); +GL_APICALL void GL_APIENTRY glClearColor (GLfloat red, GLfloat green, GLfloat blue, GLfloat alpha); +GL_APICALL void GL_APIENTRY glClearDepthf (GLfloat depth); +GL_APICALL void GL_APIENTRY glClearStencil (GLint s); +GL_APICALL void GL_APIENTRY glColorMask (GLboolean red, GLboolean green, GLboolean blue, GLboolean alpha); +GL_APICALL void GL_APIENTRY glCompileShader (GLuint shader); +GL_APICALL void GL_APIENTRY glCompressedTexImage2D (GLenum target, GLint level, GLenum internalformat, GLsizei width, GLsizei height, GLint border, GLsizei imageSize, const GLvoid* data); +GL_APICALL void GL_APIENTRY glCompressedTexSubImage2D (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLsizei width, GLsizei height, GLenum format, GLsizei imageSize, const GLvoid* data); +GL_APICALL void GL_APIENTRY glCopyTexImage2D (GLenum target, GLint level, GLenum internalformat, GLint x, GLint y, GLsizei width, GLsizei height, GLint border); +GL_APICALL void GL_APIENTRY glCopyTexSubImage2D (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint x, GLint y, GLsizei width, GLsizei height); +GL_APICALL GLuint GL_APIENTRY glCreateProgram (void); +GL_APICALL GLuint GL_APIENTRY glCreateShader (GLenum type); +GL_APICALL void GL_APIENTRY glCullFace (GLenum mode); +GL_APICALL void GL_APIENTRY glDeleteBuffers (GLsizei n, const GLuint* buffers); +GL_APICALL void GL_APIENTRY glDeleteFramebuffers (GLsizei n, const 
GLuint* framebuffers); +GL_APICALL void GL_APIENTRY glDeleteProgram (GLuint program); +GL_APICALL void GL_APIENTRY glDeleteRenderbuffers (GLsizei n, const GLuint* renderbuffers); +GL_APICALL void GL_APIENTRY glDeleteShader (GLuint shader); +GL_APICALL void GL_APIENTRY glDeleteTextures (GLsizei n, const GLuint* textures); +GL_APICALL void GL_APIENTRY glDepthFunc (GLenum func); +GL_APICALL void GL_APIENTRY glDepthMask (GLboolean flag); +GL_APICALL void GL_APIENTRY glDepthRangef (GLfloat n, GLfloat f); +GL_APICALL void GL_APIENTRY glDetachShader (GLuint program, GLuint shader); +GL_APICALL void GL_APIENTRY glDisable (GLenum cap); +GL_APICALL void GL_APIENTRY glDisableVertexAttribArray (GLuint index); +GL_APICALL void GL_APIENTRY glDrawArrays (GLenum mode, GLint first, GLsizei count); +GL_APICALL void GL_APIENTRY glDrawElements (GLenum mode, GLsizei count, GLenum type, const GLvoid* indices); +GL_APICALL void GL_APIENTRY glEnable (GLenum cap); +GL_APICALL void GL_APIENTRY glEnableVertexAttribArray (GLuint index); +GL_APICALL void GL_APIENTRY glFinish (void); +GL_APICALL void GL_APIENTRY glFlush (void); +GL_APICALL void GL_APIENTRY glFramebufferRenderbuffer (GLenum target, GLenum attachment, GLenum renderbuffertarget, GLuint renderbuffer); +GL_APICALL void GL_APIENTRY glFramebufferTexture2D (GLenum target, GLenum attachment, GLenum textarget, GLuint texture, GLint level); +GL_APICALL void GL_APIENTRY glFrontFace (GLenum mode); +GL_APICALL void GL_APIENTRY glGenBuffers (GLsizei n, GLuint* buffers); +GL_APICALL void GL_APIENTRY glGenerateMipmap (GLenum target); +GL_APICALL void GL_APIENTRY glGenFramebuffers (GLsizei n, GLuint* framebuffers); +GL_APICALL void GL_APIENTRY glGenRenderbuffers (GLsizei n, GLuint* renderbuffers); +GL_APICALL void GL_APIENTRY glGenTextures (GLsizei n, GLuint* textures); +GL_APICALL void GL_APIENTRY glGetActiveAttrib (GLuint program, GLuint index, GLsizei bufsize, GLsizei* length, GLint* size, GLenum* type, GLchar* name); +GL_APICALL void 
GL_APIENTRY glGetActiveUniform (GLuint program, GLuint index, GLsizei bufsize, GLsizei* length, GLint* size, GLenum* type, GLchar* name); +GL_APICALL void GL_APIENTRY glGetAttachedShaders (GLuint program, GLsizei maxcount, GLsizei* count, GLuint* shaders); +GL_APICALL GLint GL_APIENTRY glGetAttribLocation (GLuint program, const GLchar* name); +GL_APICALL void GL_APIENTRY glGetBooleanv (GLenum pname, GLboolean* params); +GL_APICALL void GL_APIENTRY glGetBufferParameteriv (GLenum target, GLenum pname, GLint* params); +GL_APICALL GLenum GL_APIENTRY glGetError (void); +GL_APICALL void GL_APIENTRY glGetFloatv (GLenum pname, GLfloat* params); +GL_APICALL void GL_APIENTRY glGetFramebufferAttachmentParameteriv (GLenum target, GLenum attachment, GLenum pname, GLint* params); +GL_APICALL void GL_APIENTRY glGetIntegerv (GLenum pname, GLint* params); +GL_APICALL void GL_APIENTRY glGetProgramiv (GLuint program, GLenum pname, GLint* params); +GL_APICALL void GL_APIENTRY glGetProgramInfoLog (GLuint program, GLsizei bufsize, GLsizei* length, GLchar* infolog); +GL_APICALL void GL_APIENTRY glGetRenderbufferParameteriv (GLenum target, GLenum pname, GLint* params); +GL_APICALL void GL_APIENTRY glGetShaderiv (GLuint shader, GLenum pname, GLint* params); +GL_APICALL void GL_APIENTRY glGetShaderInfoLog (GLuint shader, GLsizei bufsize, GLsizei* length, GLchar* infolog); +GL_APICALL void GL_APIENTRY glGetShaderPrecisionFormat (GLenum shadertype, GLenum precisiontype, GLint* range, GLint* precision); +GL_APICALL void GL_APIENTRY glGetShaderSource (GLuint shader, GLsizei bufsize, GLsizei* length, GLchar* source); +GL_APICALL const GLubyte* GL_APIENTRY glGetString (GLenum name); +GL_APICALL void GL_APIENTRY glGetTexParameterfv (GLenum target, GLenum pname, GLfloat* params); +GL_APICALL void GL_APIENTRY glGetTexParameteriv (GLenum target, GLenum pname, GLint* params); +GL_APICALL void GL_APIENTRY glGetUniformfv (GLuint program, GLint location, GLfloat* params); +GL_APICALL void GL_APIENTRY 
glGetUniformiv (GLuint program, GLint location, GLint* params); +GL_APICALL GLint GL_APIENTRY glGetUniformLocation (GLuint program, const GLchar* name); +GL_APICALL void GL_APIENTRY glGetVertexAttribfv (GLuint index, GLenum pname, GLfloat* params); +GL_APICALL void GL_APIENTRY glGetVertexAttribiv (GLuint index, GLenum pname, GLint* params); +GL_APICALL void GL_APIENTRY glGetVertexAttribPointerv (GLuint index, GLenum pname, GLvoid** pointer); +GL_APICALL void GL_APIENTRY glHint (GLenum target, GLenum mode); +GL_APICALL GLboolean GL_APIENTRY glIsBuffer (GLuint buffer); +GL_APICALL GLboolean GL_APIENTRY glIsEnabled (GLenum cap); +GL_APICALL GLboolean GL_APIENTRY glIsFramebuffer (GLuint framebuffer); +GL_APICALL GLboolean GL_APIENTRY glIsProgram (GLuint program); +GL_APICALL GLboolean GL_APIENTRY glIsRenderbuffer (GLuint renderbuffer); +GL_APICALL GLboolean GL_APIENTRY glIsShader (GLuint shader); +GL_APICALL GLboolean GL_APIENTRY glIsTexture (GLuint texture); +GL_APICALL void GL_APIENTRY glLineWidth (GLfloat width); +GL_APICALL void GL_APIENTRY glLinkProgram (GLuint program); +GL_APICALL void GL_APIENTRY glPixelStorei (GLenum pname, GLint param); +GL_APICALL void GL_APIENTRY glPolygonOffset (GLfloat factor, GLfloat units); +GL_APICALL void GL_APIENTRY glReadPixels (GLint x, GLint y, GLsizei width, GLsizei height, GLenum format, GLenum type, GLvoid* pixels); +GL_APICALL void GL_APIENTRY glReleaseShaderCompiler (void); +GL_APICALL void GL_APIENTRY glRenderbufferStorage (GLenum target, GLenum internalformat, GLsizei width, GLsizei height); +GL_APICALL void GL_APIENTRY glSampleCoverage (GLfloat value, GLboolean invert); +GL_APICALL void GL_APIENTRY glScissor (GLint x, GLint y, GLsizei width, GLsizei height); +GL_APICALL void GL_APIENTRY glShaderBinary (GLsizei n, const GLuint* shaders, GLenum binaryformat, const GLvoid* binary, GLsizei length); +GL_APICALL void GL_APIENTRY glShaderSource (GLuint shader, GLsizei count, const GLchar* const* string, const GLint* length); 
+GL_APICALL void GL_APIENTRY glStencilFunc (GLenum func, GLint ref, GLuint mask); +GL_APICALL void GL_APIENTRY glStencilFuncSeparate (GLenum face, GLenum func, GLint ref, GLuint mask); +GL_APICALL void GL_APIENTRY glStencilMask (GLuint mask); +GL_APICALL void GL_APIENTRY glStencilMaskSeparate (GLenum face, GLuint mask); +GL_APICALL void GL_APIENTRY glStencilOp (GLenum fail, GLenum zfail, GLenum zpass); +GL_APICALL void GL_APIENTRY glStencilOpSeparate (GLenum face, GLenum fail, GLenum zfail, GLenum zpass); +GL_APICALL void GL_APIENTRY glTexImage2D (GLenum target, GLint level, GLint internalformat, GLsizei width, GLsizei height, GLint border, GLenum format, GLenum type, const GLvoid* pixels); +GL_APICALL void GL_APIENTRY glTexParameterf (GLenum target, GLenum pname, GLfloat param); +GL_APICALL void GL_APIENTRY glTexParameterfv (GLenum target, GLenum pname, const GLfloat* params); +GL_APICALL void GL_APIENTRY glTexParameteri (GLenum target, GLenum pname, GLint param); +GL_APICALL void GL_APIENTRY glTexParameteriv (GLenum target, GLenum pname, const GLint* params); +GL_APICALL void GL_APIENTRY glTexSubImage2D (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLsizei width, GLsizei height, GLenum format, GLenum type, const GLvoid* pixels); +GL_APICALL void GL_APIENTRY glUniform1f (GLint location, GLfloat x); +GL_APICALL void GL_APIENTRY glUniform1fv (GLint location, GLsizei count, const GLfloat* v); +GL_APICALL void GL_APIENTRY glUniform1i (GLint location, GLint x); +GL_APICALL void GL_APIENTRY glUniform1iv (GLint location, GLsizei count, const GLint* v); +GL_APICALL void GL_APIENTRY glUniform2f (GLint location, GLfloat x, GLfloat y); +GL_APICALL void GL_APIENTRY glUniform2fv (GLint location, GLsizei count, const GLfloat* v); +GL_APICALL void GL_APIENTRY glUniform2i (GLint location, GLint x, GLint y); +GL_APICALL void GL_APIENTRY glUniform2iv (GLint location, GLsizei count, const GLint* v); +GL_APICALL void GL_APIENTRY glUniform3f (GLint location, GLfloat x, 
GLfloat y, GLfloat z); +GL_APICALL void GL_APIENTRY glUniform3fv (GLint location, GLsizei count, const GLfloat* v); +GL_APICALL void GL_APIENTRY glUniform3i (GLint location, GLint x, GLint y, GLint z); +GL_APICALL void GL_APIENTRY glUniform3iv (GLint location, GLsizei count, const GLint* v); +GL_APICALL void GL_APIENTRY glUniform4f (GLint location, GLfloat x, GLfloat y, GLfloat z, GLfloat w); +GL_APICALL void GL_APIENTRY glUniform4fv (GLint location, GLsizei count, const GLfloat* v); +GL_APICALL void GL_APIENTRY glUniform4i (GLint location, GLint x, GLint y, GLint z, GLint w); +GL_APICALL void GL_APIENTRY glUniform4iv (GLint location, GLsizei count, const GLint* v); +GL_APICALL void GL_APIENTRY glUniformMatrix2fv (GLint location, GLsizei count, GLboolean transpose, const GLfloat* value); +GL_APICALL void GL_APIENTRY glUniformMatrix3fv (GLint location, GLsizei count, GLboolean transpose, const GLfloat* value); +GL_APICALL void GL_APIENTRY glUniformMatrix4fv (GLint location, GLsizei count, GLboolean transpose, const GLfloat* value); +GL_APICALL void GL_APIENTRY glUseProgram (GLuint program); +GL_APICALL void GL_APIENTRY glValidateProgram (GLuint program); +GL_APICALL void GL_APIENTRY glVertexAttrib1f (GLuint indx, GLfloat x); +GL_APICALL void GL_APIENTRY glVertexAttrib1fv (GLuint indx, const GLfloat* values); +GL_APICALL void GL_APIENTRY glVertexAttrib2f (GLuint indx, GLfloat x, GLfloat y); +GL_APICALL void GL_APIENTRY glVertexAttrib2fv (GLuint indx, const GLfloat* values); +GL_APICALL void GL_APIENTRY glVertexAttrib3f (GLuint indx, GLfloat x, GLfloat y, GLfloat z); +GL_APICALL void GL_APIENTRY glVertexAttrib3fv (GLuint indx, const GLfloat* values); +GL_APICALL void GL_APIENTRY glVertexAttrib4f (GLuint indx, GLfloat x, GLfloat y, GLfloat z, GLfloat w); +GL_APICALL void GL_APIENTRY glVertexAttrib4fv (GLuint indx, const GLfloat* values); +GL_APICALL void GL_APIENTRY glVertexAttribPointer (GLuint indx, GLint size, GLenum type, GLboolean normalized, GLsizei stride, const 
GLvoid* ptr); +GL_APICALL void GL_APIENTRY glViewport (GLint x, GLint y, GLsizei width, GLsizei height); + +/* OpenGL ES 3.0 */ + +GL_APICALL void GL_APIENTRY glReadBuffer (GLenum mode); +GL_APICALL void GL_APIENTRY glDrawRangeElements (GLenum mode, GLuint start, GLuint end, GLsizei count, GLenum type, const GLvoid* indices); +GL_APICALL void GL_APIENTRY glTexImage3D (GLenum target, GLint level, GLint internalformat, GLsizei width, GLsizei height, GLsizei depth, GLint border, GLenum format, GLenum type, const GLvoid* pixels); +GL_APICALL void GL_APIENTRY glTexSubImage3D (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLenum format, GLenum type, const GLvoid* pixels); +GL_APICALL void GL_APIENTRY glCopyTexSubImage3D (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLint x, GLint y, GLsizei width, GLsizei height); +GL_APICALL void GL_APIENTRY glCompressedTexImage3D (GLenum target, GLint level, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth, GLint border, GLsizei imageSize, const GLvoid* data); +GL_APICALL void GL_APIENTRY glCompressedTexSubImage3D (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLenum format, GLsizei imageSize, const GLvoid* data); +GL_APICALL void GL_APIENTRY glGenQueries (GLsizei n, GLuint* ids); +GL_APICALL void GL_APIENTRY glDeleteQueries (GLsizei n, const GLuint* ids); +GL_APICALL GLboolean GL_APIENTRY glIsQuery (GLuint id); +GL_APICALL void GL_APIENTRY glBeginQuery (GLenum target, GLuint id); +GL_APICALL void GL_APIENTRY glEndQuery (GLenum target); +GL_APICALL void GL_APIENTRY glGetQueryiv (GLenum target, GLenum pname, GLint* params); +GL_APICALL void GL_APIENTRY glGetQueryObjectuiv (GLuint id, GLenum pname, GLuint* params); +GL_APICALL GLboolean GL_APIENTRY glUnmapBuffer (GLenum target); +GL_APICALL void GL_APIENTRY glGetBufferPointerv (GLenum target, 
GLenum pname, GLvoid** params); +GL_APICALL void GL_APIENTRY glDrawBuffers (GLsizei n, const GLenum* bufs); +GL_APICALL void GL_APIENTRY glUniformMatrix2x3fv (GLint location, GLsizei count, GLboolean transpose, const GLfloat* value); +GL_APICALL void GL_APIENTRY glUniformMatrix3x2fv (GLint location, GLsizei count, GLboolean transpose, const GLfloat* value); +GL_APICALL void GL_APIENTRY glUniformMatrix2x4fv (GLint location, GLsizei count, GLboolean transpose, const GLfloat* value); +GL_APICALL void GL_APIENTRY glUniformMatrix4x2fv (GLint location, GLsizei count, GLboolean transpose, const GLfloat* value); +GL_APICALL void GL_APIENTRY glUniformMatrix3x4fv (GLint location, GLsizei count, GLboolean transpose, const GLfloat* value); +GL_APICALL void GL_APIENTRY glUniformMatrix4x3fv (GLint location, GLsizei count, GLboolean transpose, const GLfloat* value); +GL_APICALL void GL_APIENTRY glBlitFramebuffer (GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1, GLbitfield mask, GLenum filter); +GL_APICALL void GL_APIENTRY glRenderbufferStorageMultisample (GLenum target, GLsizei samples, GLenum internalformat, GLsizei width, GLsizei height); +GL_APICALL void GL_APIENTRY glFramebufferTextureLayer (GLenum target, GLenum attachment, GLuint texture, GLint level, GLint layer); +GL_APICALL GLvoid* GL_APIENTRY glMapBufferRange (GLenum target, GLintptr offset, GLsizeiptr length, GLbitfield access); +GL_APICALL void GL_APIENTRY glFlushMappedBufferRange (GLenum target, GLintptr offset, GLsizeiptr length); +GL_APICALL void GL_APIENTRY glBindVertexArray (GLuint array); +GL_APICALL void GL_APIENTRY glDeleteVertexArrays (GLsizei n, const GLuint* arrays); +GL_APICALL void GL_APIENTRY glGenVertexArrays (GLsizei n, GLuint* arrays); +GL_APICALL GLboolean GL_APIENTRY glIsVertexArray (GLuint array); +GL_APICALL void GL_APIENTRY glGetIntegeri_v (GLenum target, GLuint index, GLint* data); +GL_APICALL void GL_APIENTRY glBeginTransformFeedback 
(GLenum primitiveMode); +GL_APICALL void GL_APIENTRY glEndTransformFeedback (void); +GL_APICALL void GL_APIENTRY glBindBufferRange (GLenum target, GLuint index, GLuint buffer, GLintptr offset, GLsizeiptr size); +GL_APICALL void GL_APIENTRY glBindBufferBase (GLenum target, GLuint index, GLuint buffer); +GL_APICALL void GL_APIENTRY glTransformFeedbackVaryings (GLuint program, GLsizei count, const GLchar* const* varyings, GLenum bufferMode); +GL_APICALL void GL_APIENTRY glGetTransformFeedbackVarying (GLuint program, GLuint index, GLsizei bufSize, GLsizei* length, GLsizei* size, GLenum* type, GLchar* name); +GL_APICALL void GL_APIENTRY glVertexAttribIPointer (GLuint index, GLint size, GLenum type, GLsizei stride, const GLvoid* pointer); +GL_APICALL void GL_APIENTRY glGetVertexAttribIiv (GLuint index, GLenum pname, GLint* params); +GL_APICALL void GL_APIENTRY glGetVertexAttribIuiv (GLuint index, GLenum pname, GLuint* params); +GL_APICALL void GL_APIENTRY glVertexAttribI4i (GLuint index, GLint x, GLint y, GLint z, GLint w); +GL_APICALL void GL_APIENTRY glVertexAttribI4ui (GLuint index, GLuint x, GLuint y, GLuint z, GLuint w); +GL_APICALL void GL_APIENTRY glVertexAttribI4iv (GLuint index, const GLint* v); +GL_APICALL void GL_APIENTRY glVertexAttribI4uiv (GLuint index, const GLuint* v); +GL_APICALL void GL_APIENTRY glGetUniformuiv (GLuint program, GLint location, GLuint* params); +GL_APICALL GLint GL_APIENTRY glGetFragDataLocation (GLuint program, const GLchar *name); +GL_APICALL void GL_APIENTRY glUniform1ui (GLint location, GLuint v0); +GL_APICALL void GL_APIENTRY glUniform2ui (GLint location, GLuint v0, GLuint v1); +GL_APICALL void GL_APIENTRY glUniform3ui (GLint location, GLuint v0, GLuint v1, GLuint v2); +GL_APICALL void GL_APIENTRY glUniform4ui (GLint location, GLuint v0, GLuint v1, GLuint v2, GLuint v3); +GL_APICALL void GL_APIENTRY glUniform1uiv (GLint location, GLsizei count, const GLuint* value); +GL_APICALL void GL_APIENTRY glUniform2uiv (GLint location, GLsizei 
count, const GLuint* value); +GL_APICALL void GL_APIENTRY glUniform3uiv (GLint location, GLsizei count, const GLuint* value); +GL_APICALL void GL_APIENTRY glUniform4uiv (GLint location, GLsizei count, const GLuint* value); +GL_APICALL void GL_APIENTRY glClearBufferiv (GLenum buffer, GLint drawbuffer, const GLint* value); +GL_APICALL void GL_APIENTRY glClearBufferuiv (GLenum buffer, GLint drawbuffer, const GLuint* value); +GL_APICALL void GL_APIENTRY glClearBufferfv (GLenum buffer, GLint drawbuffer, const GLfloat* value); +GL_APICALL void GL_APIENTRY glClearBufferfi (GLenum buffer, GLint drawbuffer, GLfloat depth, GLint stencil); +GL_APICALL const GLubyte* GL_APIENTRY glGetStringi (GLenum name, GLuint index); +GL_APICALL void GL_APIENTRY glCopyBufferSubData (GLenum readTarget, GLenum writeTarget, GLintptr readOffset, GLintptr writeOffset, GLsizeiptr size); +GL_APICALL void GL_APIENTRY glGetUniformIndices (GLuint program, GLsizei uniformCount, const GLchar* const* uniformNames, GLuint* uniformIndices); +GL_APICALL void GL_APIENTRY glGetActiveUniformsiv (GLuint program, GLsizei uniformCount, const GLuint* uniformIndices, GLenum pname, GLint* params); +GL_APICALL GLuint GL_APIENTRY glGetUniformBlockIndex (GLuint program, const GLchar* uniformBlockName); +GL_APICALL void GL_APIENTRY glGetActiveUniformBlockiv (GLuint program, GLuint uniformBlockIndex, GLenum pname, GLint* params); +GL_APICALL void GL_APIENTRY glGetActiveUniformBlockName (GLuint program, GLuint uniformBlockIndex, GLsizei bufSize, GLsizei* length, GLchar* uniformBlockName); +GL_APICALL void GL_APIENTRY glUniformBlockBinding (GLuint program, GLuint uniformBlockIndex, GLuint uniformBlockBinding); +GL_APICALL void GL_APIENTRY glDrawArraysInstanced (GLenum mode, GLint first, GLsizei count, GLsizei instanceCount); +GL_APICALL void GL_APIENTRY glDrawElementsInstanced (GLenum mode, GLsizei count, GLenum type, const GLvoid* indices, GLsizei instanceCount); +GL_APICALL GLsync GL_APIENTRY glFenceSync (GLenum 
condition, GLbitfield flags); +GL_APICALL GLboolean GL_APIENTRY glIsSync (GLsync sync); +GL_APICALL void GL_APIENTRY glDeleteSync (GLsync sync); +GL_APICALL GLenum GL_APIENTRY glClientWaitSync (GLsync sync, GLbitfield flags, GLuint64 timeout); +GL_APICALL void GL_APIENTRY glWaitSync (GLsync sync, GLbitfield flags, GLuint64 timeout); +GL_APICALL void GL_APIENTRY glGetInteger64v (GLenum pname, GLint64* params); +GL_APICALL void GL_APIENTRY glGetSynciv (GLsync sync, GLenum pname, GLsizei bufSize, GLsizei* length, GLint* values); +GL_APICALL void GL_APIENTRY glGetInteger64i_v (GLenum target, GLuint index, GLint64* data); +GL_APICALL void GL_APIENTRY glGetBufferParameteri64v (GLenum target, GLenum pname, GLint64* params); +GL_APICALL void GL_APIENTRY glGenSamplers (GLsizei count, GLuint* samplers); +GL_APICALL void GL_APIENTRY glDeleteSamplers (GLsizei count, const GLuint* samplers); +GL_APICALL GLboolean GL_APIENTRY glIsSampler (GLuint sampler); +GL_APICALL void GL_APIENTRY glBindSampler (GLuint unit, GLuint sampler); +GL_APICALL void GL_APIENTRY glSamplerParameteri (GLuint sampler, GLenum pname, GLint param); +GL_APICALL void GL_APIENTRY glSamplerParameteriv (GLuint sampler, GLenum pname, const GLint* param); +GL_APICALL void GL_APIENTRY glSamplerParameterf (GLuint sampler, GLenum pname, GLfloat param); +GL_APICALL void GL_APIENTRY glSamplerParameterfv (GLuint sampler, GLenum pname, const GLfloat* param); +GL_APICALL void GL_APIENTRY glGetSamplerParameteriv (GLuint sampler, GLenum pname, GLint* params); +GL_APICALL void GL_APIENTRY glGetSamplerParameterfv (GLuint sampler, GLenum pname, GLfloat* params); +GL_APICALL void GL_APIENTRY glVertexAttribDivisor (GLuint index, GLuint divisor); +GL_APICALL void GL_APIENTRY glBindTransformFeedback (GLenum target, GLuint id); +GL_APICALL void GL_APIENTRY glDeleteTransformFeedbacks (GLsizei n, const GLuint* ids); +GL_APICALL void GL_APIENTRY glGenTransformFeedbacks (GLsizei n, GLuint* ids); +GL_APICALL GLboolean GL_APIENTRY 
glIsTransformFeedback (GLuint id); +GL_APICALL void GL_APIENTRY glPauseTransformFeedback (void); +GL_APICALL void GL_APIENTRY glResumeTransformFeedback (void); +GL_APICALL void GL_APIENTRY glGetProgramBinary (GLuint program, GLsizei bufSize, GLsizei* length, GLenum* binaryFormat, GLvoid* binary); +GL_APICALL void GL_APIENTRY glProgramBinary (GLuint program, GLenum binaryFormat, const GLvoid* binary, GLsizei length); +GL_APICALL void GL_APIENTRY glProgramParameteri (GLuint program, GLenum pname, GLint value); +GL_APICALL void GL_APIENTRY glInvalidateFramebuffer (GLenum target, GLsizei numAttachments, const GLenum* attachments); +GL_APICALL void GL_APIENTRY glInvalidateSubFramebuffer (GLenum target, GLsizei numAttachments, const GLenum* attachments, GLint x, GLint y, GLsizei width, GLsizei height); +GL_APICALL void GL_APIENTRY glTexStorage2D (GLenum target, GLsizei levels, GLenum internalformat, GLsizei width, GLsizei height); +GL_APICALL void GL_APIENTRY glTexStorage3D (GLenum target, GLsizei levels, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth); +GL_APICALL void GL_APIENTRY glGetInternalformativ (GLenum target, GLenum internalformat, GLenum pname, GLsizei bufSize, GLint* params); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/opengles-3.1/include/GLES3/gl31.h b/opengles-3.1/include/GLES3/gl31.h new file mode 100644 index 0000000000..987724a349 --- /dev/null +++ b/opengles-3.1/include/GLES3/gl31.h @@ -0,0 +1,1187 @@ +#ifndef __gl31_h_ +#define __gl31_h_ 1 + +#ifdef __cplusplus +extern "C" { +#endif + +/* +** Copyright (c) 2013-2014 The Khronos Group Inc. 
+** +** Permission is hereby granted, free of charge, to any person obtaining a +** copy of this software and/or associated documentation files (the +** "Materials"), to deal in the Materials without restriction, including +** without limitation the rights to use, copy, modify, merge, publish, +** distribute, sublicense, and/or sell copies of the Materials, and to +** permit persons to whom the Materials are furnished to do so, subject to +** the following conditions: +** +** The above copyright notice and this permission notice shall be included +** in all copies or substantial portions of the Materials. +** +** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +** MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. +*/ +/* +** This header is generated from the Khronos OpenGL / OpenGL ES XML +** API Registry. 
The current version of the Registry, generator scripts +** used to make the header, and the header can be found at +** http://www.opengl.org/registry/ +** +** Khronos $Revision$ on $Date$ +*/ + +#include +#ifndef GL_APIENTRYP +#define GL_APIENTRYP GL_APIENTRY* +#endif + +/* Generated on date 20140620 */ + +/* Generated C header for: + * API: gles2 + * Profile: common + * Versions considered: 2.[0-9]|3.[01] + * Versions emitted: .* + * Default extensions included: None + * Additional extensions included: _nomatch_^ + * Extensions removed: _nomatch_^ + */ + +#ifndef GL_ES_VERSION_2_0 +#define GL_ES_VERSION_2_0 1 +#include +typedef khronos_int8_t GLbyte; +typedef khronos_float_t GLclampf; +typedef khronos_int32_t GLfixed; +typedef short GLshort; +typedef unsigned short GLushort; +typedef void GLvoid; +typedef struct __GLsync *GLsync; +typedef khronos_int64_t GLint64; +typedef khronos_uint64_t GLuint64; +typedef unsigned int GLenum; +typedef unsigned int GLuint; +typedef char GLchar; +typedef khronos_float_t GLfloat; +typedef khronos_ssize_t GLsizeiptr; +typedef khronos_intptr_t GLintptr; +typedef unsigned int GLbitfield; +typedef int GLint; +typedef unsigned char GLboolean; +typedef int GLsizei; +typedef khronos_uint8_t GLubyte; +#define GL_DEPTH_BUFFER_BIT 0x00000100 +#define GL_STENCIL_BUFFER_BIT 0x00000400 +#define GL_COLOR_BUFFER_BIT 0x00004000 +#define GL_FALSE 0 +#define GL_TRUE 1 +#define GL_POINTS 0x0000 +#define GL_LINES 0x0001 +#define GL_LINE_LOOP 0x0002 +#define GL_LINE_STRIP 0x0003 +#define GL_TRIANGLES 0x0004 +#define GL_TRIANGLE_STRIP 0x0005 +#define GL_TRIANGLE_FAN 0x0006 +#define GL_ZERO 0 +#define GL_ONE 1 +#define GL_SRC_COLOR 0x0300 +#define GL_ONE_MINUS_SRC_COLOR 0x0301 +#define GL_SRC_ALPHA 0x0302 +#define GL_ONE_MINUS_SRC_ALPHA 0x0303 +#define GL_DST_ALPHA 0x0304 +#define GL_ONE_MINUS_DST_ALPHA 0x0305 +#define GL_DST_COLOR 0x0306 +#define GL_ONE_MINUS_DST_COLOR 0x0307 +#define GL_SRC_ALPHA_SATURATE 0x0308 +#define GL_FUNC_ADD 0x8006 +#define 
GL_BLEND_EQUATION 0x8009 +#define GL_BLEND_EQUATION_RGB 0x8009 +#define GL_BLEND_EQUATION_ALPHA 0x883D +#define GL_FUNC_SUBTRACT 0x800A +#define GL_FUNC_REVERSE_SUBTRACT 0x800B +#define GL_BLEND_DST_RGB 0x80C8 +#define GL_BLEND_SRC_RGB 0x80C9 +#define GL_BLEND_DST_ALPHA 0x80CA +#define GL_BLEND_SRC_ALPHA 0x80CB +#define GL_CONSTANT_COLOR 0x8001 +#define GL_ONE_MINUS_CONSTANT_COLOR 0x8002 +#define GL_CONSTANT_ALPHA 0x8003 +#define GL_ONE_MINUS_CONSTANT_ALPHA 0x8004 +#define GL_BLEND_COLOR 0x8005 +#define GL_ARRAY_BUFFER 0x8892 +#define GL_ELEMENT_ARRAY_BUFFER 0x8893 +#define GL_ARRAY_BUFFER_BINDING 0x8894 +#define GL_ELEMENT_ARRAY_BUFFER_BINDING 0x8895 +#define GL_STREAM_DRAW 0x88E0 +#define GL_STATIC_DRAW 0x88E4 +#define GL_DYNAMIC_DRAW 0x88E8 +#define GL_BUFFER_SIZE 0x8764 +#define GL_BUFFER_USAGE 0x8765 +#define GL_CURRENT_VERTEX_ATTRIB 0x8626 +#define GL_FRONT 0x0404 +#define GL_BACK 0x0405 +#define GL_FRONT_AND_BACK 0x0408 +#define GL_TEXTURE_2D 0x0DE1 +#define GL_CULL_FACE 0x0B44 +#define GL_BLEND 0x0BE2 +#define GL_DITHER 0x0BD0 +#define GL_STENCIL_TEST 0x0B90 +#define GL_DEPTH_TEST 0x0B71 +#define GL_SCISSOR_TEST 0x0C11 +#define GL_POLYGON_OFFSET_FILL 0x8037 +#define GL_SAMPLE_ALPHA_TO_COVERAGE 0x809E +#define GL_SAMPLE_COVERAGE 0x80A0 +#define GL_NO_ERROR 0 +#define GL_INVALID_ENUM 0x0500 +#define GL_INVALID_VALUE 0x0501 +#define GL_INVALID_OPERATION 0x0502 +#define GL_OUT_OF_MEMORY 0x0505 +#define GL_CW 0x0900 +#define GL_CCW 0x0901 +#define GL_LINE_WIDTH 0x0B21 +#define GL_ALIASED_POINT_SIZE_RANGE 0x846D +#define GL_ALIASED_LINE_WIDTH_RANGE 0x846E +#define GL_CULL_FACE_MODE 0x0B45 +#define GL_FRONT_FACE 0x0B46 +#define GL_DEPTH_RANGE 0x0B70 +#define GL_DEPTH_WRITEMASK 0x0B72 +#define GL_DEPTH_CLEAR_VALUE 0x0B73 +#define GL_DEPTH_FUNC 0x0B74 +#define GL_STENCIL_CLEAR_VALUE 0x0B91 +#define GL_STENCIL_FUNC 0x0B92 +#define GL_STENCIL_FAIL 0x0B94 +#define GL_STENCIL_PASS_DEPTH_FAIL 0x0B95 +#define GL_STENCIL_PASS_DEPTH_PASS 0x0B96 +#define GL_STENCIL_REF 
0x0B97 +#define GL_STENCIL_VALUE_MASK 0x0B93 +#define GL_STENCIL_WRITEMASK 0x0B98 +#define GL_STENCIL_BACK_FUNC 0x8800 +#define GL_STENCIL_BACK_FAIL 0x8801 +#define GL_STENCIL_BACK_PASS_DEPTH_FAIL 0x8802 +#define GL_STENCIL_BACK_PASS_DEPTH_PASS 0x8803 +#define GL_STENCIL_BACK_REF 0x8CA3 +#define GL_STENCIL_BACK_VALUE_MASK 0x8CA4 +#define GL_STENCIL_BACK_WRITEMASK 0x8CA5 +#define GL_VIEWPORT 0x0BA2 +#define GL_SCISSOR_BOX 0x0C10 +#define GL_COLOR_CLEAR_VALUE 0x0C22 +#define GL_COLOR_WRITEMASK 0x0C23 +#define GL_UNPACK_ALIGNMENT 0x0CF5 +#define GL_PACK_ALIGNMENT 0x0D05 +#define GL_MAX_TEXTURE_SIZE 0x0D33 +#define GL_MAX_VIEWPORT_DIMS 0x0D3A +#define GL_SUBPIXEL_BITS 0x0D50 +#define GL_RED_BITS 0x0D52 +#define GL_GREEN_BITS 0x0D53 +#define GL_BLUE_BITS 0x0D54 +#define GL_ALPHA_BITS 0x0D55 +#define GL_DEPTH_BITS 0x0D56 +#define GL_STENCIL_BITS 0x0D57 +#define GL_POLYGON_OFFSET_UNITS 0x2A00 +#define GL_POLYGON_OFFSET_FACTOR 0x8038 +#define GL_TEXTURE_BINDING_2D 0x8069 +#define GL_SAMPLE_BUFFERS 0x80A8 +#define GL_SAMPLES 0x80A9 +#define GL_SAMPLE_COVERAGE_VALUE 0x80AA +#define GL_SAMPLE_COVERAGE_INVERT 0x80AB +#define GL_NUM_COMPRESSED_TEXTURE_FORMATS 0x86A2 +#define GL_COMPRESSED_TEXTURE_FORMATS 0x86A3 +#define GL_DONT_CARE 0x1100 +#define GL_FASTEST 0x1101 +#define GL_NICEST 0x1102 +#define GL_GENERATE_MIPMAP_HINT 0x8192 +#define GL_BYTE 0x1400 +#define GL_UNSIGNED_BYTE 0x1401 +#define GL_SHORT 0x1402 +#define GL_UNSIGNED_SHORT 0x1403 +#define GL_INT 0x1404 +#define GL_UNSIGNED_INT 0x1405 +#define GL_FLOAT 0x1406 +#define GL_FIXED 0x140C +#define GL_DEPTH_COMPONENT 0x1902 +#define GL_ALPHA 0x1906 +#define GL_RGB 0x1907 +#define GL_RGBA 0x1908 +#define GL_LUMINANCE 0x1909 +#define GL_LUMINANCE_ALPHA 0x190A +#define GL_UNSIGNED_SHORT_4_4_4_4 0x8033 +#define GL_UNSIGNED_SHORT_5_5_5_1 0x8034 +#define GL_UNSIGNED_SHORT_5_6_5 0x8363 +#define GL_FRAGMENT_SHADER 0x8B30 +#define GL_VERTEX_SHADER 0x8B31 +#define GL_MAX_VERTEX_ATTRIBS 0x8869 +#define 
GL_MAX_VERTEX_UNIFORM_VECTORS 0x8DFB +#define GL_MAX_VARYING_VECTORS 0x8DFC +#define GL_MAX_COMBINED_TEXTURE_IMAGE_UNITS 0x8B4D +#define GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS 0x8B4C +#define GL_MAX_TEXTURE_IMAGE_UNITS 0x8872 +#define GL_MAX_FRAGMENT_UNIFORM_VECTORS 0x8DFD +#define GL_SHADER_TYPE 0x8B4F +#define GL_DELETE_STATUS 0x8B80 +#define GL_LINK_STATUS 0x8B82 +#define GL_VALIDATE_STATUS 0x8B83 +#define GL_ATTACHED_SHADERS 0x8B85 +#define GL_ACTIVE_UNIFORMS 0x8B86 +#define GL_ACTIVE_UNIFORM_MAX_LENGTH 0x8B87 +#define GL_ACTIVE_ATTRIBUTES 0x8B89 +#define GL_ACTIVE_ATTRIBUTE_MAX_LENGTH 0x8B8A +#define GL_SHADING_LANGUAGE_VERSION 0x8B8C +#define GL_CURRENT_PROGRAM 0x8B8D +#define GL_NEVER 0x0200 +#define GL_LESS 0x0201 +#define GL_EQUAL 0x0202 +#define GL_LEQUAL 0x0203 +#define GL_GREATER 0x0204 +#define GL_NOTEQUAL 0x0205 +#define GL_GEQUAL 0x0206 +#define GL_ALWAYS 0x0207 +#define GL_KEEP 0x1E00 +#define GL_REPLACE 0x1E01 +#define GL_INCR 0x1E02 +#define GL_DECR 0x1E03 +#define GL_INVERT 0x150A +#define GL_INCR_WRAP 0x8507 +#define GL_DECR_WRAP 0x8508 +#define GL_VENDOR 0x1F00 +#define GL_RENDERER 0x1F01 +#define GL_VERSION 0x1F02 +#define GL_EXTENSIONS 0x1F03 +#define GL_NEAREST 0x2600 +#define GL_LINEAR 0x2601 +#define GL_NEAREST_MIPMAP_NEAREST 0x2700 +#define GL_LINEAR_MIPMAP_NEAREST 0x2701 +#define GL_NEAREST_MIPMAP_LINEAR 0x2702 +#define GL_LINEAR_MIPMAP_LINEAR 0x2703 +#define GL_TEXTURE_MAG_FILTER 0x2800 +#define GL_TEXTURE_MIN_FILTER 0x2801 +#define GL_TEXTURE_WRAP_S 0x2802 +#define GL_TEXTURE_WRAP_T 0x2803 +#define GL_TEXTURE 0x1702 +#define GL_TEXTURE_CUBE_MAP 0x8513 +#define GL_TEXTURE_BINDING_CUBE_MAP 0x8514 +#define GL_TEXTURE_CUBE_MAP_POSITIVE_X 0x8515 +#define GL_TEXTURE_CUBE_MAP_NEGATIVE_X 0x8516 +#define GL_TEXTURE_CUBE_MAP_POSITIVE_Y 0x8517 +#define GL_TEXTURE_CUBE_MAP_NEGATIVE_Y 0x8518 +#define GL_TEXTURE_CUBE_MAP_POSITIVE_Z 0x8519 +#define GL_TEXTURE_CUBE_MAP_NEGATIVE_Z 0x851A +#define GL_MAX_CUBE_MAP_TEXTURE_SIZE 0x851C +#define GL_TEXTURE0 
0x84C0 +#define GL_TEXTURE1 0x84C1 +#define GL_TEXTURE2 0x84C2 +#define GL_TEXTURE3 0x84C3 +#define GL_TEXTURE4 0x84C4 +#define GL_TEXTURE5 0x84C5 +#define GL_TEXTURE6 0x84C6 +#define GL_TEXTURE7 0x84C7 +#define GL_TEXTURE8 0x84C8 +#define GL_TEXTURE9 0x84C9 +#define GL_TEXTURE10 0x84CA +#define GL_TEXTURE11 0x84CB +#define GL_TEXTURE12 0x84CC +#define GL_TEXTURE13 0x84CD +#define GL_TEXTURE14 0x84CE +#define GL_TEXTURE15 0x84CF +#define GL_TEXTURE16 0x84D0 +#define GL_TEXTURE17 0x84D1 +#define GL_TEXTURE18 0x84D2 +#define GL_TEXTURE19 0x84D3 +#define GL_TEXTURE20 0x84D4 +#define GL_TEXTURE21 0x84D5 +#define GL_TEXTURE22 0x84D6 +#define GL_TEXTURE23 0x84D7 +#define GL_TEXTURE24 0x84D8 +#define GL_TEXTURE25 0x84D9 +#define GL_TEXTURE26 0x84DA +#define GL_TEXTURE27 0x84DB +#define GL_TEXTURE28 0x84DC +#define GL_TEXTURE29 0x84DD +#define GL_TEXTURE30 0x84DE +#define GL_TEXTURE31 0x84DF +#define GL_ACTIVE_TEXTURE 0x84E0 +#define GL_REPEAT 0x2901 +#define GL_CLAMP_TO_EDGE 0x812F +#define GL_MIRRORED_REPEAT 0x8370 +#define GL_FLOAT_VEC2 0x8B50 +#define GL_FLOAT_VEC3 0x8B51 +#define GL_FLOAT_VEC4 0x8B52 +#define GL_INT_VEC2 0x8B53 +#define GL_INT_VEC3 0x8B54 +#define GL_INT_VEC4 0x8B55 +#define GL_BOOL 0x8B56 +#define GL_BOOL_VEC2 0x8B57 +#define GL_BOOL_VEC3 0x8B58 +#define GL_BOOL_VEC4 0x8B59 +#define GL_FLOAT_MAT2 0x8B5A +#define GL_FLOAT_MAT3 0x8B5B +#define GL_FLOAT_MAT4 0x8B5C +#define GL_SAMPLER_2D 0x8B5E +#define GL_SAMPLER_CUBE 0x8B60 +#define GL_VERTEX_ATTRIB_ARRAY_ENABLED 0x8622 +#define GL_VERTEX_ATTRIB_ARRAY_SIZE 0x8623 +#define GL_VERTEX_ATTRIB_ARRAY_STRIDE 0x8624 +#define GL_VERTEX_ATTRIB_ARRAY_TYPE 0x8625 +#define GL_VERTEX_ATTRIB_ARRAY_NORMALIZED 0x886A +#define GL_VERTEX_ATTRIB_ARRAY_POINTER 0x8645 +#define GL_VERTEX_ATTRIB_ARRAY_BUFFER_BINDING 0x889F +#define GL_IMPLEMENTATION_COLOR_READ_TYPE 0x8B9A +#define GL_IMPLEMENTATION_COLOR_READ_FORMAT 0x8B9B +#define GL_COMPILE_STATUS 0x8B81 +#define GL_INFO_LOG_LENGTH 0x8B84 +#define GL_SHADER_SOURCE_LENGTH 
0x8B88 +#define GL_SHADER_COMPILER 0x8DFA +#define GL_SHADER_BINARY_FORMATS 0x8DF8 +#define GL_NUM_SHADER_BINARY_FORMATS 0x8DF9 +#define GL_LOW_FLOAT 0x8DF0 +#define GL_MEDIUM_FLOAT 0x8DF1 +#define GL_HIGH_FLOAT 0x8DF2 +#define GL_LOW_INT 0x8DF3 +#define GL_MEDIUM_INT 0x8DF4 +#define GL_HIGH_INT 0x8DF5 +#define GL_FRAMEBUFFER 0x8D40 +#define GL_RENDERBUFFER 0x8D41 +#define GL_RGBA4 0x8056 +#define GL_RGB5_A1 0x8057 +#define GL_RGB565 0x8D62 +#define GL_DEPTH_COMPONENT16 0x81A5 +#define GL_STENCIL_INDEX8 0x8D48 +#define GL_RENDERBUFFER_WIDTH 0x8D42 +#define GL_RENDERBUFFER_HEIGHT 0x8D43 +#define GL_RENDERBUFFER_INTERNAL_FORMAT 0x8D44 +#define GL_RENDERBUFFER_RED_SIZE 0x8D50 +#define GL_RENDERBUFFER_GREEN_SIZE 0x8D51 +#define GL_RENDERBUFFER_BLUE_SIZE 0x8D52 +#define GL_RENDERBUFFER_ALPHA_SIZE 0x8D53 +#define GL_RENDERBUFFER_DEPTH_SIZE 0x8D54 +#define GL_RENDERBUFFER_STENCIL_SIZE 0x8D55 +#define GL_FRAMEBUFFER_ATTACHMENT_OBJECT_TYPE 0x8CD0 +#define GL_FRAMEBUFFER_ATTACHMENT_OBJECT_NAME 0x8CD1 +#define GL_FRAMEBUFFER_ATTACHMENT_TEXTURE_LEVEL 0x8CD2 +#define GL_FRAMEBUFFER_ATTACHMENT_TEXTURE_CUBE_MAP_FACE 0x8CD3 +#define GL_COLOR_ATTACHMENT0 0x8CE0 +#define GL_DEPTH_ATTACHMENT 0x8D00 +#define GL_STENCIL_ATTACHMENT 0x8D20 +#define GL_NONE 0 +#define GL_FRAMEBUFFER_COMPLETE 0x8CD5 +#define GL_FRAMEBUFFER_INCOMPLETE_ATTACHMENT 0x8CD6 +#define GL_FRAMEBUFFER_INCOMPLETE_MISSING_ATTACHMENT 0x8CD7 +#define GL_FRAMEBUFFER_INCOMPLETE_DIMENSIONS 0x8CD9 +#define GL_FRAMEBUFFER_UNSUPPORTED 0x8CDD +#define GL_FRAMEBUFFER_BINDING 0x8CA6 +#define GL_RENDERBUFFER_BINDING 0x8CA7 +#define GL_MAX_RENDERBUFFER_SIZE 0x84E8 +#define GL_INVALID_FRAMEBUFFER_OPERATION 0x0506 +GL_APICALL void GL_APIENTRY glActiveTexture (GLenum texture); +GL_APICALL void GL_APIENTRY glAttachShader (GLuint program, GLuint shader); +GL_APICALL void GL_APIENTRY glBindAttribLocation (GLuint program, GLuint index, const GLchar *name); +GL_APICALL void GL_APIENTRY glBindBuffer (GLenum target, GLuint buffer); 
+GL_APICALL void GL_APIENTRY glBindFramebuffer (GLenum target, GLuint framebuffer); +GL_APICALL void GL_APIENTRY glBindRenderbuffer (GLenum target, GLuint renderbuffer); +GL_APICALL void GL_APIENTRY glBindTexture (GLenum target, GLuint texture); +GL_APICALL void GL_APIENTRY glBlendColor (GLfloat red, GLfloat green, GLfloat blue, GLfloat alpha); +GL_APICALL void GL_APIENTRY glBlendEquation (GLenum mode); +GL_APICALL void GL_APIENTRY glBlendEquationSeparate (GLenum modeRGB, GLenum modeAlpha); +GL_APICALL void GL_APIENTRY glBlendFunc (GLenum sfactor, GLenum dfactor); +GL_APICALL void GL_APIENTRY glBlendFuncSeparate (GLenum sfactorRGB, GLenum dfactorRGB, GLenum sfactorAlpha, GLenum dfactorAlpha); +GL_APICALL void GL_APIENTRY glBufferData (GLenum target, GLsizeiptr size, const void *data, GLenum usage); +GL_APICALL void GL_APIENTRY glBufferSubData (GLenum target, GLintptr offset, GLsizeiptr size, const void *data); +GL_APICALL GLenum GL_APIENTRY glCheckFramebufferStatus (GLenum target); +GL_APICALL void GL_APIENTRY glClear (GLbitfield mask); +GL_APICALL void GL_APIENTRY glClearColor (GLfloat red, GLfloat green, GLfloat blue, GLfloat alpha); +GL_APICALL void GL_APIENTRY glClearDepthf (GLfloat d); +GL_APICALL void GL_APIENTRY glClearStencil (GLint s); +GL_APICALL void GL_APIENTRY glColorMask (GLboolean red, GLboolean green, GLboolean blue, GLboolean alpha); +GL_APICALL void GL_APIENTRY glCompileShader (GLuint shader); +GL_APICALL void GL_APIENTRY glCompressedTexImage2D (GLenum target, GLint level, GLenum internalformat, GLsizei width, GLsizei height, GLint border, GLsizei imageSize, const void *data); +GL_APICALL void GL_APIENTRY glCompressedTexSubImage2D (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLsizei width, GLsizei height, GLenum format, GLsizei imageSize, const void *data); +GL_APICALL void GL_APIENTRY glCopyTexImage2D (GLenum target, GLint level, GLenum internalformat, GLint x, GLint y, GLsizei width, GLsizei height, GLint border); +GL_APICALL void 
GL_APIENTRY glCopyTexSubImage2D (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint x, GLint y, GLsizei width, GLsizei height); +GL_APICALL GLuint GL_APIENTRY glCreateProgram (void); +GL_APICALL GLuint GL_APIENTRY glCreateShader (GLenum type); +GL_APICALL void GL_APIENTRY glCullFace (GLenum mode); +GL_APICALL void GL_APIENTRY glDeleteBuffers (GLsizei n, const GLuint *buffers); +GL_APICALL void GL_APIENTRY glDeleteFramebuffers (GLsizei n, const GLuint *framebuffers); +GL_APICALL void GL_APIENTRY glDeleteProgram (GLuint program); +GL_APICALL void GL_APIENTRY glDeleteRenderbuffers (GLsizei n, const GLuint *renderbuffers); +GL_APICALL void GL_APIENTRY glDeleteShader (GLuint shader); +GL_APICALL void GL_APIENTRY glDeleteTextures (GLsizei n, const GLuint *textures); +GL_APICALL void GL_APIENTRY glDepthFunc (GLenum func); +GL_APICALL void GL_APIENTRY glDepthMask (GLboolean flag); +GL_APICALL void GL_APIENTRY glDepthRangef (GLfloat n, GLfloat f); +GL_APICALL void GL_APIENTRY glDetachShader (GLuint program, GLuint shader); +GL_APICALL void GL_APIENTRY glDisable (GLenum cap); +GL_APICALL void GL_APIENTRY glDisableVertexAttribArray (GLuint index); +GL_APICALL void GL_APIENTRY glDrawArrays (GLenum mode, GLint first, GLsizei count); +GL_APICALL void GL_APIENTRY glDrawElements (GLenum mode, GLsizei count, GLenum type, const void *indices); +GL_APICALL void GL_APIENTRY glEnable (GLenum cap); +GL_APICALL void GL_APIENTRY glEnableVertexAttribArray (GLuint index); +GL_APICALL void GL_APIENTRY glFinish (void); +GL_APICALL void GL_APIENTRY glFlush (void); +GL_APICALL void GL_APIENTRY glFramebufferRenderbuffer (GLenum target, GLenum attachment, GLenum renderbuffertarget, GLuint renderbuffer); +GL_APICALL void GL_APIENTRY glFramebufferTexture2D (GLenum target, GLenum attachment, GLenum textarget, GLuint texture, GLint level); +GL_APICALL void GL_APIENTRY glFrontFace (GLenum mode); +GL_APICALL void GL_APIENTRY glGenBuffers (GLsizei n, GLuint *buffers); +GL_APICALL void 
GL_APIENTRY glGenerateMipmap (GLenum target); +GL_APICALL void GL_APIENTRY glGenFramebuffers (GLsizei n, GLuint *framebuffers); +GL_APICALL void GL_APIENTRY glGenRenderbuffers (GLsizei n, GLuint *renderbuffers); +GL_APICALL void GL_APIENTRY glGenTextures (GLsizei n, GLuint *textures); +GL_APICALL void GL_APIENTRY glGetActiveAttrib (GLuint program, GLuint index, GLsizei bufSize, GLsizei *length, GLint *size, GLenum *type, GLchar *name); +GL_APICALL void GL_APIENTRY glGetActiveUniform (GLuint program, GLuint index, GLsizei bufSize, GLsizei *length, GLint *size, GLenum *type, GLchar *name); +GL_APICALL void GL_APIENTRY glGetAttachedShaders (GLuint program, GLsizei maxCount, GLsizei *count, GLuint *shaders); +GL_APICALL GLint GL_APIENTRY glGetAttribLocation (GLuint program, const GLchar *name); +GL_APICALL void GL_APIENTRY glGetBooleanv (GLenum pname, GLboolean *data); +GL_APICALL void GL_APIENTRY glGetBufferParameteriv (GLenum target, GLenum pname, GLint *params); +GL_APICALL GLenum GL_APIENTRY glGetError (void); +GL_APICALL void GL_APIENTRY glGetFloatv (GLenum pname, GLfloat *data); +GL_APICALL void GL_APIENTRY glGetFramebufferAttachmentParameteriv (GLenum target, GLenum attachment, GLenum pname, GLint *params); +GL_APICALL void GL_APIENTRY glGetIntegerv (GLenum pname, GLint *data); +GL_APICALL void GL_APIENTRY glGetProgramiv (GLuint program, GLenum pname, GLint *params); +GL_APICALL void GL_APIENTRY glGetProgramInfoLog (GLuint program, GLsizei bufSize, GLsizei *length, GLchar *infoLog); +GL_APICALL void GL_APIENTRY glGetRenderbufferParameteriv (GLenum target, GLenum pname, GLint *params); +GL_APICALL void GL_APIENTRY glGetShaderiv (GLuint shader, GLenum pname, GLint *params); +GL_APICALL void GL_APIENTRY glGetShaderInfoLog (GLuint shader, GLsizei bufSize, GLsizei *length, GLchar *infoLog); +GL_APICALL void GL_APIENTRY glGetShaderPrecisionFormat (GLenum shadertype, GLenum precisiontype, GLint *range, GLint *precision); +GL_APICALL void GL_APIENTRY glGetShaderSource 
(GLuint shader, GLsizei bufSize, GLsizei *length, GLchar *source); +GL_APICALL const GLubyte *GL_APIENTRY glGetString (GLenum name); +GL_APICALL void GL_APIENTRY glGetTexParameterfv (GLenum target, GLenum pname, GLfloat *params); +GL_APICALL void GL_APIENTRY glGetTexParameteriv (GLenum target, GLenum pname, GLint *params); +GL_APICALL void GL_APIENTRY glGetUniformfv (GLuint program, GLint location, GLfloat *params); +GL_APICALL void GL_APIENTRY glGetUniformiv (GLuint program, GLint location, GLint *params); +GL_APICALL GLint GL_APIENTRY glGetUniformLocation (GLuint program, const GLchar *name); +GL_APICALL void GL_APIENTRY glGetVertexAttribfv (GLuint index, GLenum pname, GLfloat *params); +GL_APICALL void GL_APIENTRY glGetVertexAttribiv (GLuint index, GLenum pname, GLint *params); +GL_APICALL void GL_APIENTRY glGetVertexAttribPointerv (GLuint index, GLenum pname, void **pointer); +GL_APICALL void GL_APIENTRY glHint (GLenum target, GLenum mode); +GL_APICALL GLboolean GL_APIENTRY glIsBuffer (GLuint buffer); +GL_APICALL GLboolean GL_APIENTRY glIsEnabled (GLenum cap); +GL_APICALL GLboolean GL_APIENTRY glIsFramebuffer (GLuint framebuffer); +GL_APICALL GLboolean GL_APIENTRY glIsProgram (GLuint program); +GL_APICALL GLboolean GL_APIENTRY glIsRenderbuffer (GLuint renderbuffer); +GL_APICALL GLboolean GL_APIENTRY glIsShader (GLuint shader); +GL_APICALL GLboolean GL_APIENTRY glIsTexture (GLuint texture); +GL_APICALL void GL_APIENTRY glLineWidth (GLfloat width); +GL_APICALL void GL_APIENTRY glLinkProgram (GLuint program); +GL_APICALL void GL_APIENTRY glPixelStorei (GLenum pname, GLint param); +GL_APICALL void GL_APIENTRY glPolygonOffset (GLfloat factor, GLfloat units); +GL_APICALL void GL_APIENTRY glReadPixels (GLint x, GLint y, GLsizei width, GLsizei height, GLenum format, GLenum type, void *pixels); +GL_APICALL void GL_APIENTRY glReleaseShaderCompiler (void); +GL_APICALL void GL_APIENTRY glRenderbufferStorage (GLenum target, GLenum internalformat, GLsizei width, GLsizei 
height); +GL_APICALL void GL_APIENTRY glSampleCoverage (GLfloat value, GLboolean invert); +GL_APICALL void GL_APIENTRY glScissor (GLint x, GLint y, GLsizei width, GLsizei height); +GL_APICALL void GL_APIENTRY glShaderBinary (GLsizei count, const GLuint *shaders, GLenum binaryformat, const void *binary, GLsizei length); +GL_APICALL void GL_APIENTRY glShaderSource (GLuint shader, GLsizei count, const GLchar *const*string, const GLint *length); +GL_APICALL void GL_APIENTRY glStencilFunc (GLenum func, GLint ref, GLuint mask); +GL_APICALL void GL_APIENTRY glStencilFuncSeparate (GLenum face, GLenum func, GLint ref, GLuint mask); +GL_APICALL void GL_APIENTRY glStencilMask (GLuint mask); +GL_APICALL void GL_APIENTRY glStencilMaskSeparate (GLenum face, GLuint mask); +GL_APICALL void GL_APIENTRY glStencilOp (GLenum fail, GLenum zfail, GLenum zpass); +GL_APICALL void GL_APIENTRY glStencilOpSeparate (GLenum face, GLenum sfail, GLenum dpfail, GLenum dppass); +GL_APICALL void GL_APIENTRY glTexImage2D (GLenum target, GLint level, GLint internalformat, GLsizei width, GLsizei height, GLint border, GLenum format, GLenum type, const void *pixels); +GL_APICALL void GL_APIENTRY glTexParameterf (GLenum target, GLenum pname, GLfloat param); +GL_APICALL void GL_APIENTRY glTexParameterfv (GLenum target, GLenum pname, const GLfloat *params); +GL_APICALL void GL_APIENTRY glTexParameteri (GLenum target, GLenum pname, GLint param); +GL_APICALL void GL_APIENTRY glTexParameteriv (GLenum target, GLenum pname, const GLint *params); +GL_APICALL void GL_APIENTRY glTexSubImage2D (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLsizei width, GLsizei height, GLenum format, GLenum type, const void *pixels); +GL_APICALL void GL_APIENTRY glUniform1f (GLint location, GLfloat v0); +GL_APICALL void GL_APIENTRY glUniform1fv (GLint location, GLsizei count, const GLfloat *value); +GL_APICALL void GL_APIENTRY glUniform1i (GLint location, GLint v0); +GL_APICALL void GL_APIENTRY glUniform1iv (GLint 
location, GLsizei count, const GLint *value); +GL_APICALL void GL_APIENTRY glUniform2f (GLint location, GLfloat v0, GLfloat v1); +GL_APICALL void GL_APIENTRY glUniform2fv (GLint location, GLsizei count, const GLfloat *value); +GL_APICALL void GL_APIENTRY glUniform2i (GLint location, GLint v0, GLint v1); +GL_APICALL void GL_APIENTRY glUniform2iv (GLint location, GLsizei count, const GLint *value); +GL_APICALL void GL_APIENTRY glUniform3f (GLint location, GLfloat v0, GLfloat v1, GLfloat v2); +GL_APICALL void GL_APIENTRY glUniform3fv (GLint location, GLsizei count, const GLfloat *value); +GL_APICALL void GL_APIENTRY glUniform3i (GLint location, GLint v0, GLint v1, GLint v2); +GL_APICALL void GL_APIENTRY glUniform3iv (GLint location, GLsizei count, const GLint *value); +GL_APICALL void GL_APIENTRY glUniform4f (GLint location, GLfloat v0, GLfloat v1, GLfloat v2, GLfloat v3); +GL_APICALL void GL_APIENTRY glUniform4fv (GLint location, GLsizei count, const GLfloat *value); +GL_APICALL void GL_APIENTRY glUniform4i (GLint location, GLint v0, GLint v1, GLint v2, GLint v3); +GL_APICALL void GL_APIENTRY glUniform4iv (GLint location, GLsizei count, const GLint *value); +GL_APICALL void GL_APIENTRY glUniformMatrix2fv (GLint location, GLsizei count, GLboolean transpose, const GLfloat *value); +GL_APICALL void GL_APIENTRY glUniformMatrix3fv (GLint location, GLsizei count, GLboolean transpose, const GLfloat *value); +GL_APICALL void GL_APIENTRY glUniformMatrix4fv (GLint location, GLsizei count, GLboolean transpose, const GLfloat *value); +GL_APICALL void GL_APIENTRY glUseProgram (GLuint program); +GL_APICALL void GL_APIENTRY glValidateProgram (GLuint program); +GL_APICALL void GL_APIENTRY glVertexAttrib1f (GLuint index, GLfloat x); +GL_APICALL void GL_APIENTRY glVertexAttrib1fv (GLuint index, const GLfloat *v); +GL_APICALL void GL_APIENTRY glVertexAttrib2f (GLuint index, GLfloat x, GLfloat y); +GL_APICALL void GL_APIENTRY glVertexAttrib2fv (GLuint index, const GLfloat *v); 
+GL_APICALL void GL_APIENTRY glVertexAttrib3f (GLuint index, GLfloat x, GLfloat y, GLfloat z); +GL_APICALL void GL_APIENTRY glVertexAttrib3fv (GLuint index, const GLfloat *v); +GL_APICALL void GL_APIENTRY glVertexAttrib4f (GLuint index, GLfloat x, GLfloat y, GLfloat z, GLfloat w); +GL_APICALL void GL_APIENTRY glVertexAttrib4fv (GLuint index, const GLfloat *v); +GL_APICALL void GL_APIENTRY glVertexAttribPointer (GLuint index, GLint size, GLenum type, GLboolean normalized, GLsizei stride, const void *pointer); +GL_APICALL void GL_APIENTRY glViewport (GLint x, GLint y, GLsizei width, GLsizei height); +#endif /* GL_ES_VERSION_2_0 */ + +#ifndef GL_ES_VERSION_3_0 +#define GL_ES_VERSION_3_0 1 +typedef unsigned short GLhalf; +#define GL_READ_BUFFER 0x0C02 +#define GL_UNPACK_ROW_LENGTH 0x0CF2 +#define GL_UNPACK_SKIP_ROWS 0x0CF3 +#define GL_UNPACK_SKIP_PIXELS 0x0CF4 +#define GL_PACK_ROW_LENGTH 0x0D02 +#define GL_PACK_SKIP_ROWS 0x0D03 +#define GL_PACK_SKIP_PIXELS 0x0D04 +#define GL_COLOR 0x1800 +#define GL_DEPTH 0x1801 +#define GL_STENCIL 0x1802 +#define GL_RED 0x1903 +#define GL_RGB8 0x8051 +#define GL_RGBA8 0x8058 +#define GL_RGB10_A2 0x8059 +#define GL_TEXTURE_BINDING_3D 0x806A +#define GL_UNPACK_SKIP_IMAGES 0x806D +#define GL_UNPACK_IMAGE_HEIGHT 0x806E +#define GL_TEXTURE_3D 0x806F +#define GL_TEXTURE_WRAP_R 0x8072 +#define GL_MAX_3D_TEXTURE_SIZE 0x8073 +#define GL_UNSIGNED_INT_2_10_10_10_REV 0x8368 +#define GL_MAX_ELEMENTS_VERTICES 0x80E8 +#define GL_MAX_ELEMENTS_INDICES 0x80E9 +#define GL_TEXTURE_MIN_LOD 0x813A +#define GL_TEXTURE_MAX_LOD 0x813B +#define GL_TEXTURE_BASE_LEVEL 0x813C +#define GL_TEXTURE_MAX_LEVEL 0x813D +#define GL_MIN 0x8007 +#define GL_MAX 0x8008 +#define GL_DEPTH_COMPONENT24 0x81A6 +#define GL_MAX_TEXTURE_LOD_BIAS 0x84FD +#define GL_TEXTURE_COMPARE_MODE 0x884C +#define GL_TEXTURE_COMPARE_FUNC 0x884D +#define GL_CURRENT_QUERY 0x8865 +#define GL_QUERY_RESULT 0x8866 +#define GL_QUERY_RESULT_AVAILABLE 0x8867 +#define GL_BUFFER_MAPPED 0x88BC +#define 
GL_BUFFER_MAP_POINTER 0x88BD +#define GL_STREAM_READ 0x88E1 +#define GL_STREAM_COPY 0x88E2 +#define GL_STATIC_READ 0x88E5 +#define GL_STATIC_COPY 0x88E6 +#define GL_DYNAMIC_READ 0x88E9 +#define GL_DYNAMIC_COPY 0x88EA +#define GL_MAX_DRAW_BUFFERS 0x8824 +#define GL_DRAW_BUFFER0 0x8825 +#define GL_DRAW_BUFFER1 0x8826 +#define GL_DRAW_BUFFER2 0x8827 +#define GL_DRAW_BUFFER3 0x8828 +#define GL_DRAW_BUFFER4 0x8829 +#define GL_DRAW_BUFFER5 0x882A +#define GL_DRAW_BUFFER6 0x882B +#define GL_DRAW_BUFFER7 0x882C +#define GL_DRAW_BUFFER8 0x882D +#define GL_DRAW_BUFFER9 0x882E +#define GL_DRAW_BUFFER10 0x882F +#define GL_DRAW_BUFFER11 0x8830 +#define GL_DRAW_BUFFER12 0x8831 +#define GL_DRAW_BUFFER13 0x8832 +#define GL_DRAW_BUFFER14 0x8833 +#define GL_DRAW_BUFFER15 0x8834 +#define GL_MAX_FRAGMENT_UNIFORM_COMPONENTS 0x8B49 +#define GL_MAX_VERTEX_UNIFORM_COMPONENTS 0x8B4A +#define GL_SAMPLER_3D 0x8B5F +#define GL_SAMPLER_2D_SHADOW 0x8B62 +#define GL_FRAGMENT_SHADER_DERIVATIVE_HINT 0x8B8B +#define GL_PIXEL_PACK_BUFFER 0x88EB +#define GL_PIXEL_UNPACK_BUFFER 0x88EC +#define GL_PIXEL_PACK_BUFFER_BINDING 0x88ED +#define GL_PIXEL_UNPACK_BUFFER_BINDING 0x88EF +#define GL_FLOAT_MAT2x3 0x8B65 +#define GL_FLOAT_MAT2x4 0x8B66 +#define GL_FLOAT_MAT3x2 0x8B67 +#define GL_FLOAT_MAT3x4 0x8B68 +#define GL_FLOAT_MAT4x2 0x8B69 +#define GL_FLOAT_MAT4x3 0x8B6A +#define GL_SRGB 0x8C40 +#define GL_SRGB8 0x8C41 +#define GL_SRGB8_ALPHA8 0x8C43 +#define GL_COMPARE_REF_TO_TEXTURE 0x884E +#define GL_MAJOR_VERSION 0x821B +#define GL_MINOR_VERSION 0x821C +#define GL_NUM_EXTENSIONS 0x821D +#define GL_RGBA32F 0x8814 +#define GL_RGB32F 0x8815 +#define GL_RGBA16F 0x881A +#define GL_RGB16F 0x881B +#define GL_VERTEX_ATTRIB_ARRAY_INTEGER 0x88FD +#define GL_MAX_ARRAY_TEXTURE_LAYERS 0x88FF +#define GL_MIN_PROGRAM_TEXEL_OFFSET 0x8904 +#define GL_MAX_PROGRAM_TEXEL_OFFSET 0x8905 +#define GL_MAX_VARYING_COMPONENTS 0x8B4B +#define GL_TEXTURE_2D_ARRAY 0x8C1A +#define GL_TEXTURE_BINDING_2D_ARRAY 0x8C1D +#define 
GL_R11F_G11F_B10F 0x8C3A +#define GL_UNSIGNED_INT_10F_11F_11F_REV 0x8C3B +#define GL_RGB9_E5 0x8C3D +#define GL_UNSIGNED_INT_5_9_9_9_REV 0x8C3E +#define GL_TRANSFORM_FEEDBACK_VARYING_MAX_LENGTH 0x8C76 +#define GL_TRANSFORM_FEEDBACK_BUFFER_MODE 0x8C7F +#define GL_MAX_TRANSFORM_FEEDBACK_SEPARATE_COMPONENTS 0x8C80 +#define GL_TRANSFORM_FEEDBACK_VARYINGS 0x8C83 +#define GL_TRANSFORM_FEEDBACK_BUFFER_START 0x8C84 +#define GL_TRANSFORM_FEEDBACK_BUFFER_SIZE 0x8C85 +#define GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN 0x8C88 +#define GL_RASTERIZER_DISCARD 0x8C89 +#define GL_MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS 0x8C8A +#define GL_MAX_TRANSFORM_FEEDBACK_SEPARATE_ATTRIBS 0x8C8B +#define GL_INTERLEAVED_ATTRIBS 0x8C8C +#define GL_SEPARATE_ATTRIBS 0x8C8D +#define GL_TRANSFORM_FEEDBACK_BUFFER 0x8C8E +#define GL_TRANSFORM_FEEDBACK_BUFFER_BINDING 0x8C8F +#define GL_RGBA32UI 0x8D70 +#define GL_RGB32UI 0x8D71 +#define GL_RGBA16UI 0x8D76 +#define GL_RGB16UI 0x8D77 +#define GL_RGBA8UI 0x8D7C +#define GL_RGB8UI 0x8D7D +#define GL_RGBA32I 0x8D82 +#define GL_RGB32I 0x8D83 +#define GL_RGBA16I 0x8D88 +#define GL_RGB16I 0x8D89 +#define GL_RGBA8I 0x8D8E +#define GL_RGB8I 0x8D8F +#define GL_RED_INTEGER 0x8D94 +#define GL_RGB_INTEGER 0x8D98 +#define GL_RGBA_INTEGER 0x8D99 +#define GL_SAMPLER_2D_ARRAY 0x8DC1 +#define GL_SAMPLER_2D_ARRAY_SHADOW 0x8DC4 +#define GL_SAMPLER_CUBE_SHADOW 0x8DC5 +#define GL_UNSIGNED_INT_VEC2 0x8DC6 +#define GL_UNSIGNED_INT_VEC3 0x8DC7 +#define GL_UNSIGNED_INT_VEC4 0x8DC8 +#define GL_INT_SAMPLER_2D 0x8DCA +#define GL_INT_SAMPLER_3D 0x8DCB +#define GL_INT_SAMPLER_CUBE 0x8DCC +#define GL_INT_SAMPLER_2D_ARRAY 0x8DCF +#define GL_UNSIGNED_INT_SAMPLER_2D 0x8DD2 +#define GL_UNSIGNED_INT_SAMPLER_3D 0x8DD3 +#define GL_UNSIGNED_INT_SAMPLER_CUBE 0x8DD4 +#define GL_UNSIGNED_INT_SAMPLER_2D_ARRAY 0x8DD7 +#define GL_BUFFER_ACCESS_FLAGS 0x911F +#define GL_BUFFER_MAP_LENGTH 0x9120 +#define GL_BUFFER_MAP_OFFSET 0x9121 +#define GL_DEPTH_COMPONENT32F 0x8CAC +#define 
GL_DEPTH32F_STENCIL8 0x8CAD +#define GL_FLOAT_32_UNSIGNED_INT_24_8_REV 0x8DAD +#define GL_FRAMEBUFFER_ATTACHMENT_COLOR_ENCODING 0x8210 +#define GL_FRAMEBUFFER_ATTACHMENT_COMPONENT_TYPE 0x8211 +#define GL_FRAMEBUFFER_ATTACHMENT_RED_SIZE 0x8212 +#define GL_FRAMEBUFFER_ATTACHMENT_GREEN_SIZE 0x8213 +#define GL_FRAMEBUFFER_ATTACHMENT_BLUE_SIZE 0x8214 +#define GL_FRAMEBUFFER_ATTACHMENT_ALPHA_SIZE 0x8215 +#define GL_FRAMEBUFFER_ATTACHMENT_DEPTH_SIZE 0x8216 +#define GL_FRAMEBUFFER_ATTACHMENT_STENCIL_SIZE 0x8217 +#define GL_FRAMEBUFFER_DEFAULT 0x8218 +#define GL_FRAMEBUFFER_UNDEFINED 0x8219 +#define GL_DEPTH_STENCIL_ATTACHMENT 0x821A +#define GL_DEPTH_STENCIL 0x84F9 +#define GL_UNSIGNED_INT_24_8 0x84FA +#define GL_DEPTH24_STENCIL8 0x88F0 +#define GL_UNSIGNED_NORMALIZED 0x8C17 +#define GL_DRAW_FRAMEBUFFER_BINDING 0x8CA6 +#define GL_READ_FRAMEBUFFER 0x8CA8 +#define GL_DRAW_FRAMEBUFFER 0x8CA9 +#define GL_READ_FRAMEBUFFER_BINDING 0x8CAA +#define GL_RENDERBUFFER_SAMPLES 0x8CAB +#define GL_FRAMEBUFFER_ATTACHMENT_TEXTURE_LAYER 0x8CD4 +#define GL_MAX_COLOR_ATTACHMENTS 0x8CDF +#define GL_COLOR_ATTACHMENT1 0x8CE1 +#define GL_COLOR_ATTACHMENT2 0x8CE2 +#define GL_COLOR_ATTACHMENT3 0x8CE3 +#define GL_COLOR_ATTACHMENT4 0x8CE4 +#define GL_COLOR_ATTACHMENT5 0x8CE5 +#define GL_COLOR_ATTACHMENT6 0x8CE6 +#define GL_COLOR_ATTACHMENT7 0x8CE7 +#define GL_COLOR_ATTACHMENT8 0x8CE8 +#define GL_COLOR_ATTACHMENT9 0x8CE9 +#define GL_COLOR_ATTACHMENT10 0x8CEA +#define GL_COLOR_ATTACHMENT11 0x8CEB +#define GL_COLOR_ATTACHMENT12 0x8CEC +#define GL_COLOR_ATTACHMENT13 0x8CED +#define GL_COLOR_ATTACHMENT14 0x8CEE +#define GL_COLOR_ATTACHMENT15 0x8CEF +#define GL_FRAMEBUFFER_INCOMPLETE_MULTISAMPLE 0x8D56 +#define GL_MAX_SAMPLES 0x8D57 +#define GL_HALF_FLOAT 0x140B +#define GL_MAP_READ_BIT 0x0001 +#define GL_MAP_WRITE_BIT 0x0002 +#define GL_MAP_INVALIDATE_RANGE_BIT 0x0004 +#define GL_MAP_INVALIDATE_BUFFER_BIT 0x0008 +#define GL_MAP_FLUSH_EXPLICIT_BIT 0x0010 +#define GL_MAP_UNSYNCHRONIZED_BIT 0x0020 +#define 
GL_RG 0x8227 +#define GL_RG_INTEGER 0x8228 +#define GL_R8 0x8229 +#define GL_RG8 0x822B +#define GL_R16F 0x822D +#define GL_R32F 0x822E +#define GL_RG16F 0x822F +#define GL_RG32F 0x8230 +#define GL_R8I 0x8231 +#define GL_R8UI 0x8232 +#define GL_R16I 0x8233 +#define GL_R16UI 0x8234 +#define GL_R32I 0x8235 +#define GL_R32UI 0x8236 +#define GL_RG8I 0x8237 +#define GL_RG8UI 0x8238 +#define GL_RG16I 0x8239 +#define GL_RG16UI 0x823A +#define GL_RG32I 0x823B +#define GL_RG32UI 0x823C +#define GL_VERTEX_ARRAY_BINDING 0x85B5 +#define GL_R8_SNORM 0x8F94 +#define GL_RG8_SNORM 0x8F95 +#define GL_RGB8_SNORM 0x8F96 +#define GL_RGBA8_SNORM 0x8F97 +#define GL_SIGNED_NORMALIZED 0x8F9C +#define GL_PRIMITIVE_RESTART_FIXED_INDEX 0x8D69 +#define GL_COPY_READ_BUFFER 0x8F36 +#define GL_COPY_WRITE_BUFFER 0x8F37 +#define GL_COPY_READ_BUFFER_BINDING 0x8F36 +#define GL_COPY_WRITE_BUFFER_BINDING 0x8F37 +#define GL_UNIFORM_BUFFER 0x8A11 +#define GL_UNIFORM_BUFFER_BINDING 0x8A28 +#define GL_UNIFORM_BUFFER_START 0x8A29 +#define GL_UNIFORM_BUFFER_SIZE 0x8A2A +#define GL_MAX_VERTEX_UNIFORM_BLOCKS 0x8A2B +#define GL_MAX_FRAGMENT_UNIFORM_BLOCKS 0x8A2D +#define GL_MAX_COMBINED_UNIFORM_BLOCKS 0x8A2E +#define GL_MAX_UNIFORM_BUFFER_BINDINGS 0x8A2F +#define GL_MAX_UNIFORM_BLOCK_SIZE 0x8A30 +#define GL_MAX_COMBINED_VERTEX_UNIFORM_COMPONENTS 0x8A31 +#define GL_MAX_COMBINED_FRAGMENT_UNIFORM_COMPONENTS 0x8A33 +#define GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT 0x8A34 +#define GL_ACTIVE_UNIFORM_BLOCK_MAX_NAME_LENGTH 0x8A35 +#define GL_ACTIVE_UNIFORM_BLOCKS 0x8A36 +#define GL_UNIFORM_TYPE 0x8A37 +#define GL_UNIFORM_SIZE 0x8A38 +#define GL_UNIFORM_NAME_LENGTH 0x8A39 +#define GL_UNIFORM_BLOCK_INDEX 0x8A3A +#define GL_UNIFORM_OFFSET 0x8A3B +#define GL_UNIFORM_ARRAY_STRIDE 0x8A3C +#define GL_UNIFORM_MATRIX_STRIDE 0x8A3D +#define GL_UNIFORM_IS_ROW_MAJOR 0x8A3E +#define GL_UNIFORM_BLOCK_BINDING 0x8A3F +#define GL_UNIFORM_BLOCK_DATA_SIZE 0x8A40 +#define GL_UNIFORM_BLOCK_NAME_LENGTH 0x8A41 +#define 
GL_UNIFORM_BLOCK_ACTIVE_UNIFORMS 0x8A42 +#define GL_UNIFORM_BLOCK_ACTIVE_UNIFORM_INDICES 0x8A43 +#define GL_UNIFORM_BLOCK_REFERENCED_BY_VERTEX_SHADER 0x8A44 +#define GL_UNIFORM_BLOCK_REFERENCED_BY_FRAGMENT_SHADER 0x8A46 +#define GL_INVALID_INDEX 0xFFFFFFFFu +#define GL_MAX_VERTEX_OUTPUT_COMPONENTS 0x9122 +#define GL_MAX_FRAGMENT_INPUT_COMPONENTS 0x9125 +#define GL_MAX_SERVER_WAIT_TIMEOUT 0x9111 +#define GL_OBJECT_TYPE 0x9112 +#define GL_SYNC_CONDITION 0x9113 +#define GL_SYNC_STATUS 0x9114 +#define GL_SYNC_FLAGS 0x9115 +#define GL_SYNC_FENCE 0x9116 +#define GL_SYNC_GPU_COMMANDS_COMPLETE 0x9117 +#define GL_UNSIGNALED 0x9118 +#define GL_SIGNALED 0x9119 +#define GL_ALREADY_SIGNALED 0x911A +#define GL_TIMEOUT_EXPIRED 0x911B +#define GL_CONDITION_SATISFIED 0x911C +#define GL_WAIT_FAILED 0x911D +#define GL_SYNC_FLUSH_COMMANDS_BIT 0x00000001 +#define GL_TIMEOUT_IGNORED 0xFFFFFFFFFFFFFFFFull +#define GL_VERTEX_ATTRIB_ARRAY_DIVISOR 0x88FE +#define GL_ANY_SAMPLES_PASSED 0x8C2F +#define GL_ANY_SAMPLES_PASSED_CONSERVATIVE 0x8D6A +#define GL_SAMPLER_BINDING 0x8919 +#define GL_RGB10_A2UI 0x906F +#define GL_TEXTURE_SWIZZLE_R 0x8E42 +#define GL_TEXTURE_SWIZZLE_G 0x8E43 +#define GL_TEXTURE_SWIZZLE_B 0x8E44 +#define GL_TEXTURE_SWIZZLE_A 0x8E45 +#define GL_GREEN 0x1904 +#define GL_BLUE 0x1905 +#define GL_INT_2_10_10_10_REV 0x8D9F +#define GL_TRANSFORM_FEEDBACK 0x8E22 +#define GL_TRANSFORM_FEEDBACK_PAUSED 0x8E23 +#define GL_TRANSFORM_FEEDBACK_ACTIVE 0x8E24 +#define GL_TRANSFORM_FEEDBACK_BINDING 0x8E25 +#define GL_PROGRAM_BINARY_RETRIEVABLE_HINT 0x8257 +#define GL_PROGRAM_BINARY_LENGTH 0x8741 +#define GL_NUM_PROGRAM_BINARY_FORMATS 0x87FE +#define GL_PROGRAM_BINARY_FORMATS 0x87FF +#define GL_COMPRESSED_R11_EAC 0x9270 +#define GL_COMPRESSED_SIGNED_R11_EAC 0x9271 +#define GL_COMPRESSED_RG11_EAC 0x9272 +#define GL_COMPRESSED_SIGNED_RG11_EAC 0x9273 +#define GL_COMPRESSED_RGB8_ETC2 0x9274 +#define GL_COMPRESSED_SRGB8_ETC2 0x9275 +#define GL_COMPRESSED_RGB8_PUNCHTHROUGH_ALPHA1_ETC2 0x9276 
+#define GL_COMPRESSED_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2 0x9277 +#define GL_COMPRESSED_RGBA8_ETC2_EAC 0x9278 +#define GL_COMPRESSED_SRGB8_ALPHA8_ETC2_EAC 0x9279 +#define GL_TEXTURE_IMMUTABLE_FORMAT 0x912F +#define GL_MAX_ELEMENT_INDEX 0x8D6B +#define GL_NUM_SAMPLE_COUNTS 0x9380 +#define GL_TEXTURE_IMMUTABLE_LEVELS 0x82DF +GL_APICALL void GL_APIENTRY glReadBuffer (GLenum mode); +GL_APICALL void GL_APIENTRY glDrawRangeElements (GLenum mode, GLuint start, GLuint end, GLsizei count, GLenum type, const void *indices); +GL_APICALL void GL_APIENTRY glTexImage3D (GLenum target, GLint level, GLint internalformat, GLsizei width, GLsizei height, GLsizei depth, GLint border, GLenum format, GLenum type, const void *pixels); +GL_APICALL void GL_APIENTRY glTexSubImage3D (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLenum format, GLenum type, const void *pixels); +GL_APICALL void GL_APIENTRY glCopyTexSubImage3D (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLint x, GLint y, GLsizei width, GLsizei height); +GL_APICALL void GL_APIENTRY glCompressedTexImage3D (GLenum target, GLint level, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth, GLint border, GLsizei imageSize, const void *data); +GL_APICALL void GL_APIENTRY glCompressedTexSubImage3D (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLenum format, GLsizei imageSize, const void *data); +GL_APICALL void GL_APIENTRY glGenQueries (GLsizei n, GLuint *ids); +GL_APICALL void GL_APIENTRY glDeleteQueries (GLsizei n, const GLuint *ids); +GL_APICALL GLboolean GL_APIENTRY glIsQuery (GLuint id); +GL_APICALL void GL_APIENTRY glBeginQuery (GLenum target, GLuint id); +GL_APICALL void GL_APIENTRY glEndQuery (GLenum target); +GL_APICALL void GL_APIENTRY glGetQueryiv (GLenum target, GLenum pname, GLint *params); +GL_APICALL void GL_APIENTRY 
glGetQueryObjectuiv (GLuint id, GLenum pname, GLuint *params); +GL_APICALL GLboolean GL_APIENTRY glUnmapBuffer (GLenum target); +GL_APICALL void GL_APIENTRY glGetBufferPointerv (GLenum target, GLenum pname, void **params); +GL_APICALL void GL_APIENTRY glDrawBuffers (GLsizei n, const GLenum *bufs); +GL_APICALL void GL_APIENTRY glUniformMatrix2x3fv (GLint location, GLsizei count, GLboolean transpose, const GLfloat *value); +GL_APICALL void GL_APIENTRY glUniformMatrix3x2fv (GLint location, GLsizei count, GLboolean transpose, const GLfloat *value); +GL_APICALL void GL_APIENTRY glUniformMatrix2x4fv (GLint location, GLsizei count, GLboolean transpose, const GLfloat *value); +GL_APICALL void GL_APIENTRY glUniformMatrix4x2fv (GLint location, GLsizei count, GLboolean transpose, const GLfloat *value); +GL_APICALL void GL_APIENTRY glUniformMatrix3x4fv (GLint location, GLsizei count, GLboolean transpose, const GLfloat *value); +GL_APICALL void GL_APIENTRY glUniformMatrix4x3fv (GLint location, GLsizei count, GLboolean transpose, const GLfloat *value); +GL_APICALL void GL_APIENTRY glBlitFramebuffer (GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1, GLbitfield mask, GLenum filter); +GL_APICALL void GL_APIENTRY glRenderbufferStorageMultisample (GLenum target, GLsizei samples, GLenum internalformat, GLsizei width, GLsizei height); +GL_APICALL void GL_APIENTRY glFramebufferTextureLayer (GLenum target, GLenum attachment, GLuint texture, GLint level, GLint layer); +GL_APICALL void *GL_APIENTRY glMapBufferRange (GLenum target, GLintptr offset, GLsizeiptr length, GLbitfield access); +GL_APICALL void GL_APIENTRY glFlushMappedBufferRange (GLenum target, GLintptr offset, GLsizeiptr length); +GL_APICALL void GL_APIENTRY glBindVertexArray (GLuint array); +GL_APICALL void GL_APIENTRY glDeleteVertexArrays (GLsizei n, const GLuint *arrays); +GL_APICALL void GL_APIENTRY glGenVertexArrays (GLsizei n, GLuint *arrays); +GL_APICALL GLboolean 
GL_APIENTRY glIsVertexArray (GLuint array); +GL_APICALL void GL_APIENTRY glGetIntegeri_v (GLenum target, GLuint index, GLint *data); +GL_APICALL void GL_APIENTRY glBeginTransformFeedback (GLenum primitiveMode); +GL_APICALL void GL_APIENTRY glEndTransformFeedback (void); +GL_APICALL void GL_APIENTRY glBindBufferRange (GLenum target, GLuint index, GLuint buffer, GLintptr offset, GLsizeiptr size); +GL_APICALL void GL_APIENTRY glBindBufferBase (GLenum target, GLuint index, GLuint buffer); +GL_APICALL void GL_APIENTRY glTransformFeedbackVaryings (GLuint program, GLsizei count, const GLchar *const*varyings, GLenum bufferMode); +GL_APICALL void GL_APIENTRY glGetTransformFeedbackVarying (GLuint program, GLuint index, GLsizei bufSize, GLsizei *length, GLsizei *size, GLenum *type, GLchar *name); +GL_APICALL void GL_APIENTRY glVertexAttribIPointer (GLuint index, GLint size, GLenum type, GLsizei stride, const void *pointer); +GL_APICALL void GL_APIENTRY glGetVertexAttribIiv (GLuint index, GLenum pname, GLint *params); +GL_APICALL void GL_APIENTRY glGetVertexAttribIuiv (GLuint index, GLenum pname, GLuint *params); +GL_APICALL void GL_APIENTRY glVertexAttribI4i (GLuint index, GLint x, GLint y, GLint z, GLint w); +GL_APICALL void GL_APIENTRY glVertexAttribI4ui (GLuint index, GLuint x, GLuint y, GLuint z, GLuint w); +GL_APICALL void GL_APIENTRY glVertexAttribI4iv (GLuint index, const GLint *v); +GL_APICALL void GL_APIENTRY glVertexAttribI4uiv (GLuint index, const GLuint *v); +GL_APICALL void GL_APIENTRY glGetUniformuiv (GLuint program, GLint location, GLuint *params); +GL_APICALL GLint GL_APIENTRY glGetFragDataLocation (GLuint program, const GLchar *name); +GL_APICALL void GL_APIENTRY glUniform1ui (GLint location, GLuint v0); +GL_APICALL void GL_APIENTRY glUniform2ui (GLint location, GLuint v0, GLuint v1); +GL_APICALL void GL_APIENTRY glUniform3ui (GLint location, GLuint v0, GLuint v1, GLuint v2); +GL_APICALL void GL_APIENTRY glUniform4ui (GLint location, GLuint v0, GLuint v1, 
GLuint v2, GLuint v3); +GL_APICALL void GL_APIENTRY glUniform1uiv (GLint location, GLsizei count, const GLuint *value); +GL_APICALL void GL_APIENTRY glUniform2uiv (GLint location, GLsizei count, const GLuint *value); +GL_APICALL void GL_APIENTRY glUniform3uiv (GLint location, GLsizei count, const GLuint *value); +GL_APICALL void GL_APIENTRY glUniform4uiv (GLint location, GLsizei count, const GLuint *value); +GL_APICALL void GL_APIENTRY glClearBufferiv (GLenum buffer, GLint drawbuffer, const GLint *value); +GL_APICALL void GL_APIENTRY glClearBufferuiv (GLenum buffer, GLint drawbuffer, const GLuint *value); +GL_APICALL void GL_APIENTRY glClearBufferfv (GLenum buffer, GLint drawbuffer, const GLfloat *value); +GL_APICALL void GL_APIENTRY glClearBufferfi (GLenum buffer, GLint drawbuffer, GLfloat depth, GLint stencil); +GL_APICALL const GLubyte *GL_APIENTRY glGetStringi (GLenum name, GLuint index); +GL_APICALL void GL_APIENTRY glCopyBufferSubData (GLenum readTarget, GLenum writeTarget, GLintptr readOffset, GLintptr writeOffset, GLsizeiptr size); +GL_APICALL void GL_APIENTRY glGetUniformIndices (GLuint program, GLsizei uniformCount, const GLchar *const*uniformNames, GLuint *uniformIndices); +GL_APICALL void GL_APIENTRY glGetActiveUniformsiv (GLuint program, GLsizei uniformCount, const GLuint *uniformIndices, GLenum pname, GLint *params); +GL_APICALL GLuint GL_APIENTRY glGetUniformBlockIndex (GLuint program, const GLchar *uniformBlockName); +GL_APICALL void GL_APIENTRY glGetActiveUniformBlockiv (GLuint program, GLuint uniformBlockIndex, GLenum pname, GLint *params); +GL_APICALL void GL_APIENTRY glGetActiveUniformBlockName (GLuint program, GLuint uniformBlockIndex, GLsizei bufSize, GLsizei *length, GLchar *uniformBlockName); +GL_APICALL void GL_APIENTRY glUniformBlockBinding (GLuint program, GLuint uniformBlockIndex, GLuint uniformBlockBinding); +GL_APICALL void GL_APIENTRY glDrawArraysInstanced (GLenum mode, GLint first, GLsizei count, GLsizei instancecount); +GL_APICALL 
void GL_APIENTRY glDrawElementsInstanced (GLenum mode, GLsizei count, GLenum type, const void *indices, GLsizei instancecount); +GL_APICALL GLsync GL_APIENTRY glFenceSync (GLenum condition, GLbitfield flags); +GL_APICALL GLboolean GL_APIENTRY glIsSync (GLsync sync); +GL_APICALL void GL_APIENTRY glDeleteSync (GLsync sync); +GL_APICALL GLenum GL_APIENTRY glClientWaitSync (GLsync sync, GLbitfield flags, GLuint64 timeout); +GL_APICALL void GL_APIENTRY glWaitSync (GLsync sync, GLbitfield flags, GLuint64 timeout); +GL_APICALL void GL_APIENTRY glGetInteger64v (GLenum pname, GLint64 *data); +GL_APICALL void GL_APIENTRY glGetSynciv (GLsync sync, GLenum pname, GLsizei bufSize, GLsizei *length, GLint *values); +GL_APICALL void GL_APIENTRY glGetInteger64i_v (GLenum target, GLuint index, GLint64 *data); +GL_APICALL void GL_APIENTRY glGetBufferParameteri64v (GLenum target, GLenum pname, GLint64 *params); +GL_APICALL void GL_APIENTRY glGenSamplers (GLsizei count, GLuint *samplers); +GL_APICALL void GL_APIENTRY glDeleteSamplers (GLsizei count, const GLuint *samplers); +GL_APICALL GLboolean GL_APIENTRY glIsSampler (GLuint sampler); +GL_APICALL void GL_APIENTRY glBindSampler (GLuint unit, GLuint sampler); +GL_APICALL void GL_APIENTRY glSamplerParameteri (GLuint sampler, GLenum pname, GLint param); +GL_APICALL void GL_APIENTRY glSamplerParameteriv (GLuint sampler, GLenum pname, const GLint *param); +GL_APICALL void GL_APIENTRY glSamplerParameterf (GLuint sampler, GLenum pname, GLfloat param); +GL_APICALL void GL_APIENTRY glSamplerParameterfv (GLuint sampler, GLenum pname, const GLfloat *param); +GL_APICALL void GL_APIENTRY glGetSamplerParameteriv (GLuint sampler, GLenum pname, GLint *params); +GL_APICALL void GL_APIENTRY glGetSamplerParameterfv (GLuint sampler, GLenum pname, GLfloat *params); +GL_APICALL void GL_APIENTRY glVertexAttribDivisor (GLuint index, GLuint divisor); +GL_APICALL void GL_APIENTRY glBindTransformFeedback (GLenum target, GLuint id); +GL_APICALL void GL_APIENTRY 
glDeleteTransformFeedbacks (GLsizei n, const GLuint *ids); +GL_APICALL void GL_APIENTRY glGenTransformFeedbacks (GLsizei n, GLuint *ids); +GL_APICALL GLboolean GL_APIENTRY glIsTransformFeedback (GLuint id); +GL_APICALL void GL_APIENTRY glPauseTransformFeedback (void); +GL_APICALL void GL_APIENTRY glResumeTransformFeedback (void); +GL_APICALL void GL_APIENTRY glGetProgramBinary (GLuint program, GLsizei bufSize, GLsizei *length, GLenum *binaryFormat, void *binary); +GL_APICALL void GL_APIENTRY glProgramBinary (GLuint program, GLenum binaryFormat, const void *binary, GLsizei length); +GL_APICALL void GL_APIENTRY glProgramParameteri (GLuint program, GLenum pname, GLint value); +GL_APICALL void GL_APIENTRY glInvalidateFramebuffer (GLenum target, GLsizei numAttachments, const GLenum *attachments); +GL_APICALL void GL_APIENTRY glInvalidateSubFramebuffer (GLenum target, GLsizei numAttachments, const GLenum *attachments, GLint x, GLint y, GLsizei width, GLsizei height); +GL_APICALL void GL_APIENTRY glTexStorage2D (GLenum target, GLsizei levels, GLenum internalformat, GLsizei width, GLsizei height); +GL_APICALL void GL_APIENTRY glTexStorage3D (GLenum target, GLsizei levels, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth); +GL_APICALL void GL_APIENTRY glGetInternalformativ (GLenum target, GLenum internalformat, GLenum pname, GLsizei bufSize, GLint *params); +#endif /* GL_ES_VERSION_3_0 */ + +#ifndef GL_ES_VERSION_3_1 +#define GL_ES_VERSION_3_1 1 +#define GL_COMPUTE_SHADER 0x91B9 +#define GL_MAX_COMPUTE_UNIFORM_BLOCKS 0x91BB +#define GL_MAX_COMPUTE_TEXTURE_IMAGE_UNITS 0x91BC +#define GL_MAX_COMPUTE_IMAGE_UNIFORMS 0x91BD +#define GL_MAX_COMPUTE_SHARED_MEMORY_SIZE 0x8262 +#define GL_MAX_COMPUTE_UNIFORM_COMPONENTS 0x8263 +#define GL_MAX_COMPUTE_ATOMIC_COUNTER_BUFFERS 0x8264 +#define GL_MAX_COMPUTE_ATOMIC_COUNTERS 0x8265 +#define GL_MAX_COMBINED_COMPUTE_UNIFORM_COMPONENTS 0x8266 +#define GL_MAX_COMPUTE_WORK_GROUP_INVOCATIONS 0x90EB +#define 
GL_MAX_COMPUTE_WORK_GROUP_COUNT 0x91BE +#define GL_MAX_COMPUTE_WORK_GROUP_SIZE 0x91BF +#define GL_COMPUTE_WORK_GROUP_SIZE 0x8267 +#define GL_DISPATCH_INDIRECT_BUFFER 0x90EE +#define GL_DISPATCH_INDIRECT_BUFFER_BINDING 0x90EF +#define GL_COMPUTE_SHADER_BIT 0x00000020 +#define GL_DRAW_INDIRECT_BUFFER 0x8F3F +#define GL_DRAW_INDIRECT_BUFFER_BINDING 0x8F43 +#define GL_MAX_UNIFORM_LOCATIONS 0x826E +#define GL_FRAMEBUFFER_DEFAULT_WIDTH 0x9310 +#define GL_FRAMEBUFFER_DEFAULT_HEIGHT 0x9311 +#define GL_FRAMEBUFFER_DEFAULT_SAMPLES 0x9313 +#define GL_FRAMEBUFFER_DEFAULT_FIXED_SAMPLE_LOCATIONS 0x9314 +#define GL_MAX_FRAMEBUFFER_WIDTH 0x9315 +#define GL_MAX_FRAMEBUFFER_HEIGHT 0x9316 +#define GL_MAX_FRAMEBUFFER_SAMPLES 0x9318 +#define GL_UNIFORM 0x92E1 +#define GL_UNIFORM_BLOCK 0x92E2 +#define GL_PROGRAM_INPUT 0x92E3 +#define GL_PROGRAM_OUTPUT 0x92E4 +#define GL_BUFFER_VARIABLE 0x92E5 +#define GL_SHADER_STORAGE_BLOCK 0x92E6 +#define GL_ATOMIC_COUNTER_BUFFER 0x92C0 +#define GL_TRANSFORM_FEEDBACK_VARYING 0x92F4 +#define GL_ACTIVE_RESOURCES 0x92F5 +#define GL_MAX_NAME_LENGTH 0x92F6 +#define GL_MAX_NUM_ACTIVE_VARIABLES 0x92F7 +#define GL_NAME_LENGTH 0x92F9 +#define GL_TYPE 0x92FA +#define GL_ARRAY_SIZE 0x92FB +#define GL_OFFSET 0x92FC +#define GL_BLOCK_INDEX 0x92FD +#define GL_ARRAY_STRIDE 0x92FE +#define GL_MATRIX_STRIDE 0x92FF +#define GL_IS_ROW_MAJOR 0x9300 +#define GL_ATOMIC_COUNTER_BUFFER_INDEX 0x9301 +#define GL_BUFFER_BINDING 0x9302 +#define GL_BUFFER_DATA_SIZE 0x9303 +#define GL_NUM_ACTIVE_VARIABLES 0x9304 +#define GL_ACTIVE_VARIABLES 0x9305 +#define GL_REFERENCED_BY_VERTEX_SHADER 0x9306 +#define GL_REFERENCED_BY_FRAGMENT_SHADER 0x930A +#define GL_REFERENCED_BY_COMPUTE_SHADER 0x930B +#define GL_TOP_LEVEL_ARRAY_SIZE 0x930C +#define GL_TOP_LEVEL_ARRAY_STRIDE 0x930D +#define GL_LOCATION 0x930E +#define GL_VERTEX_SHADER_BIT 0x00000001 +#define GL_FRAGMENT_SHADER_BIT 0x00000002 +#define GL_ALL_SHADER_BITS 0xFFFFFFFF +#define GL_PROGRAM_SEPARABLE 0x8258 +#define GL_ACTIVE_PROGRAM 
0x8259 +#define GL_PROGRAM_PIPELINE_BINDING 0x825A +#define GL_ATOMIC_COUNTER_BUFFER_BINDING 0x92C1 +#define GL_ATOMIC_COUNTER_BUFFER_START 0x92C2 +#define GL_ATOMIC_COUNTER_BUFFER_SIZE 0x92C3 +#define GL_MAX_VERTEX_ATOMIC_COUNTER_BUFFERS 0x92CC +#define GL_MAX_FRAGMENT_ATOMIC_COUNTER_BUFFERS 0x92D0 +#define GL_MAX_COMBINED_ATOMIC_COUNTER_BUFFERS 0x92D1 +#define GL_MAX_VERTEX_ATOMIC_COUNTERS 0x92D2 +#define GL_MAX_FRAGMENT_ATOMIC_COUNTERS 0x92D6 +#define GL_MAX_COMBINED_ATOMIC_COUNTERS 0x92D7 +#define GL_MAX_ATOMIC_COUNTER_BUFFER_SIZE 0x92D8 +#define GL_MAX_ATOMIC_COUNTER_BUFFER_BINDINGS 0x92DC +#define GL_ACTIVE_ATOMIC_COUNTER_BUFFERS 0x92D9 +#define GL_UNSIGNED_INT_ATOMIC_COUNTER 0x92DB +#define GL_MAX_IMAGE_UNITS 0x8F38 +#define GL_MAX_VERTEX_IMAGE_UNIFORMS 0x90CA +#define GL_MAX_FRAGMENT_IMAGE_UNIFORMS 0x90CE +#define GL_MAX_COMBINED_IMAGE_UNIFORMS 0x90CF +#define GL_IMAGE_BINDING_NAME 0x8F3A +#define GL_IMAGE_BINDING_LEVEL 0x8F3B +#define GL_IMAGE_BINDING_LAYERED 0x8F3C +#define GL_IMAGE_BINDING_LAYER 0x8F3D +#define GL_IMAGE_BINDING_ACCESS 0x8F3E +#define GL_IMAGE_BINDING_FORMAT 0x906E +#define GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT 0x00000001 +#define GL_ELEMENT_ARRAY_BARRIER_BIT 0x00000002 +#define GL_UNIFORM_BARRIER_BIT 0x00000004 +#define GL_TEXTURE_FETCH_BARRIER_BIT 0x00000008 +#define GL_SHADER_IMAGE_ACCESS_BARRIER_BIT 0x00000020 +#define GL_COMMAND_BARRIER_BIT 0x00000040 +#define GL_PIXEL_BUFFER_BARRIER_BIT 0x00000080 +#define GL_TEXTURE_UPDATE_BARRIER_BIT 0x00000100 +#define GL_BUFFER_UPDATE_BARRIER_BIT 0x00000200 +#define GL_FRAMEBUFFER_BARRIER_BIT 0x00000400 +#define GL_TRANSFORM_FEEDBACK_BARRIER_BIT 0x00000800 +#define GL_ATOMIC_COUNTER_BARRIER_BIT 0x00001000 +#define GL_ALL_BARRIER_BITS 0xFFFFFFFF +#define GL_IMAGE_2D 0x904D +#define GL_IMAGE_3D 0x904E +#define GL_IMAGE_CUBE 0x9050 +#define GL_IMAGE_2D_ARRAY 0x9053 +#define GL_INT_IMAGE_2D 0x9058 +#define GL_INT_IMAGE_3D 0x9059 +#define GL_INT_IMAGE_CUBE 0x905B +#define GL_INT_IMAGE_2D_ARRAY 0x905E 
+#define GL_UNSIGNED_INT_IMAGE_2D 0x9063 +#define GL_UNSIGNED_INT_IMAGE_3D 0x9064 +#define GL_UNSIGNED_INT_IMAGE_CUBE 0x9066 +#define GL_UNSIGNED_INT_IMAGE_2D_ARRAY 0x9069 +#define GL_IMAGE_FORMAT_COMPATIBILITY_TYPE 0x90C7 +#define GL_IMAGE_FORMAT_COMPATIBILITY_BY_SIZE 0x90C8 +#define GL_IMAGE_FORMAT_COMPATIBILITY_BY_CLASS 0x90C9 +#define GL_READ_ONLY 0x88B8 +#define GL_WRITE_ONLY 0x88B9 +#define GL_READ_WRITE 0x88BA +#define GL_SHADER_STORAGE_BUFFER 0x90D2 +#define GL_SHADER_STORAGE_BUFFER_BINDING 0x90D3 +#define GL_SHADER_STORAGE_BUFFER_START 0x90D4 +#define GL_SHADER_STORAGE_BUFFER_SIZE 0x90D5 +#define GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS 0x90D6 +#define GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS 0x90DA +#define GL_MAX_COMPUTE_SHADER_STORAGE_BLOCKS 0x90DB +#define GL_MAX_COMBINED_SHADER_STORAGE_BLOCKS 0x90DC +#define GL_MAX_SHADER_STORAGE_BUFFER_BINDINGS 0x90DD +#define GL_MAX_SHADER_STORAGE_BLOCK_SIZE 0x90DE +#define GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT 0x90DF +#define GL_SHADER_STORAGE_BARRIER_BIT 0x00002000 +#define GL_MAX_COMBINED_SHADER_OUTPUT_RESOURCES 0x8F39 +#define GL_DEPTH_STENCIL_TEXTURE_MODE 0x90EA +#define GL_STENCIL_INDEX 0x1901 +#define GL_MIN_PROGRAM_TEXTURE_GATHER_OFFSET 0x8E5E +#define GL_MAX_PROGRAM_TEXTURE_GATHER_OFFSET 0x8E5F +#define GL_SAMPLE_POSITION 0x8E50 +#define GL_SAMPLE_MASK 0x8E51 +#define GL_SAMPLE_MASK_VALUE 0x8E52 +#define GL_TEXTURE_2D_MULTISAMPLE 0x9100 +#define GL_MAX_SAMPLE_MASK_WORDS 0x8E59 +#define GL_MAX_COLOR_TEXTURE_SAMPLES 0x910E +#define GL_MAX_DEPTH_TEXTURE_SAMPLES 0x910F +#define GL_MAX_INTEGER_SAMPLES 0x9110 +#define GL_TEXTURE_BINDING_2D_MULTISAMPLE 0x9104 +#define GL_TEXTURE_SAMPLES 0x9106 +#define GL_TEXTURE_FIXED_SAMPLE_LOCATIONS 0x9107 +#define GL_TEXTURE_WIDTH 0x1000 +#define GL_TEXTURE_HEIGHT 0x1001 +#define GL_TEXTURE_DEPTH 0x8071 +#define GL_TEXTURE_INTERNAL_FORMAT 0x1003 +#define GL_TEXTURE_RED_SIZE 0x805C +#define GL_TEXTURE_GREEN_SIZE 0x805D +#define GL_TEXTURE_BLUE_SIZE 0x805E +#define 
GL_TEXTURE_ALPHA_SIZE 0x805F +#define GL_TEXTURE_DEPTH_SIZE 0x884A +#define GL_TEXTURE_STENCIL_SIZE 0x88F1 +#define GL_TEXTURE_SHARED_SIZE 0x8C3F +#define GL_TEXTURE_RED_TYPE 0x8C10 +#define GL_TEXTURE_GREEN_TYPE 0x8C11 +#define GL_TEXTURE_BLUE_TYPE 0x8C12 +#define GL_TEXTURE_ALPHA_TYPE 0x8C13 +#define GL_TEXTURE_DEPTH_TYPE 0x8C16 +#define GL_TEXTURE_COMPRESSED 0x86A1 +#define GL_SAMPLER_2D_MULTISAMPLE 0x9108 +#define GL_INT_SAMPLER_2D_MULTISAMPLE 0x9109 +#define GL_UNSIGNED_INT_SAMPLER_2D_MULTISAMPLE 0x910A +#define GL_VERTEX_ATTRIB_BINDING 0x82D4 +#define GL_VERTEX_ATTRIB_RELATIVE_OFFSET 0x82D5 +#define GL_VERTEX_BINDING_DIVISOR 0x82D6 +#define GL_VERTEX_BINDING_OFFSET 0x82D7 +#define GL_VERTEX_BINDING_STRIDE 0x82D8 +#define GL_VERTEX_BINDING_BUFFER 0x8F4F +#define GL_MAX_VERTEX_ATTRIB_RELATIVE_OFFSET 0x82D9 +#define GL_MAX_VERTEX_ATTRIB_BINDINGS 0x82DA +#define GL_MAX_VERTEX_ATTRIB_STRIDE 0x82E5 +GL_APICALL void GL_APIENTRY glDispatchCompute (GLuint num_groups_x, GLuint num_groups_y, GLuint num_groups_z); +GL_APICALL void GL_APIENTRY glDispatchComputeIndirect (GLintptr indirect); +GL_APICALL void GL_APIENTRY glDrawArraysIndirect (GLenum mode, const void *indirect); +GL_APICALL void GL_APIENTRY glDrawElementsIndirect (GLenum mode, GLenum type, const void *indirect); +GL_APICALL void GL_APIENTRY glFramebufferParameteri (GLenum target, GLenum pname, GLint param); +GL_APICALL void GL_APIENTRY glGetFramebufferParameteriv (GLenum target, GLenum pname, GLint *params); +GL_APICALL void GL_APIENTRY glGetProgramInterfaceiv (GLuint program, GLenum programInterface, GLenum pname, GLint *params); +GL_APICALL GLuint GL_APIENTRY glGetProgramResourceIndex (GLuint program, GLenum programInterface, const GLchar *name); +GL_APICALL void GL_APIENTRY glGetProgramResourceName (GLuint program, GLenum programInterface, GLuint index, GLsizei bufSize, GLsizei *length, GLchar *name); +GL_APICALL void GL_APIENTRY glGetProgramResourceiv (GLuint program, GLenum programInterface, GLuint 
index, GLsizei propCount, const GLenum *props, GLsizei bufSize, GLsizei *length, GLint *params); +GL_APICALL GLint GL_APIENTRY glGetProgramResourceLocation (GLuint program, GLenum programInterface, const GLchar *name); +GL_APICALL void GL_APIENTRY glUseProgramStages (GLuint pipeline, GLbitfield stages, GLuint program); +GL_APICALL void GL_APIENTRY glActiveShaderProgram (GLuint pipeline, GLuint program); +GL_APICALL GLuint GL_APIENTRY glCreateShaderProgramv (GLenum type, GLsizei count, const GLchar *const*strings); +GL_APICALL void GL_APIENTRY glBindProgramPipeline (GLuint pipeline); +GL_APICALL void GL_APIENTRY glDeleteProgramPipelines (GLsizei n, const GLuint *pipelines); +GL_APICALL void GL_APIENTRY glGenProgramPipelines (GLsizei n, GLuint *pipelines); +GL_APICALL GLboolean GL_APIENTRY glIsProgramPipeline (GLuint pipeline); +GL_APICALL void GL_APIENTRY glGetProgramPipelineiv (GLuint pipeline, GLenum pname, GLint *params); +GL_APICALL void GL_APIENTRY glProgramUniform1i (GLuint program, GLint location, GLint v0); +GL_APICALL void GL_APIENTRY glProgramUniform2i (GLuint program, GLint location, GLint v0, GLint v1); +GL_APICALL void GL_APIENTRY glProgramUniform3i (GLuint program, GLint location, GLint v0, GLint v1, GLint v2); +GL_APICALL void GL_APIENTRY glProgramUniform4i (GLuint program, GLint location, GLint v0, GLint v1, GLint v2, GLint v3); +GL_APICALL void GL_APIENTRY glProgramUniform1ui (GLuint program, GLint location, GLuint v0); +GL_APICALL void GL_APIENTRY glProgramUniform2ui (GLuint program, GLint location, GLuint v0, GLuint v1); +GL_APICALL void GL_APIENTRY glProgramUniform3ui (GLuint program, GLint location, GLuint v0, GLuint v1, GLuint v2); +GL_APICALL void GL_APIENTRY glProgramUniform4ui (GLuint program, GLint location, GLuint v0, GLuint v1, GLuint v2, GLuint v3); +GL_APICALL void GL_APIENTRY glProgramUniform1f (GLuint program, GLint location, GLfloat v0); +GL_APICALL void GL_APIENTRY glProgramUniform2f (GLuint program, GLint location, GLfloat v0, 
GLfloat v1); +GL_APICALL void GL_APIENTRY glProgramUniform3f (GLuint program, GLint location, GLfloat v0, GLfloat v1, GLfloat v2); +GL_APICALL void GL_APIENTRY glProgramUniform4f (GLuint program, GLint location, GLfloat v0, GLfloat v1, GLfloat v2, GLfloat v3); +GL_APICALL void GL_APIENTRY glProgramUniform1iv (GLuint program, GLint location, GLsizei count, const GLint *value); +GL_APICALL void GL_APIENTRY glProgramUniform2iv (GLuint program, GLint location, GLsizei count, const GLint *value); +GL_APICALL void GL_APIENTRY glProgramUniform3iv (GLuint program, GLint location, GLsizei count, const GLint *value); +GL_APICALL void GL_APIENTRY glProgramUniform4iv (GLuint program, GLint location, GLsizei count, const GLint *value); +GL_APICALL void GL_APIENTRY glProgramUniform1uiv (GLuint program, GLint location, GLsizei count, const GLuint *value); +GL_APICALL void GL_APIENTRY glProgramUniform2uiv (GLuint program, GLint location, GLsizei count, const GLuint *value); +GL_APICALL void GL_APIENTRY glProgramUniform3uiv (GLuint program, GLint location, GLsizei count, const GLuint *value); +GL_APICALL void GL_APIENTRY glProgramUniform4uiv (GLuint program, GLint location, GLsizei count, const GLuint *value); +GL_APICALL void GL_APIENTRY glProgramUniform1fv (GLuint program, GLint location, GLsizei count, const GLfloat *value); +GL_APICALL void GL_APIENTRY glProgramUniform2fv (GLuint program, GLint location, GLsizei count, const GLfloat *value); +GL_APICALL void GL_APIENTRY glProgramUniform3fv (GLuint program, GLint location, GLsizei count, const GLfloat *value); +GL_APICALL void GL_APIENTRY glProgramUniform4fv (GLuint program, GLint location, GLsizei count, const GLfloat *value); +GL_APICALL void GL_APIENTRY glProgramUniformMatrix2fv (GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLfloat *value); +GL_APICALL void GL_APIENTRY glProgramUniformMatrix3fv (GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLfloat *value); +GL_APICALL 
void GL_APIENTRY glProgramUniformMatrix4fv (GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLfloat *value); +GL_APICALL void GL_APIENTRY glProgramUniformMatrix2x3fv (GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLfloat *value); +GL_APICALL void GL_APIENTRY glProgramUniformMatrix3x2fv (GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLfloat *value); +GL_APICALL void GL_APIENTRY glProgramUniformMatrix2x4fv (GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLfloat *value); +GL_APICALL void GL_APIENTRY glProgramUniformMatrix4x2fv (GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLfloat *value); +GL_APICALL void GL_APIENTRY glProgramUniformMatrix3x4fv (GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLfloat *value); +GL_APICALL void GL_APIENTRY glProgramUniformMatrix4x3fv (GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLfloat *value); +GL_APICALL void GL_APIENTRY glValidateProgramPipeline (GLuint pipeline); +GL_APICALL void GL_APIENTRY glGetProgramPipelineInfoLog (GLuint pipeline, GLsizei bufSize, GLsizei *length, GLchar *infoLog); +GL_APICALL void GL_APIENTRY glBindImageTexture (GLuint unit, GLuint texture, GLint level, GLboolean layered, GLint layer, GLenum access, GLenum format); +GL_APICALL void GL_APIENTRY glGetBooleani_v (GLenum target, GLuint index, GLboolean *data); +GL_APICALL void GL_APIENTRY glMemoryBarrier (GLbitfield barriers); +GL_APICALL void GL_APIENTRY glMemoryBarrierByRegion (GLbitfield barriers); +GL_APICALL void GL_APIENTRY glTexStorage2DMultisample (GLenum target, GLsizei samples, GLenum internalformat, GLsizei width, GLsizei height, GLboolean fixedsamplelocations); +GL_APICALL void GL_APIENTRY glGetMultisamplefv (GLenum pname, GLuint index, GLfloat *val); +GL_APICALL void GL_APIENTRY glSampleMaski (GLuint maskNumber, GLbitfield mask); +GL_APICALL void GL_APIENTRY 
glGetTexLevelParameteriv (GLenum target, GLint level, GLenum pname, GLint *params); +GL_APICALL void GL_APIENTRY glGetTexLevelParameterfv (GLenum target, GLint level, GLenum pname, GLfloat *params); +GL_APICALL void GL_APIENTRY glBindVertexBuffer (GLuint bindingindex, GLuint buffer, GLintptr offset, GLsizei stride); +GL_APICALL void GL_APIENTRY glVertexAttribFormat (GLuint attribindex, GLint size, GLenum type, GLboolean normalized, GLuint relativeoffset); +GL_APICALL void GL_APIENTRY glVertexAttribIFormat (GLuint attribindex, GLint size, GLenum type, GLuint relativeoffset); +GL_APICALL void GL_APIENTRY glVertexAttribBinding (GLuint attribindex, GLuint bindingindex); +GL_APICALL void GL_APIENTRY glVertexBindingDivisor (GLuint bindingindex, GLuint divisor); +#endif /* GL_ES_VERSION_3_1 */ + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/opengles-3.1/include/GLES3/gl3ext.h b/opengles-3.1/include/GLES3/gl3ext.h new file mode 100644 index 0000000000..4d4ea96c4d --- /dev/null +++ b/opengles-3.1/include/GLES3/gl3ext.h @@ -0,0 +1,24 @@ +#ifndef __gl3ext_h_ +#define __gl3ext_h_ + +/* $Revision: 17809 $ on $Date:: 2012-05-14 08:03:36 -0700 #$ */ + +/* + * This document is licensed under the SGI Free Software B License Version + * 2.0. For details, see http://oss.sgi.com/projects/FreeB/ . + */ + +/* OpenGL ES 3 Extensions + * + * After an OES extension's interactions with OpenGl ES 3.0 have been documented, + * its tokens and function definitions should be added to this file in a manner + * that does not conflict with gl2ext.h or gl3.h. + * + * Tokens and function definitions for extensions that have become standard + * features in OpenGL ES 3.0 will not be added to this file. 
+ * + * Applications using OpenGL-ES-2-only extensions should include gl2ext.h + */ + +#endif /* __gl3ext_h_ */ + diff --git a/opengles-3.1/include/GLES3/gl3platform.h b/opengles-3.1/include/GLES3/gl3platform.h new file mode 100644 index 0000000000..1bd1a850fa --- /dev/null +++ b/opengles-3.1/include/GLES3/gl3platform.h @@ -0,0 +1,30 @@ +#ifndef __gl3platform_h_ +#define __gl3platform_h_ + +/* $Revision: 18437 $ on $Date:: 2012-07-08 23:31:39 -0700 #$ */ + +/* + * This document is licensed under the SGI Free Software B License Version + * 2.0. For details, see http://oss.sgi.com/projects/FreeB/ . + */ + +/* Platform-specific types and definitions for OpenGL ES 3.X gl3.h + * + * Adopters may modify khrplatform.h and this file to suit their platform. + * You are encouraged to submit all modifications to the Khronos group so that + * they can be included in future versions of this file. Please submit changes + * by sending them to the public Khronos Bugzilla (http://khronos.org/bugzilla) + * by filing a bug against product "OpenGL-ES" component "Registry". + */ + +#include + +#ifndef GL_APICALL +#define GL_APICALL KHRONOS_APICALL +#endif + +#ifndef GL_APIENTRY +#define GL_APIENTRY KHRONOS_APIENTRY +#endif + +#endif /* __gl3platform_h_ */ diff --git a/opengles-3.1/include/KHR/khrplatform.h b/opengles-3.1/include/KHR/khrplatform.h new file mode 100644 index 0000000000..11e873ea96 --- /dev/null +++ b/opengles-3.1/include/KHR/khrplatform.h @@ -0,0 +1,273 @@ +#ifndef __khrplatform_h_ +#define __khrplatform_h_ + +/* +** Copyright (c) 2008-2009 The Khronos Group Inc. 
+** +** Permission is hereby granted, free of charge, to any person obtaining a +** copy of this software and/or associated documentation files (the +** "Materials"), to deal in the Materials without restriction, including +** without limitation the rights to use, copy, modify, merge, publish, +** distribute, sublicense, and/or sell copies of the Materials, and to +** permit persons to whom the Materials are furnished to do so, subject to +** the following conditions: +** +** The above copyright notice and this permission notice shall be included +** in all copies or substantial portions of the Materials. +** +** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +** MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. +*/ + +/* Khronos platform-specific types and definitions. + * + * $Revision: 9356 $ on $Date: 2009-10-21 02:52:25 -0700 (Wed, 21 Oct 2009) $ + * + * Adopters may modify this file to suit their platform. Adopters are + * encouraged to submit platform specific modifications to the Khronos + * group so that they can be included in future versions of this file. + * Please submit changes by sending them to the public Khronos Bugzilla + * (http://khronos.org/bugzilla) by filing a bug against product + * "Khronos (general)" component "Registry". + * + * A predefined template which fills in some of the bug fields can be + * reached using http://tinyurl.com/khrplatform-h-bugreport, but you + * must create a Bugzilla login first. 
+ * + * + * See the Implementer's Guidelines for information about where this file + * should be located on your system and for more details of its use: + * http://www.khronos.org/registry/implementers_guide.pdf + * + * This file should be included as + * #include + * by Khronos client API header files that use its types and defines. + * + * The types in khrplatform.h should only be used to define API-specific types. + * + * Types defined in khrplatform.h: + * khronos_int8_t signed 8 bit + * khronos_uint8_t unsigned 8 bit + * khronos_int16_t signed 16 bit + * khronos_uint16_t unsigned 16 bit + * khronos_int32_t signed 32 bit + * khronos_uint32_t unsigned 32 bit + * khronos_int64_t signed 64 bit + * khronos_uint64_t unsigned 64 bit + * khronos_intptr_t signed same number of bits as a pointer + * khronos_uintptr_t unsigned same number of bits as a pointer + * khronos_ssize_t signed size + * khronos_usize_t unsigned size + * khronos_float_t signed 32 bit floating point + * khronos_time_ns_t unsigned 64 bit time in nanoseconds + * khronos_utime_nanoseconds_t unsigned time interval or absolute time in + * nanoseconds + * khronos_stime_nanoseconds_t signed time interval in nanoseconds + * khronos_boolean_enum_t enumerated boolean type. This should + * only be used as a base type when a client API's boolean type is + * an enum. Client APIs which use an integer or other type for + * booleans cannot use this as the base type for their boolean. + * + * Tokens defined in khrplatform.h: + * + * KHRONOS_FALSE, KHRONOS_TRUE Enumerated boolean false/true values. + * + * KHRONOS_SUPPORT_INT64 is 1 if 64 bit integers are supported; otherwise 0. + * KHRONOS_SUPPORT_FLOAT is 1 if floats are supported; otherwise 0. 
+ * + * Calling convention macros defined in this file: + * KHRONOS_APICALL + * KHRONOS_APIENTRY + * KHRONOS_APIATTRIBUTES + * + * These may be used in function prototypes as: + * + * KHRONOS_APICALL void KHRONOS_APIENTRY funcname( + * int arg1, + * int arg2) KHRONOS_APIATTRIBUTES; + */ + +/*------------------------------------------------------------------------- + * Definition of KHRONOS_APICALL + *------------------------------------------------------------------------- + * This precedes the return type of the function in the function prototype. + */ +#if defined(_WIN32) && !defined(__SCITECH_SNAP__) +# if defined (_DLL_EXPORTS) +# define KHRONOS_APICALL __declspec(dllexport) +# else +# define KHRONOS_APICALL __declspec(dllimport) +# endif +#elif defined (__SYMBIAN32__) +# define KHRONOS_APICALL IMPORT_C +#else +# define KHRONOS_APICALL +#endif + +/*------------------------------------------------------------------------- + * Definition of KHRONOS_APIENTRY + *------------------------------------------------------------------------- + * This follows the return type of the function and precedes the function + * name in the function prototype. + */ +#if defined(_WIN32) && !defined(_WIN32_WCE) && !defined(__SCITECH_SNAP__) + /* Win32 but not WinCE */ +# define KHRONOS_APIENTRY __stdcall +#else +# define KHRONOS_APIENTRY +#endif + +/*------------------------------------------------------------------------- + * Definition of KHRONOS_APIATTRIBUTES + *------------------------------------------------------------------------- + * This follows the closing parenthesis of the function prototype arguments. 
+ */ +#if defined (__ARMCC_2__) +#define KHRONOS_APIATTRIBUTES __softfp +#else +#define KHRONOS_APIATTRIBUTES +#endif + +/*------------------------------------------------------------------------- + * basic type definitions + *-----------------------------------------------------------------------*/ +#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) || defined(__GNUC__) || defined(__SCO__) || defined(__USLC__) + + +/* + * Using + */ +#include +typedef int32_t khronos_int32_t; +typedef uint32_t khronos_uint32_t; +typedef int64_t khronos_int64_t; +typedef uint64_t khronos_uint64_t; +#define KHRONOS_SUPPORT_INT64 1 +#define KHRONOS_SUPPORT_FLOAT 1 + +#elif defined(__VMS ) || defined(__sgi) + +/* + * Using + */ +#include +typedef int32_t khronos_int32_t; +typedef uint32_t khronos_uint32_t; +typedef int64_t khronos_int64_t; +typedef uint64_t khronos_uint64_t; +#define KHRONOS_SUPPORT_INT64 1 +#define KHRONOS_SUPPORT_FLOAT 1 + +#elif defined(_WIN32) && !defined(__SCITECH_SNAP__) + +/* + * Win32 + */ +typedef __int32 khronos_int32_t; +typedef unsigned __int32 khronos_uint32_t; +typedef __int64 khronos_int64_t; +typedef unsigned __int64 khronos_uint64_t; +#define KHRONOS_SUPPORT_INT64 1 +#define KHRONOS_SUPPORT_FLOAT 1 + +#elif defined(__sun__) || defined(__digital__) + +/* + * Sun or Digital + */ +typedef int khronos_int32_t; +typedef unsigned int khronos_uint32_t; +#if defined(__arch64__) || defined(_LP64) +typedef long int khronos_int64_t; +typedef unsigned long int khronos_uint64_t; +#else +typedef long long int khronos_int64_t; +typedef unsigned long long int khronos_uint64_t; +#endif /* __arch64__ */ +#define KHRONOS_SUPPORT_INT64 1 +#define KHRONOS_SUPPORT_FLOAT 1 + +#elif 0 + +/* + * Hypothetical platform with no float or int64 support + */ +typedef int khronos_int32_t; +typedef unsigned int khronos_uint32_t; +#define KHRONOS_SUPPORT_INT64 0 +#define KHRONOS_SUPPORT_FLOAT 0 + +#else + +/* + * Generic fallback + */ +#include +typedef int32_t 
khronos_int32_t; +typedef uint32_t khronos_uint32_t; +typedef int64_t khronos_int64_t; +typedef uint64_t khronos_uint64_t; +#define KHRONOS_SUPPORT_INT64 1 +#define KHRONOS_SUPPORT_FLOAT 1 + +#endif + + +/* + * Types that are (so far) the same on all platforms + */ +typedef signed char khronos_int8_t; +typedef unsigned char khronos_uint8_t; +typedef signed short int khronos_int16_t; +typedef unsigned short int khronos_uint16_t; +typedef signed long int khronos_intptr_t; +typedef unsigned long int khronos_uintptr_t; +typedef signed long int khronos_ssize_t; +typedef unsigned long int khronos_usize_t; + +#if KHRONOS_SUPPORT_FLOAT +/* + * Float type + */ +typedef float khronos_float_t; +#endif + +#if KHRONOS_SUPPORT_INT64 +/* Time types + * + * These types can be used to represent a time interval in nanoseconds or + * an absolute Unadjusted System Time. Unadjusted System Time is the number + * of nanoseconds since some arbitrary system event (e.g. since the last + * time the system booted). The Unadjusted System Time is an unsigned + * 64 bit value that wraps back to 0 every 584 years. Time intervals + * may be either signed or unsigned. + */ +typedef khronos_uint64_t khronos_utime_nanoseconds_t; +typedef khronos_int64_t khronos_stime_nanoseconds_t; +#endif + +/* + * Dummy value used to pad enum types to 32 bits. + */ +#ifndef KHRONOS_MAX_ENUM +#define KHRONOS_MAX_ENUM 0x7FFFFFFF +#endif + +/* + * Enumerated boolean type + * + * Values other than zero should be considered to be true. Therefore + * comparisons should not be made against KHRONOS_TRUE. 
+ */ +typedef enum { + KHRONOS_FALSE = 0, + KHRONOS_TRUE = 1, + KHRONOS_BOOLEAN_ENUM_FORCE_SIZE = KHRONOS_MAX_ENUM +} khronos_boolean_enum_t; + +#endif /* __khrplatform_h_ */ diff --git a/opengles-3.1/mali_include/EGL/fbdev_window.h b/opengles-3.1/mali_include/EGL/fbdev_window.h new file mode 100644 index 0000000000..f5f90b5370 --- /dev/null +++ b/opengles-3.1/mali_include/EGL/fbdev_window.h @@ -0,0 +1,50 @@ +/* vim:set sts=4 ts=4 noexpandtab: */ +/* + * This confidential and proprietary software may be used only as + * authorised by a licensing agreement from ARM Limited + * (C) COPYRIGHT 2008,2009 ARM Limited + * ALL RIGHTS RESERVED + * The entire notice above must be reproduced on all authorised + * copies and copies may only be made to the extent permitted + * by a licensing agreement from ARM Limited. + */ + +#ifndef _FBDEV_WINDOW_H_ +#define _FBDEV_WINDOW_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +typedef enum +{ + FBDEV_PIXMAP_SUPPORTS_UMP = (1<<0) +} fbdev_pixmap_flags; + +typedef struct fbdev_window +{ + unsigned short width; + unsigned short height; +} fbdev_window; + +typedef struct fbdev_pixmap +{ + unsigned int height; + unsigned int width; + unsigned int bytes_per_pixel; + unsigned char buffer_size; + unsigned char red_size; + unsigned char green_size; + unsigned char blue_size; + unsigned char alpha_size; + unsigned char luminance_size; + fbdev_pixmap_flags flags; + unsigned short *data; +} fbdev_pixmap; + +#ifdef __cplusplus +} +#endif + + +#endif diff --git a/opengles-3.1/stubs/EGL.c b/opengles-3.1/stubs/EGL.c new file mode 100644 index 0000000000..dc0d4bd989 --- /dev/null +++ b/opengles-3.1/stubs/EGL.c @@ -0,0 +1,40 @@ +#include + +#define PRINT_STUB_ERROR printf("\n!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\nERROR: %s from stub libEGL.so library called! 
This library can be used to resolve OpenGL ES symbols at compile time but must *not* be in your runtime path (You need to use a real OpenGL ES implementation, this one is empty)\n!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n", __func__) + +void eglBindAPI(void) { PRINT_STUB_ERROR; return; } +void eglBindTexImage(void) { PRINT_STUB_ERROR; return; } +void eglChooseConfig(void) { PRINT_STUB_ERROR; return; } +void eglCopyBuffers(void) { PRINT_STUB_ERROR; return; } +void eglCreateContext(void) { PRINT_STUB_ERROR; return; } +void eglCreateImageKHR (void) { PRINT_STUB_ERROR; return; } +void eglCreatePbufferFromClientBuffer(void) { PRINT_STUB_ERROR; return; } +void eglCreatePbufferSurface(void) { PRINT_STUB_ERROR; return; } +void eglCreatePixmapSurface(void) { PRINT_STUB_ERROR; return; } +void eglCreateWindowSurface(void) { PRINT_STUB_ERROR; return; } +void eglDestroyContext(void) { PRINT_STUB_ERROR; return; } +void eglDestroyImageKHR (void) { PRINT_STUB_ERROR; return; } +void eglDestroySurface(void) { PRINT_STUB_ERROR; return; } +void eglGetConfigAttrib(void) { PRINT_STUB_ERROR; return; } +void eglGetConfigs(void) { PRINT_STUB_ERROR; return; } +void eglGetCurrentContext(void) { PRINT_STUB_ERROR; return; } +void eglGetCurrentDisplay(void) { PRINT_STUB_ERROR; return; } +void eglGetCurrentSurface(void) { PRINT_STUB_ERROR; return; } +void eglGetDisplay(void) { PRINT_STUB_ERROR; return; } +void eglGetError(void) { PRINT_STUB_ERROR; return; } +void eglGetProcAddress(void) { PRINT_STUB_ERROR; return; } +void eglInitialize(void) { PRINT_STUB_ERROR; return; } +void eglMakeCurrent(void) { PRINT_STUB_ERROR; return; } +void eglQueryAPI(void) { PRINT_STUB_ERROR; return; } +void eglQueryContext(void) { PRINT_STUB_ERROR; return; } +void eglQueryString(void) { PRINT_STUB_ERROR; return; } +void eglQuerySurface(void) { PRINT_STUB_ERROR; return; } +void eglReleaseTexImage(void) { PRINT_STUB_ERROR; return; } +void eglReleaseThread(void) { PRINT_STUB_ERROR; return; } +void eglSurfaceAttrib(void) 
{ PRINT_STUB_ERROR; return; } +void eglSwapBuffers(void) { PRINT_STUB_ERROR; return; } +void eglSwapInterval(void) { PRINT_STUB_ERROR; return; } +void eglTerminate(void) { PRINT_STUB_ERROR; return; } +void eglWaitClient(void) { PRINT_STUB_ERROR; return; } +void eglWaitGL(void) { PRINT_STUB_ERROR; return; } +void eglWaitNative(void) { PRINT_STUB_ERROR; return; } diff --git a/opengles-3.1/stubs/GLESv2.c b/opengles-3.1/stubs/GLESv2.c new file mode 100644 index 0000000000..c58f0f7772 --- /dev/null +++ b/opengles-3.1/stubs/GLESv2.c @@ -0,0 +1,269 @@ +#include + +#define PRINT_STUB_ERROR printf("\n!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\nERROR: %s from stub libGLESv2.so library called! This library can be used to resolve OpenGL ES symbols at compile time but must *not* be in your runtime path (You need to use a real OpenGL ES implementation, this one is empty)\n!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n", __func__) + +void glActiveTexture(void) { PRINT_STUB_ERROR; return; } +void glAttachShader(void) { PRINT_STUB_ERROR; return; } +void glBindAttribLocation(void) { PRINT_STUB_ERROR; return; } +void glBindBuffer(void) { PRINT_STUB_ERROR; return; } +void glBindFramebuffer(void) { PRINT_STUB_ERROR; return; } +void glBindRenderbuffer(void) { PRINT_STUB_ERROR; return; } +void glBindTexture(void) { PRINT_STUB_ERROR; return; } +void glBlendColor(void) { PRINT_STUB_ERROR; return; } +void glBlendEquation(void) { PRINT_STUB_ERROR; return; } +void glBlendEquationSeparate(void) { PRINT_STUB_ERROR; return; } +void glBlendFunc(void) { PRINT_STUB_ERROR; return; } +void glBlendFuncSeparate(void) { PRINT_STUB_ERROR; return; } +void glBufferData(void) { PRINT_STUB_ERROR; return; } +void glBufferSubData(void) { PRINT_STUB_ERROR; return; } +void glCheckFramebufferStatus(void) { PRINT_STUB_ERROR; return; } +void glClear(void) { PRINT_STUB_ERROR; return; } +void glClearColor(void) { PRINT_STUB_ERROR; return; } +void glClearDepthf(void) { PRINT_STUB_ERROR; return; } +void glClearStencil(void) { 
PRINT_STUB_ERROR; return; } +void glColorMask(void) { PRINT_STUB_ERROR; return; } +void glCompileShader(void) { PRINT_STUB_ERROR; return; } +void glCompressedTexImage2D(void) { PRINT_STUB_ERROR; return; } +void glCompressedTexSubImage2D(void) { PRINT_STUB_ERROR; return; } +void glCopyTexImage2D(void) { PRINT_STUB_ERROR; return; } +void glCopyTexSubImage2D(void) { PRINT_STUB_ERROR; return; } +void glCreateProgram(void) { PRINT_STUB_ERROR; return; } +void glCreateShader(void) { PRINT_STUB_ERROR; return; } +void glCullFace(void) { PRINT_STUB_ERROR; return; } +void glDeleteBuffers(void) { PRINT_STUB_ERROR; return; } +void glDeleteFramebuffers(void) { PRINT_STUB_ERROR; return; } +void glDeleteProgram(void) { PRINT_STUB_ERROR; return; } +void glDeleteRenderbuffers(void) { PRINT_STUB_ERROR; return; } +void glDeleteShader(void) { PRINT_STUB_ERROR; return; } +void glDeleteTextures(void) { PRINT_STUB_ERROR; return; } +void glDepthFunc(void) { PRINT_STUB_ERROR; return; } +void glDepthMask(void) { PRINT_STUB_ERROR; return; } +void glDepthRangef(void) { PRINT_STUB_ERROR; return; } +void glDetachShader(void) { PRINT_STUB_ERROR; return; } +void glDisable(void) { PRINT_STUB_ERROR; return; } +void glDisableVertexAttribArray(void) { PRINT_STUB_ERROR; return; } +void glDrawArrays(void) { PRINT_STUB_ERROR; return; } +void glDrawElements(void) { PRINT_STUB_ERROR; return; } +void glEnable(void) { PRINT_STUB_ERROR; return; } +void glEnableVertexAttribArray(void) { PRINT_STUB_ERROR; return; } +void glFinish(void) { PRINT_STUB_ERROR; return; } +void glFlush(void) { PRINT_STUB_ERROR; return; } +void glFramebufferRenderbuffer(void) { PRINT_STUB_ERROR; return; } +void glFramebufferTexture2D(void) { PRINT_STUB_ERROR; return; } +void glFrontFace(void) { PRINT_STUB_ERROR; return; } +void glGenBuffers(void) { PRINT_STUB_ERROR; return; } +void glGenerateMipmap(void) { PRINT_STUB_ERROR; return; } +void glGenFramebuffers(void) { PRINT_STUB_ERROR; return; } +void glGenRenderbuffers(void) { 
PRINT_STUB_ERROR; return; } +void glGenTextures(void) { PRINT_STUB_ERROR; return; } +void glGetActiveAttrib(void) { PRINT_STUB_ERROR; return; } +void glGetActiveUniform(void) { PRINT_STUB_ERROR; return; } +void glGetAttachedShaders(void) { PRINT_STUB_ERROR; return; } +void glGetAttribLocation(void) { PRINT_STUB_ERROR; return; } +void glGetBooleanv(void) { PRINT_STUB_ERROR; return; } +void glGetBufferParameteriv(void) { PRINT_STUB_ERROR; return; } +void glGetError(void) { PRINT_STUB_ERROR; return; } +void glGetFloatv(void) { PRINT_STUB_ERROR; return; } +void glGetFramebufferAttachmentParameteriv(void) { PRINT_STUB_ERROR; return; } +void glGetIntegerv(void) { PRINT_STUB_ERROR; return; } +void glGetProgramiv(void) { PRINT_STUB_ERROR; return; } +void glGetProgramInfoLog(void) { PRINT_STUB_ERROR; return; } +void glGetRenderbufferParameteriv(void) { PRINT_STUB_ERROR; return; } +void glGetShaderiv(void) { PRINT_STUB_ERROR; return; } +void glGetShaderInfoLog(void) { PRINT_STUB_ERROR; return; } +void glGetShaderPrecisionFormat(void) { PRINT_STUB_ERROR; return; } +void glGetShaderSource(void) { PRINT_STUB_ERROR; return; } +void glGetString(void) { PRINT_STUB_ERROR; return; } +void glGetTexParameterfv(void) { PRINT_STUB_ERROR; return; } +void glGetTexParameteriv(void) { PRINT_STUB_ERROR; return; } +void glGetUniformfv(void) { PRINT_STUB_ERROR; return; } +void glGetUniformiv(void) { PRINT_STUB_ERROR; return; } +void glGetUniformLocation(void) { PRINT_STUB_ERROR; return; } +void glGetVertexAttribfv(void) { PRINT_STUB_ERROR; return; } +void glGetVertexAttribiv(void) { PRINT_STUB_ERROR; return; } +void glGetVertexAttribPointerv(void) { PRINT_STUB_ERROR; return; } +void glHint(void) { PRINT_STUB_ERROR; return; } +void glIsBuffer(void) { PRINT_STUB_ERROR; return; } +void glIsEnabled(void) { PRINT_STUB_ERROR; return; } +void glIsFramebuffer(void) { PRINT_STUB_ERROR; return; } +void glIsProgram(void) { PRINT_STUB_ERROR; return; } +void glIsRenderbuffer(void) { PRINT_STUB_ERROR; 
return; } +void glIsShader(void) { PRINT_STUB_ERROR; return; } +void glIsTexture(void) { PRINT_STUB_ERROR; return; } +void glLineWidth(void) { PRINT_STUB_ERROR; return; } +void glLinkProgram(void) { PRINT_STUB_ERROR; return; } +void glPixelStorei(void) { PRINT_STUB_ERROR; return; } +void glPolygonOffset(void) { PRINT_STUB_ERROR; return; } +void glReadPixels(void) { PRINT_STUB_ERROR; return; } +void glReleaseShaderCompiler(void) { PRINT_STUB_ERROR; return; } +void glRenderbufferStorage(void) { PRINT_STUB_ERROR; return; } +void glSampleCoverage(void) { PRINT_STUB_ERROR; return; } +void glScissor(void) { PRINT_STUB_ERROR; return; } +void glShaderBinary(void) { PRINT_STUB_ERROR; return; } +void glShaderSource(void) { PRINT_STUB_ERROR; return; } +void glStencilFunc(void) { PRINT_STUB_ERROR; return; } +void glStencilFuncSeparate(void) { PRINT_STUB_ERROR; return; } +void glStencilMask(void) { PRINT_STUB_ERROR; return; } +void glStencilMaskSeparate(void) { PRINT_STUB_ERROR; return; } +void glStencilOp(void) { PRINT_STUB_ERROR; return; } +void glStencilOpSeparate(void) { PRINT_STUB_ERROR; return; } +void glTexImage2D(void) { PRINT_STUB_ERROR; return; } +void glTexParameterf(void) { PRINT_STUB_ERROR; return; } +void glTexParameterfv(void) { PRINT_STUB_ERROR; return; } +void glTexParameteri(void) { PRINT_STUB_ERROR; return; } +void glTexParameteriv(void) { PRINT_STUB_ERROR; return; } +void glTexSubImage2D(void) { PRINT_STUB_ERROR; return; } +void glUniform1f(void) { PRINT_STUB_ERROR; return; } +void glUniform1fv(void) { PRINT_STUB_ERROR; return; } +void glUniform1i(void) { PRINT_STUB_ERROR; return; } +void glUniform1iv(void) { PRINT_STUB_ERROR; return; } +void glUniform2f(void) { PRINT_STUB_ERROR; return; } +void glUniform2fv(void) { PRINT_STUB_ERROR; return; } +void glUniform2i(void) { PRINT_STUB_ERROR; return; } +void glUniform2iv(void) { PRINT_STUB_ERROR; return; } +void glUniform3f(void) { PRINT_STUB_ERROR; return; } +void glUniform3fv(void) { PRINT_STUB_ERROR; return; } 
+void glUniform3i(void) { PRINT_STUB_ERROR; return; } +void glUniform3iv(void) { PRINT_STUB_ERROR; return; } +void glUniform4f(void) { PRINT_STUB_ERROR; return; } +void glUniform4fv(void) { PRINT_STUB_ERROR; return; } +void glUniform4i(void) { PRINT_STUB_ERROR; return; } +void glUniform4iv(void) { PRINT_STUB_ERROR; return; } +void glUniformMatrix2fv(void) { PRINT_STUB_ERROR; return; } +void glUniformMatrix3fv(void) { PRINT_STUB_ERROR; return; } +void glUniformMatrix4fv(void) { PRINT_STUB_ERROR; return; } +void glUseProgram(void) { PRINT_STUB_ERROR; return; } +void glValidateProgram(void) { PRINT_STUB_ERROR; return; } +void glVertexAttrib1f(void) { PRINT_STUB_ERROR; return; } +void glVertexAttrib1fv(void) { PRINT_STUB_ERROR; return; } +void glVertexAttrib2f(void) { PRINT_STUB_ERROR; return; } +void glVertexAttrib2fv(void) { PRINT_STUB_ERROR; return; } +void glVertexAttrib3f(void) { PRINT_STUB_ERROR; return; } +void glVertexAttrib3fv(void) { PRINT_STUB_ERROR; return; } +void glVertexAttrib4f(void) { PRINT_STUB_ERROR; return; } +void glVertexAttrib4fv(void) { PRINT_STUB_ERROR; return; } +void glVertexAttribPointer(void) { PRINT_STUB_ERROR; return; } +void glViewport(void) { PRINT_STUB_ERROR; return; } + +/* OpenGL ES 3.0 */ + +void glReadBuffer(void) { PRINT_STUB_ERROR; return; } +void glDrawRangeElements(void) { PRINT_STUB_ERROR; return; } +void glTexImage3D(void) { PRINT_STUB_ERROR; return; } +void glTexSubImage3D(void) { PRINT_STUB_ERROR; return; } +void glCopyTexSubImage3D(void) { PRINT_STUB_ERROR; return; } +void glCompressedTexImage3D(void) { PRINT_STUB_ERROR; return; } +void glCompressedTexSubImage3D(void) { PRINT_STUB_ERROR; return; } +void glGenQueries(void) { PRINT_STUB_ERROR; return; } +void glDeleteQueries(void) { PRINT_STUB_ERROR; return; } +void glIsQuery(void) { PRINT_STUB_ERROR; return; } +void glBeginQuery(void) { PRINT_STUB_ERROR; return; } +void glEndQuery(void) { PRINT_STUB_ERROR; return; } +void glGetQueryiv(void) { PRINT_STUB_ERROR; return; } 
+void glGetQueryObjectuiv(void) { PRINT_STUB_ERROR; return; } +void glUnmapBuffer(void) { PRINT_STUB_ERROR; return; } +void glGetBufferPointerv(void) { PRINT_STUB_ERROR; return; } +void glDrawBuffers(void) { PRINT_STUB_ERROR; return; } +void glUniformMatrix2x3fv(void) { PRINT_STUB_ERROR; return; } +void glUniformMatrix3x2fv(void) { PRINT_STUB_ERROR; return; } +void glUniformMatrix2x4fv(void) { PRINT_STUB_ERROR; return; } +void glUniformMatrix4x2fv(void) { PRINT_STUB_ERROR; return; } +void glUniformMatrix3x4fv(void) { PRINT_STUB_ERROR; return; } +void glUniformMatrix4x3fv(void) { PRINT_STUB_ERROR; return; } +void glBlitFramebuffer(void) { PRINT_STUB_ERROR; return; } +void glRenderbufferStorageMultisample(void) { PRINT_STUB_ERROR; return; } +void glFramebufferTextureLayer(void) { PRINT_STUB_ERROR; return; } +void glMapBufferRange(void) { PRINT_STUB_ERROR; return; } +void glFlushMappedBufferRange(void) { PRINT_STUB_ERROR; return; } +void glBindVertexArray(void) { PRINT_STUB_ERROR; return; } +void glDeleteVertexArrays(void) { PRINT_STUB_ERROR; return; } +void glGenVertexArrays(void) { PRINT_STUB_ERROR; return; } +void glIsVertexArray(void) { PRINT_STUB_ERROR; return; } +void glGetIntegeri_v(void) { PRINT_STUB_ERROR; return; } +void glBeginTransformFeedback(void) { PRINT_STUB_ERROR; return; } +void glEndTransformFeedback(void) { PRINT_STUB_ERROR; return; } +void glBindBufferRange(void) { PRINT_STUB_ERROR; return; } +void glBindBufferBase(void) { PRINT_STUB_ERROR; return; } +void glTransformFeedbackVaryings(void) { PRINT_STUB_ERROR; return; } +void glGetTransformFeedbackVarying(void) { PRINT_STUB_ERROR; return; } +void glVertexAttribIPointer(void) { PRINT_STUB_ERROR; return; } +void glGetVertexAttribIiv(void) { PRINT_STUB_ERROR; return; } +void glGetVertexAttribIuiv(void) { PRINT_STUB_ERROR; return; } +void glVertexAttribI4i(void) { PRINT_STUB_ERROR; return; } +void glVertexAttribI4ui(void) { PRINT_STUB_ERROR; return; } +void glVertexAttribI4iv(void) { PRINT_STUB_ERROR; 
return; } +void glVertexAttribI4uiv(void) { PRINT_STUB_ERROR; return; } +void glGetUniformuiv(void) { PRINT_STUB_ERROR; return; } +void glGetFragDataLocation(void) { PRINT_STUB_ERROR; return; } +void glUniform1ui(void) { PRINT_STUB_ERROR; return; } +void glUniform2ui(void) { PRINT_STUB_ERROR; return; } +void glUniform3ui(void) { PRINT_STUB_ERROR; return; } +void glUniform4ui(void) { PRINT_STUB_ERROR; return; } +void glUniform1uiv(void) { PRINT_STUB_ERROR; return; } +void glUniform2uiv(void) { PRINT_STUB_ERROR; return; } +void glUniform3uiv(void) { PRINT_STUB_ERROR; return; } +void glUniform4uiv(void) { PRINT_STUB_ERROR; return; } +void glClearBufferiv(void) { PRINT_STUB_ERROR; return; } +void glClearBufferuiv(void) { PRINT_STUB_ERROR; return; } +void glClearBufferfv(void) { PRINT_STUB_ERROR; return; } +void glClearBufferfi(void) { PRINT_STUB_ERROR; return; } +void glGetStringi(void) { PRINT_STUB_ERROR; return; } +void glCopyBufferSubData(void) { PRINT_STUB_ERROR; return; } +void glGetUniformIndices(void) { PRINT_STUB_ERROR; return; } +void glGetActiveUniformsiv(void) { PRINT_STUB_ERROR; return; } +void glGetUniformBlockIndex(void) { PRINT_STUB_ERROR; return; } +void glGetActiveUniformBlockiv(void) { PRINT_STUB_ERROR; return; } +void glGetActiveUniformBlockName(void) { PRINT_STUB_ERROR; return; } +void glUniformBlockBinding(void) { PRINT_STUB_ERROR; return; } +void glDrawArraysInstanced(void) { PRINT_STUB_ERROR; return; } +void glDrawElementsInstanced(void) { PRINT_STUB_ERROR; return; } +void glFenceSync(void) { PRINT_STUB_ERROR; return; } +void glIsSync(void) { PRINT_STUB_ERROR; return; } +void glDeleteSync(void) { PRINT_STUB_ERROR; return; } +void glClientWaitSync(void) { PRINT_STUB_ERROR; return; } +void glWaitSync(void) { PRINT_STUB_ERROR; return; } +void glGetInteger64v(void) { PRINT_STUB_ERROR; return; } +void glGetSynciv(void) { PRINT_STUB_ERROR; return; } +void glGetInteger64i_v(void) { PRINT_STUB_ERROR; return; } +void glGetBufferParameteri64v(void) { 
PRINT_STUB_ERROR; return; } +void glGenSamplers(void) { PRINT_STUB_ERROR; return; } +void glDeleteSamplers(void) { PRINT_STUB_ERROR; return; } +void glIsSampler(void) { PRINT_STUB_ERROR; return; } +void glBindSampler(void) { PRINT_STUB_ERROR; return; } +void glSamplerParameteri(void) { PRINT_STUB_ERROR; return; } +void glSamplerParameteriv(void) { PRINT_STUB_ERROR; return; } +void glSamplerParameterf(void) { PRINT_STUB_ERROR; return; } +void glSamplerParameterfv(void) { PRINT_STUB_ERROR; return; } +void glGetSamplerParameteriv(void) { PRINT_STUB_ERROR; return; } +void glGetSamplerParameterfv(void) { PRINT_STUB_ERROR; return; } +void glVertexAttribDivisor(void) { PRINT_STUB_ERROR; return; } +void glBindTransformFeedback(void) { PRINT_STUB_ERROR; return; } +void glDeleteTransformFeedbacks(void) { PRINT_STUB_ERROR; return; } +void glGenTransformFeedbacks(void) { PRINT_STUB_ERROR; return; } +void glIsTransformFeedback(void) { PRINT_STUB_ERROR; return; } +void glPauseTransformFeedback(void) { PRINT_STUB_ERROR; return; } +void glResumeTransformFeedback(void) { PRINT_STUB_ERROR; return; } +void glGetProgramBinary(void) { PRINT_STUB_ERROR; return; } +void glProgramBinary(void) { PRINT_STUB_ERROR; return; } +void glProgramParameteri(void) { PRINT_STUB_ERROR; return; } +void glInvalidateFramebuffer(void) { PRINT_STUB_ERROR; return; } +void glInvalidateSubFramebuffer(void) { PRINT_STUB_ERROR; return; } +void glTexStorage2D(void) { PRINT_STUB_ERROR; return; } +void glTexStorage3D(void) { PRINT_STUB_ERROR; return; } +void glGetInternalformativ(void) { PRINT_STUB_ERROR; return; } + +/* OpenGL ES 3.1, incomplete */ + +void glProgramUniform1ui(void) { PRINT_STUB_ERROR; return; } +void glDispatchCompute(void) { PRINT_STUB_ERROR; return; } +void glMemoryBarrier(void) { PRINT_STUB_ERROR; return; } +void glBindImageTexture(void) { PRINT_STUB_ERROR; return; } +void glProgramUniformMatrix4fv(void) { PRINT_STUB_ERROR; return; } +void glProgramUniform1f(void) { PRINT_STUB_ERROR; return; } 
+void glProgramUniform2f(void) { PRINT_STUB_ERROR; return; } +void glProgramUniform3f(void) { PRINT_STUB_ERROR; return; } +void glProgramUniform3fv(void) { PRINT_STUB_ERROR; return; } +void glProgramUniform4f(void) { PRINT_STUB_ERROR; return; } +void glDrawElementsIndirect(void) { PRINT_STUB_ERROR; return; } + + diff --git a/opengles-3.1/stubs/Readme.txt b/opengles-3.1/stubs/Readme.txt new file mode 100644 index 0000000000..4c079b1b38 --- /dev/null +++ b/opengles-3.1/stubs/Readme.txt @@ -0,0 +1,2 @@ +These are dummy libraries for linking against at build time. +The application will require the real device libraries to run. diff --git a/opengles-3.1/stubs/SConscript b/opengles-3.1/stubs/SConscript new file mode 100644 index 0000000000..5d4cb87811 --- /dev/null +++ b/opengles-3.1/stubs/SConscript @@ -0,0 +1,11 @@ +Import("env") + +egl = env.SharedLibrary("EGL", "EGL.c") +alias = Alias("egl", egl) +Default(alias) +Export("egl") + +glesv2 = env.SharedLibrary("GLESv2", "GLESv2.c") +alias = Alias("glesv2", glesv2) +Default(alias) +Export("glesv2") diff --git a/scripts/check_bad_style.sh b/scripts/check_bad_style.sh index 827163f02c..cd6e9042c0 100755 --- a/scripts/check_bad_style.sh +++ b/scripts/check_bad_style.sh @@ -21,7 +21,7 @@ then exit -1 fi -grep -HnRE --exclude-dir=assembly "\buint " --exclude-dir=cl_kernels $DIRECTORIES | tee bad_style.log +grep -HnRE --exclude-dir=assembly "\buint " --exclude-dir=cl_kernels --exclude-dir=cs_shaders $DIRECTORIES | tee bad_style.log if [[ $(cat bad_style.log | wc -l) > 0 ]] then echo "" diff --git a/scripts/clang_tidy_rules.py b/scripts/clang_tidy_rules.py index 09dc2e1c70..c70395fae8 100755 --- a/scripts/clang_tidy_rules.py +++ b/scripts/clang_tidy_rules.py @@ -6,13 +6,14 @@ import re import sys def get_list_includes(): - return "include . 3rdparty/include kernels computer_vision".split() + return "opengles-3.1/include opengles-3.1/mali_include include . 
3rdparty/include kernels computer_vision".split() def get_list_flags( filename, arch): assert arch in ["armv7", "aarch64"] flags = ["-std=c++11"] flags.append("-DARM_COMPUTE_CPP_SCHEDULER=1") flags.append("-DARM_COMPUTE_CL") + flags.append("-DARM_COMPUTE_GC") if arch == "aarch64": flags.append("-DARM_COMPUTE_AARCH64_V8_2") return flags diff --git a/scripts/fix_code_formatting.sh b/scripts/fix_code_formatting.sh index a07d2615af..721ade853f 100755 --- a/scripts/fix_code_formatting.sh +++ b/scripts/fix_code_formatting.sh @@ -22,7 +22,7 @@ DIRECTORIES="./arm_compute ./src ./examples ./tests ./utils ./support" if [ $# -eq 0 ] then - files=$(find $DIRECTORIES -type f \( -name \*.cpp -o -iname \*.h -o -name \*.inl -o -name \*.cl \)) + files=$(find $DIRECTORIES -type f \( -name \*.cpp -o -iname \*.h -o -name \*.inl -o -name \*.cl -o -name \*.cs \)) else files=$@ fi diff --git a/src/core/CL/cl_kernels/direct_convolution1x1.cl b/src/core/CL/cl_kernels/direct_convolution1x1.cl index 7b73b85eac..484bc35ef1 100644 --- a/src/core/CL/cl_kernels/direct_convolution1x1.cl +++ b/src/core/CL/cl_kernels/direct_convolution1x1.cl @@ -153,7 +153,7 @@ inline VEC_DATA_TYPE(DATA_TYPE, 8) extract_input_stride3_8(__global const DATA_T * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in bytes) * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes) * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor - * @param[out] weights_ptr Pointer to the weights tensor. Supported data types: same as @p weights_ptr + * @param[out] weights_ptr Pointer to the weights tensor. 
Supported data types: same as @p src_ptr * @param[in] weights_stride_x Stride of the weights tensor in X dimension (in bytes) * @param[in] weights_step_x weights_stride_x * number of elements along X processed per workitem(in bytes) * @param[in] weights_stride_y Stride of the weights tensor in Y dimension (in bytes) @@ -241,7 +241,7 @@ __kernel void direct_convolution1x1( * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in bytes) * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes) * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor - * @param[out] weights_ptr Pointer to the weights tensor. Supported data types: same as @p weights_ptr + * @param[out] weights_ptr Pointer to the weights tensor. Supported data types: same as @p src_ptr * @param[in] weights_stride_x Stride of the weights tensor in X dimension (in bytes) * @param[in] weights_step_x weights_stride_x * number of elements along X processed per workitem(in bytes) * @param[in] weights_stride_y Stride of the weights tensor in Y dimension (in bytes) diff --git a/src/core/CL/cl_kernels/direct_convolution3x3.cl b/src/core/CL/cl_kernels/direct_convolution3x3.cl index 1420d7c873..e6e3007c95 100644 --- a/src/core/CL/cl_kernels/direct_convolution3x3.cl +++ b/src/core/CL/cl_kernels/direct_convolution3x3.cl @@ -102,7 +102,7 @@ MULQ_SAT_IMPL(qs32x8, qs32x8) * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in bytes) * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes) * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor - * @param[out] weights_ptr Pointer to the weights tensor. Supported data types: same as @p weights_ptr + * @param[out] weights_ptr Pointer to the weights tensor. 
Supported data types: same as @p src_ptr * @param[in] weights_stride_x Stride of the weights tensor in X dimension (in bytes) * @param[in] weights_step_x weights_stride_x * number of elements along X processed per workitem(in bytes) * @param[in] weights_stride_y Stride of the weights tensor in Y dimension (in bytes) @@ -198,7 +198,7 @@ __kernel void direct_convolution3x3( * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in bytes) * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes) * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor - * @param[out] weights_ptr Pointer to the weights tensor. Supported data types: same as @p weights_ptr + * @param[out] weights_ptr Pointer to the weights tensor. Supported data types: same as @p src_ptr * @param[in] weights_stride_x Stride of the weights tensor in X dimension (in bytes) * @param[in] weights_step_x weights_stride_x * number of elements along X processed per workitem(in bytes) * @param[in] weights_stride_y Stride of the weights tensor in Y dimension (in bytes) diff --git a/src/core/CL/cl_kernels/direct_convolution5x5.cl b/src/core/CL/cl_kernels/direct_convolution5x5.cl index 6fdd019a14..12cf0fb68e 100644 --- a/src/core/CL/cl_kernels/direct_convolution5x5.cl +++ b/src/core/CL/cl_kernels/direct_convolution5x5.cl @@ -91,7 +91,7 @@ * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in bytes) * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes) * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor - * @param[out] weights_ptr Pointer to the weights tensor. Supported data types: same as @p weights_ptr + * @param[out] weights_ptr Pointer to the weights tensor. 
Supported data types: same as @p src_ptr * @param[in] weights_stride_x Stride of the weights tensor in X dimension (in bytes) * @param[in] weights_step_x weights_stride_x * number of elements along X processed per workitem(in bytes) * @param[in] weights_stride_y Stride of the weights tensor in Y dimension (in bytes) @@ -197,7 +197,7 @@ __kernel void direct_convolution5x5( * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in bytes) * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes) * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor - * @param[out] weights_ptr Pointer to the weights tensor. Supported data types: same as @p weights_ptr + * @param[out] weights_ptr Pointer to the weights tensor. Supported data types: same as @p src_ptr * @param[in] weights_stride_x Stride of the weights tensor in X dimension (in bytes) * @param[in] weights_step_x weights_stride_x * number of elements along X processed per workitem(in bytes) * @param[in] weights_stride_y Stride of the weights tensor in Y dimension (in bytes) diff --git a/src/core/Error.cpp b/src/core/Error.cpp index 2e699feeb9..3b0a012f5f 100644 --- a/src/core/Error.cpp +++ b/src/core/Error.cpp @@ -30,23 +30,29 @@ using namespace arm_compute; +Error arm_compute::create_error_va_list(ErrorCode error_code, const char *function, const char *file, const int line, const char *msg, va_list args) +{ + char out[512]; + int offset = snprintf(out, sizeof(out), "in %s %s:%d: ", function, file, line); + vsnprintf(out + offset, sizeof(out) - offset, msg, args); + + return Error(error_code, std::string(out)); +} + Error arm_compute::create_error(ErrorCode error_code, const char *function, const char *file, const int line, const char *msg, ...) 
{ - char out[512]; va_list args; va_start(args, msg); - int offset = snprintf(out, sizeof(out), "in %s %s:%d: ", function, file, line); - vsnprintf(out + offset, sizeof(out) - offset, msg, args); + auto err = create_error_va_list(error_code, function, file, line, msg, args); va_end(args); - - return Error(error_code, std::string(out)); + return err; } void arm_compute::error(const char *function, const char *file, const int line, const char *msg, ...) { va_list args; va_start(args, msg); - auto err = create_error(ErrorCode::RUNTIME_ERROR, function, file, line, msg, args); + auto err = create_error_va_list(ErrorCode::RUNTIME_ERROR, function, file, line, msg, args); va_end(args); throw std::runtime_error(err.description()); } diff --git a/src/core/GLES_COMPUTE/GCKernelLibrary.cpp b/src/core/GLES_COMPUTE/GCKernelLibrary.cpp new file mode 100644 index 0000000000..fd362f1665 --- /dev/null +++ b/src/core/GLES_COMPUTE/GCKernelLibrary.cpp @@ -0,0 +1,716 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h" + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/Utils.h" + +#include +#include +#include +#include +#include +#include + +using namespace arm_compute; + +GCProgram::GCProgram() + : _name(), _source() +{ +} + +GCProgram::GCProgram(std::string name, std::string source) + : _name(std::move(name)), _source(std::move(source)) +{ +} + +GLuint GCProgram::link_program(GLuint shader) +{ + GLuint program = ARM_COMPUTE_GL_CHECK(glCreateProgram()); + + GLint rvalue; + GLsizei length; + + ARM_COMPUTE_GL_CHECK(glAttachShader(program, shader)); + ARM_COMPUTE_GL_CHECK(glLinkProgram(program)); + ARM_COMPUTE_GL_CHECK(glDetachShader(program, shader)); + ARM_COMPUTE_GL_CHECK(glDeleteShader(shader)); + + // Check if there were some issues when linking the shader. 
+ ARM_COMPUTE_GL_CHECK(glGetProgramiv(program, GL_LINK_STATUS, &rvalue)); + + if(rvalue == 0) + { + ARM_COMPUTE_GL_CHECK(glGetProgramiv(program, GL_INFO_LOG_LENGTH, &length)); + + std::vector log(length); + ARM_COMPUTE_GL_CHECK(glGetProgramInfoLog(program, length, nullptr, log.data())); + ARM_COMPUTE_ERROR("Error: Linker log:\n%s\n", log.data()); + + return 0; + } + + ARM_COMPUTE_GL_CHECK(glUseProgram(program)); + + return program; +} + +GLuint GCProgram::compile_shader(const std::string &build_options) +{ + GLuint shader = ARM_COMPUTE_GL_CHECK(glCreateShader(GL_COMPUTE_SHADER)); + + const char *src[] + { + "#version 310 es\n", + build_options.c_str(), + _source.c_str() + }; + + ARM_COMPUTE_GL_CHECK(glShaderSource(shader, sizeof(src) / sizeof(src[0]), src, nullptr)); + + ARM_COMPUTE_GL_CHECK(glCompileShader(shader)); + + // Check if there were any issues when compiling the shader + GLint rvalue; + GLsizei length; + + ARM_COMPUTE_GL_CHECK(glGetShaderiv(shader, GL_COMPILE_STATUS, &rvalue)); + + if(rvalue == 0) + { + ARM_COMPUTE_GL_CHECK(glGetShaderiv(shader, GL_INFO_LOG_LENGTH, &length)); + + std::vector log(length); + ARM_COMPUTE_GL_CHECK(glGetShaderInfoLog(shader, length, nullptr, log.data())); + +#ifdef ARM_COMPUTE_DEBUG_ENABLED + std::istringstream ss(_source); + std::stringstream output_stream; + std::string line; + size_t line_num = 1; + + ARM_COMPUTE_LOG_INFO_MSG_WITH_FORMAT_CORE("GLES Shader build options:\n%s\n", build_options.c_str()); + while(std::getline(ss, line, '\n')) + { + output_stream << std::setw(6) << line_num << ": " << line << std::endl; + line_num++; + } + ARM_COMPUTE_LOG_INFO_STREAM_CORE("GLES Shader source code:" << output_stream.rdbuf()); +#endif /* ARM_COMPUTE_DEBUG_ENABLED */ + + ARM_COMPUTE_ERROR("Error: Compiler log:\n%s\n", log.data()); + + return 0; + } + + return shader; +} + +GCKernel::GCKernel() + : _name(), _program(), _params(), _shader_params(), _shader_params_binding_point(), _shader_params_index(), _shader_params_size() +{ +} + 
+GCKernel::GCKernel(std::string name, GLuint program) + : _name(std::move(name)), + _program(program), + _params(), + _shader_params(0), + _shader_params_binding_point(0), + _shader_params_index(0), + _shader_params_size(0) +{ + _params.clear(); + + ARM_COMPUTE_GL_CHECK(glGenBuffers(1, &_shader_params)); + + _shader_params_index = ARM_COMPUTE_GL_CHECK(glGetUniformBlockIndex(_program, _shader_params_name)); + ARM_COMPUTE_ERROR_ON_MSG((_shader_params_index == GL_INVALID_INDEX), "Failed to get index of %s", _shader_params_name); + ARM_COMPUTE_GL_CHECK(glGetActiveUniformBlockiv(_program, _shader_params_index, GL_UNIFORM_BLOCK_DATA_SIZE, &_shader_params_size)); + ARM_COMPUTE_ERROR_ON_MSG((_shader_params_size == 0), "Failed to get size of %s", _shader_params_name); +} + +void GCKernel::cleanup() +{ + ARM_COMPUTE_GL_CHECK(glDeleteBuffers(1, &_shader_params)); + ARM_COMPUTE_GL_CHECK(glBindBuffer(GL_UNIFORM_BUFFER, 0)); + ARM_COMPUTE_GL_CHECK(glDeleteProgram(_program)); + ARM_COMPUTE_GL_CHECK(glUseProgram(0)); +} + +void GCKernel::use() +{ + ARM_COMPUTE_GL_CHECK(glUseProgram(_program)); +} + +void GCKernel::unuse() +{ + ARM_COMPUTE_GL_CHECK(glUseProgram(0)); +} + +void GCKernel::update_shader_params() +{ + ARM_COMPUTE_ERROR_ON_MSG((_shader_params_size != (int)(_params.size() * sizeof(_params[0]))), "Params size (%d) is not equal to shader params block size (%d)", _params.size() * sizeof(_params[0]), + _shader_params_size); + + ARM_COMPUTE_GL_CHECK(glUniformBlockBinding(_program, _shader_params_index, _shader_params_binding_point)); + ARM_COMPUTE_GL_CHECK(glBindBufferBase(GL_UNIFORM_BUFFER, _shader_params_binding_point, _shader_params)); + ARM_COMPUTE_GL_CHECK(glBindBuffer(GL_UNIFORM_BUFFER, _shader_params)); + ARM_COMPUTE_GL_CHECK(glBufferData(GL_UNIFORM_BUFFER, _shader_params_size, _params.data(), GL_DYNAMIC_DRAW)); + ARM_COMPUTE_GL_CHECK(glBindBuffer(GL_UNIFORM_BUFFER, 0)); +} + +const std::map GCKernelLibrary::_shader_program_map = +{ + { "absdiff", "absdiff.cs" }, + { 
"col2im", "convolution_layer.cs" }, + { "direct_convolution1x1", "direct_convolution1x1.cs" }, + { "direct_convolution3x3", "direct_convolution3x3.cs" }, + { "direct_convolution5x5", "direct_convolution5x5.cs" }, + { "pooling_layer_2", "pooling_layer.cs" }, + { "pooling_layer_3", "pooling_layer.cs" }, + { "pooling_layer_7", "pooling_layer.cs" }, + { "pooling_layer_3_optimized", "pooling_layer.cs" }, + { "pooling_layer_n", "pooling_layer.cs" }, + { "fill_image_borders_replicate", "fill_border.cs" }, + { "fill_image_borders_constant", "fill_border.cs" }, + { "gemm_accumulate_biases", "gemm.cs" }, + { "gemm_interleave4x4", "gemm.cs" }, + { "gemm_ma", "gemm.cs" }, + { "gemm_mm_interleaved_transposed", "gemm.cs" }, + { "gemm_mm_floating_point", "gemm.cs" }, + { "gemm_transpose1x4", "gemm.cs" }, + { "im2col_kernel3x3_padx0_pady0", "convolution_layer.cs" }, + { "im2col_generic", "convolution_layer.cs" }, + { "im2col_reduced", "convolution_layer.cs" }, + { "transpose", "transpose.cs" }, + { "activation_layer", "activation_layer.cs" }, + { "softmax_layer_max", "softmax_layer.cs" }, + { "softmax_layer_shift_exp_sum", "softmax_layer.cs" }, + { "softmax_layer_norm", "softmax_layer.cs" }, + { "pixelwise_mul_float", "pixelwise_mul_float.cs" }, + { "normalization_layer", "normalization_layer.cs" }, + { "batchnormalization_layer", "batchnormalization_layer.cs" }, + { "concatenate_depth", "concatenate.cs" }, + { "dropout", "dropout.cs" }, +}; + +const std::map GCKernelLibrary::_program_source_map = +{ +#ifdef EMBEDDED_KERNELS + { + "absdiff.cs", +#include "./cs_shaders/absdiff.csembed" + }, + { + "convolution_layer.cs", +#include "./cs_shaders/convolution_layer.csembed" + }, + { + "direct_convolution1x1.cs", +#include "./cs_shaders/direct_convolution1x1.csembed" + }, + { + "direct_convolution3x3.cs", +#include "./cs_shaders/direct_convolution3x3.csembed" + }, + { + "direct_convolution5x5.cs", +#include "./cs_shaders/direct_convolution5x5.csembed" + }, + { + "pooling_layer.cs", 
+#include "./cs_shaders/pooling_layer.csembed" + }, + { + "fill_border.cs", +#include "./cs_shaders/fill_border.csembed" + }, + { + "gemm.cs", +#include "./cs_shaders/gemm.csembed" + }, + { + "transpose.cs", +#include "./cs_shaders/transpose.csembed" + }, + { + "activation_layer.cs", +#include "./cs_shaders/activation_layer.csembed" + }, + { + "softmax_layer.cs", +#include "./cs_shaders/softmax_layer.csembed" + }, + { + "pixelwise_mul_float.cs", +#include "./cs_shaders/pixelwise_mul_float.csembed" + }, + { + "normalization_layer.cs", +#include "./cs_shaders/normalization_layer.csembed" + }, + { + "batchnormalization_layer.cs", +#include "./cs_shaders/batchnormalization_layer.csembed" + }, + { + "concatenate.cs", +#include "./cs_shaders/concatenate.csembed" + }, + { + "dropout.cs", +#include "./cs_shaders/dropout.csembed" + }, +#endif /* EMBEDDED_KERNELS */ +}; + +GCKernelLibrary::GCKernelLibrary() + : _display(EGL_NO_DISPLAY), _context(EGL_NO_CONTEXT), _frame_buffer(0), _tex_rt(0), _own_context(false), _shader_path("./"), _programs_map(), _built_programs_map() +{ +} + +GCKernelLibrary &GCKernelLibrary::get() +{ + static GCKernelLibrary _kernel_library; + return _kernel_library; +} + +GCKernel GCKernelLibrary::create_kernel(const std::string &shader_name, const StringSet &build_options_set) const +{ + // Find which program contains the kernel + auto shader_program_it = _shader_program_map.find(shader_name); + + if(_shader_program_map.end() == shader_program_it) + { + ARM_COMPUTE_ERROR("Shader %s not found in the GCKernelLibrary", shader_name.c_str()); + } + + // Check if the program has been built before with same build options. 
+ const std::string program_name = shader_program_it->second; + const std::string build_options = stringify_set(build_options_set); + const std::string built_program_name = program_name + "_" + build_options; + auto built_program_it = _built_programs_map.find(built_program_name); + + GCKernel kernel; + + if(_built_programs_map.end() != built_program_it) + { + // If program has been built, retrieve to create kernel from it + kernel = built_program_it->second; + kernel.use(); + } + else + { + GCProgram program = load_program(program_name); + + std::string source_name = _shader_path + shader_program_it->second; + + // load shader + GLuint shader = program.compile_shader(build_options); + + // Build program + GLuint gles_program = program.link_program(shader); + + // Create GCKernel + kernel = GCKernel(shader_name, gles_program); + + // Add built program to internal map + _built_programs_map.emplace(built_program_name, kernel); + } + + return kernel; +} + +const std::string GCKernelLibrary::preprocess_shader(const std::string &shader_source) const +{ + enum class ParserStage + { + FIRST, + SKIP_COMMENTS = FIRST, + RESOLVE_INCLUDES, + SKIP_PREPROCESSOR_DIRECTIVES, + SEARCH_MACRO_DEFINITIONS, + EXPAND_MACRO_USES, + LAST + }; + + struct MacroDefinitionInfo + { + const std::vector param_list; + const std::string content; + }; + + // Found macro definitions so far + std::map macro_definitions; + + // Define a GLES compute shader parser function + std::function cs_parser; + cs_parser = [&](const std::string & src, ParserStage stage, int nested_level) -> std::string + { + std::string dst; + + if(stage == ParserStage::LAST || std::regex_match(src, std::regex(R"(\s*)"))) + { + return src; + } + auto next_stage = static_cast(static_cast(stage) + 1); + + std::string search_pattern; + switch(stage) + { + case ParserStage::SKIP_COMMENTS: + search_pattern = R"((/\*([^*]|\n|(\*+([^*/]|\n)))*\*+/)|(//.*))"; + break; + case ParserStage::RESOLVE_INCLUDES: + search_pattern = 
R"rgx((?:^|\n)[ \t]*#include "(.*)")rgx"; + break; + case ParserStage::SKIP_PREPROCESSOR_DIRECTIVES: + search_pattern = R"((^|\n)[ \t]*(#ifdef|#ifndef|#if)[^\n]+)"; + break; + case ParserStage::SEARCH_MACRO_DEFINITIONS: + search_pattern = R"((?:^|\n)[ \t]*#define[ \t]+(\w+)(?:\((\w+(?:[ \t]*,[ \t]*\w+)*)\))?(?: |\t|\\\n)*((?:(?:[^\\\n]|\\[^\n])*\\+\n)*(?:[ \t]*[^ \t\n]+)*)[ \t]*)"; + break; + case ParserStage::EXPAND_MACRO_USES: + { + if(macro_definitions.empty()) + { + // Nothing to expand + return src; + } + int i = 0; + for(auto &def : macro_definitions) + { + if(i == 0) + { + search_pattern = R"((\b)" + def.first; + } + else + { + search_pattern += R"(\b|\b)" + def.first; + } + i++; + } + search_pattern += R"(\b))"; + break; + } + default: + break; + } + + std::regex search_regex(search_pattern); + std::smatch match; + ptrdiff_t parsed_pos = 0; + if(std::regex_search(src, match, search_regex)) + { + // Pass the content before the match to the next stage + dst.append(cs_parser(src.substr(0, match.position()), next_stage, 0)); + parsed_pos = match.position() + match.length(); + + // Deal with the matched content + switch(stage) + { + case ParserStage::RESOLVE_INCLUDES: + { + // Replace with the included file contents + // And parse the content from the first stage + const std::string source_name = _shader_path + match.str(1); + dst.append(cs_parser(read_file(source_name, false), ParserStage::FIRST, 0)); + break; + } + case ParserStage::SEARCH_MACRO_DEFINITIONS: + { + std::regex params_regex(R"(\b\w+\b)"); + const std::string macro_param_str = match.str(2); + const std::vector macro_param_list( + std::sregex_token_iterator(macro_param_str.begin(), + macro_param_str.end(), + params_regex), + std::sregex_token_iterator()); + + const MacroDefinitionInfo info = + { + macro_param_list, + match.str(3) + }; + // Collect the macro definition data and not change the shader source + macro_definitions.insert(std::pair(match.str(1), info)); + dst.append(match.str()); + break; 
+ } + case ParserStage::EXPAND_MACRO_USES: + { + ptrdiff_t args_str_length = 0; + std::vector args_list; + + // Walk through argument list, because the regular expression does NOT support nested parentheses + size_t cur_args_str_pos = match.position() + match.length(); + if(src[cur_args_str_pos++] == '(') + { + int nested_parentheses = 0; + ptrdiff_t cur_arg_pos = cur_args_str_pos; + ptrdiff_t cur_arg_length = 0; + + args_str_length++; + while(src[cur_args_str_pos] != ')' || nested_parentheses != 0) + { + switch(src[cur_args_str_pos++]) + { + case '(': + nested_parentheses++; + cur_arg_length++; + break; + case ',': + if(nested_parentheses == 0) + { + args_list.push_back(src.substr(cur_arg_pos, cur_arg_length)); + cur_arg_pos = cur_args_str_pos; + cur_arg_length = 0; + } + else + { + cur_arg_length++; + } + break; + case ' ': + case '\t': + if(cur_arg_length == 0) + { + cur_arg_pos++; + } + else + { + cur_arg_length++; + } + break; + case ')': + nested_parentheses--; + // no break here! 
+ default: + cur_arg_length++; + break; + } + args_str_length++; + } + if(src[cur_args_str_pos] == ')' && nested_parentheses == 0) + { + args_list.push_back(src.substr(cur_arg_pos, cur_arg_length)); + } + args_str_length++; + } + + std::string expanded_content = match.str(); + const std::vector macro_param_list = macro_definitions.at(match.str()).param_list; + + if((nested_level != 0 || !macro_param_list.empty()) && macro_param_list.size() == args_list.size()) + { + parsed_pos += args_str_length; + expanded_content = macro_definitions.at(match.str()).content; + size_t i = 0; + for(auto ¶m_name : macro_param_list) + { + std::regex params_regex(R"(\b)" + param_name + R"(\b)"); + expanded_content.assign(std::regex_replace(expanded_content, params_regex, args_list[i])); + ++i; + } + // Expand macro recursively + expanded_content = cs_parser(expanded_content, stage, nested_level + 1); + + if(nested_level == 0) + { + const std::regex token_pasting_rgx = std::regex(R"(\b##\b)"); + if(std::regex_search(expanded_content, token_pasting_rgx)) + { + // Remove token pasting operator "##" + expanded_content.assign(std::regex_replace(expanded_content, std::regex(token_pasting_rgx), "")); + // Trim trailing whitespace + expanded_content.assign(std::regex_replace(expanded_content, std::regex(R"([ \t]*\\\n)"), "\n")); + } + else + { + // Do not expand the macro if the result does not have token pasting operator "##" + expanded_content = src.substr(match.position(), match.length() + args_str_length); + } + } + } + dst.append(expanded_content); + break; + } + case ParserStage::SKIP_COMMENTS: + case ParserStage::SKIP_PREPROCESSOR_DIRECTIVES: + default: + dst.append(match.str()); + break; + } + next_stage = stage; + } + dst.append(cs_parser(src.substr(parsed_pos, src.length() - parsed_pos), next_stage, 0)); + + return dst; + }; + + return cs_parser(shader_source, ParserStage::FIRST, 0); +} + +const GCProgram &GCKernelLibrary::load_program(const std::string &program_name) const +{ + 
const auto program_it = _programs_map.find(program_name); + + if(program_it != _programs_map.end()) + { + return program_it->second; + } + + GCProgram program; + +#ifdef EMBEDDED_KERNELS + const auto program_source_it = _program_source_map.find(program_name); + + if(_program_source_map.end() == program_source_it) + { + ARM_COMPUTE_ERROR("Embedded program for %s does not exist.", program_name.c_str()); + } + + // TODO(APPBROWSER-298): Do not call shader preprocessor here + // We should do the preprocess at compile time + // The preprocess_shader function is used for support "#include" directive and token pasting operator "##". + // This job could be done at compile time by using a python script in order to get better performance at runtime. + // BTW: We usually defined EMBEDDED_KERNELS in release build. + program = GCProgram(program_name, preprocess_shader(program_source_it->second)); +#else /* EMBEDDED_KERNELS */ + // Check for binary + std::string source_name = _shader_path + program_name; + if(std::ifstream(source_name).is_open()) + { + program = GCProgram(program_name, preprocess_shader(read_file(source_name, false))); + } + else + { + ARM_COMPUTE_ERROR("Shader file %s does not exist.", source_name.c_str()); + } +#endif /* EMBEDDED_KERNELS */ + + // Insert program to program map + const auto new_program = _programs_map.emplace(program_name, std::move(program)); + + return new_program.first->second; +} + +void GCKernelLibrary::setup_context() +{ + EGLBoolean res; + _display = eglGetDisplay(EGL_DEFAULT_DISPLAY); + + ARM_COMPUTE_ERROR_ON_MSG(_display == EGL_NO_DISPLAY, "Failed to get display: 0x%x.", eglGetError()); + + res = eglInitialize(_display, nullptr, nullptr); + + ARM_COMPUTE_ERROR_ON_MSG(res == EGL_FALSE, "Failed to initialize egl: 0x%x.", eglGetError()); + ARM_COMPUTE_UNUSED(res); + + const char *egl_extension_st = eglQueryString(_display, EGL_EXTENSIONS); + ARM_COMPUTE_ERROR_ON_MSG((strstr(egl_extension_st, "EGL_KHR_create_context") == nullptr), "Failed 
to query EGL_KHR_create_context"); + ARM_COMPUTE_ERROR_ON_MSG((strstr(egl_extension_st, "EGL_KHR_surfaceless_context") == nullptr), "Failed to query EGL_KHR_surfaceless_context"); + ARM_COMPUTE_UNUSED(egl_extension_st); + + const EGLint config_attribs[] = + { + EGL_RENDERABLE_TYPE, EGL_OPENGL_ES3_BIT_KHR, + EGL_NONE + }; + EGLConfig cfg; + EGLint count; + + res = eglChooseConfig(_display, config_attribs, &cfg, 1, &count); + + ARM_COMPUTE_ERROR_ON_MSG(res == EGL_FALSE, "Failed to choose config: 0x%x.", eglGetError()); + ARM_COMPUTE_UNUSED(res); + + res = eglBindAPI(EGL_OPENGL_ES_API); + + ARM_COMPUTE_ERROR_ON_MSG(res == EGL_FALSE, "Failed to bind api: 0x%x.", eglGetError()); + + const EGLint attribs[] = + { + EGL_CONTEXT_CLIENT_VERSION, 3, + EGL_NONE + }; + _context = eglCreateContext(_display, + cfg, + EGL_NO_CONTEXT, + attribs); + + ARM_COMPUTE_ERROR_ON_MSG(_context == EGL_NO_CONTEXT, "Failed to create context: 0x%x.", eglGetError()); + ARM_COMPUTE_UNUSED(res); + + res = eglMakeCurrent(_display, EGL_NO_SURFACE, EGL_NO_SURFACE, _context); + + ARM_COMPUTE_ERROR_ON_MSG(res == EGL_FALSE, "Failed to make current: 0x%x.", eglGetError()); + ARM_COMPUTE_UNUSED(res); +} + +void GCKernelLibrary::setup_dummy_fbo() +{ + ARM_COMPUTE_GL_CHECK(glGenFramebuffers(1, &_frame_buffer)); + ARM_COMPUTE_GL_CHECK(glBindFramebuffer(GL_FRAMEBUFFER, _frame_buffer)); + ARM_COMPUTE_GL_CHECK(glGenTextures(1, &_tex_rt)); + ARM_COMPUTE_GL_CHECK(glBindTexture(GL_TEXTURE_2D, _tex_rt)); + ARM_COMPUTE_GL_CHECK(glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, 1, 1, 0, GL_RGB, GL_UNSIGNED_BYTE, nullptr)); + ARM_COMPUTE_GL_CHECK(glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, _tex_rt, 0)); +} + +GCKernelLibrary::~GCKernelLibrary() +{ + for(auto &program : _built_programs_map) + { + static_cast(program.second).cleanup(); + } + + ARM_COMPUTE_GL_CHECK(glBindTexture(GL_TEXTURE_2D, 0)); + ARM_COMPUTE_GL_CHECK(glBindFramebuffer(GL_FRAMEBUFFER, 0)); + ARM_COMPUTE_GL_CHECK(glDeleteTextures(1, 
&_tex_rt)); + ARM_COMPUTE_GL_CHECK(glDeleteFramebuffers(1, &_frame_buffer)); + + if(_own_context) + { + eglDestroyContext(_display, _context); + eglTerminate(_display); + + _context = EGL_NO_CONTEXT; + _display = EGL_NO_DISPLAY; + } +} + +std::string GCKernelLibrary::stringify_set(const StringSet &s) const +{ + std::string concat_set; + + // Concatenate set + for(const auto &el : s) + { + concat_set += el + "\n"; + } + + return concat_set; +} diff --git a/src/core/GLES_COMPUTE/IGCKernel.cpp b/src/core/GLES_COMPUTE/IGCKernel.cpp new file mode 100644 index 0000000000..154a2c0c66 --- /dev/null +++ b/src/core/GLES_COMPUTE/IGCKernel.cpp @@ -0,0 +1,157 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h" + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h" +#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Utils.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/core/Window.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCScheduler.h" + +#include +#include + +using namespace arm_compute; + +void arm_compute::enqueue(IGCKernel &kernel, const Window &window, const gles::NDRange &lws) +{ + ARM_COMPUTE_UNUSED(kernel); + + if(kernel.kernel().get_program() == 0) + { + return; + } + + ARM_COMPUTE_ERROR_ON((0 == (window.x().end() - window.x().start())) || (0 == (window.y().end() - window.y().start()))); + + ARM_COMPUTE_ERROR_ON_MSG((((window.x().end() - window.x().start()) % (window.x().step() * lws[0])) != 0), + "window x end =%d, start=%d, step=%d, lws x=%d", window.x().end(), window.x().start(), window.x().step(), lws[0]); + ARM_COMPUTE_ERROR_ON_MSG((((window.y().end() - window.y().start()) % (window.y().step() * lws[1])) != 0), + "window y end =%d, start=%d, step=%d, lws y=%d", window.y().end(), window.y().start(), window.y().step(), lws[1]); + ARM_COMPUTE_ERROR_ON_MSG((((window.z().end() - window.z().start()) % (window.z().step() * lws[2])) != 0), + "window z end =%d, start=%d, step=%d, lws z=%d", window.z().end(), window.z().start(), window.z().step(), lws[2]); + + ARM_COMPUTE_GL_CHECK(glDispatchCompute((window.x().end() - window.x().start()) / (window.x().step() / lws[0]), + (window.y().end() - window.y().start()) / (window.y().step() / lws[1]), + (window.z().end() - window.z().start()) / (window.z().step() / lws[2]))); +} + +IGCKernel::IGCKernel() + : _kernel() +{ +} + +GCKernel &IGCKernel::kernel() +{ + return _kernel; +} + +template +unsigned int IGCKernel::num_arguments_per_tensor() const +{ + return 2 + 2 * dimension_size; +} + +template 
+void IGCKernel::add_tensor_argument(unsigned int &idx, const IGCTensor *tensor, const BufferParam ¶m, const Window &window) +{ + ARM_COMPUTE_ERROR_ON(tensor == nullptr); + + const ITensorInfo *info = tensor->info(); + const Strides &strides = info->strides_in_bytes(); + + // Calculate offset to the start of the window + unsigned int offset_first_element = info->offset_first_element_in_bytes(); + + for(unsigned int n = 0; n < info->num_dimensions(); ++n) + { + offset_first_element += window[n].start() * strides[n]; + } + + unsigned int idx_start = idx; + + for(unsigned int dimension = 0; dimension < dimension_size; dimension++) + { + _kernel.set_params(idx++, strides[dimension]); + _kernel.set_params(idx++, strides[dimension] * window[dimension].step()); + } + + _kernel.set_params(idx++, offset_first_element); + _kernel.set_params(idx++, param.buffer_data_type_shift); + + ARM_COMPUTE_GL_CHECK(glBindBufferBase(GL_SHADER_STORAGE_BUFFER, param.binding_point, tensor->gc_buffer())); + + ARM_COMPUTE_ERROR_ON_MSG(idx_start + num_arguments_per_tensor() != idx, + "add_%dD_tensor_argument() is supposed to add exactly %d arguments to the kernel", dimension_size, num_arguments_per_tensor()); + ARM_COMPUTE_UNUSED(idx_start); +} + +void IGCKernel::add_1D_tensor_argument(unsigned int &idx, const IGCTensor *tensor, const unsigned int binding_point, const Window &window) +{ + add_tensor_argument<1>(idx, tensor, BufferParam(binding_point, 0), window); +} + +void IGCKernel::add_1D_tensor_argument(unsigned int &idx, const IGCTensor *tensor, const BufferParam ¶m, const Window &window) +{ + add_tensor_argument<1>(idx, tensor, param, window); +} + +void IGCKernel::add_2D_tensor_argument(unsigned int &idx, const IGCTensor *tensor, const unsigned int binding_point, const Window &window) +{ + add_tensor_argument<2>(idx, tensor, BufferParam(binding_point, 0), window); +} + +void IGCKernel::add_2D_tensor_argument(unsigned int &idx, const IGCTensor *tensor, const BufferParam ¶m, const Window 
&window) +{ + add_tensor_argument<2>(idx, tensor, param, window); +} + +void IGCKernel::add_3D_tensor_argument(unsigned int &idx, const IGCTensor *tensor, const unsigned int binding_point, const Window &window) +{ + add_tensor_argument<3>(idx, tensor, BufferParam(binding_point, 0), window); +} + +void IGCKernel::add_3D_tensor_argument(unsigned int &idx, const IGCTensor *tensor, const BufferParam ¶m, const Window &window) +{ + add_tensor_argument<3>(idx, tensor, param, window); +} + +unsigned int IGCKernel::num_arguments_per_1D_tensor() const +{ + return num_arguments_per_tensor<1>(); +} + +unsigned int IGCKernel::num_arguments_per_2D_tensor() const +{ + return num_arguments_per_tensor<2>(); +} + +unsigned int IGCKernel::num_arguments_per_3D_tensor() const +{ + return num_arguments_per_tensor<3>(); +} diff --git a/src/core/GLES_COMPUTE/IGCSimple2DKernel.cpp b/src/core/GLES_COMPUTE/IGCSimple2DKernel.cpp new file mode 100644 index 0000000000..5bb479ed24 --- /dev/null +++ b/src/core/GLES_COMPUTE/IGCSimple2DKernel.cpp @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/core/GLES_COMPUTE/IGCSimple2DKernel.h" + +#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/core/Window.h" + +using namespace arm_compute; + +void IGCSimple2DKernel::run(const Window &window) +{ + ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); + ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IGCKernel::window(), window); + + _kernel.use(); + + Window slice = window.first_slice_window_2D(); + + do + { + unsigned int idx = 0; + add_2D_tensor_argument(idx, _input, 1, slice); + add_2D_tensor_argument(idx, _output, 2, slice); + _kernel.update_shader_params(); + enqueue(*this, slice); + } + while(window.slide_window_slice_2D(slice)); +} diff --git a/src/core/GLES_COMPUTE/IGCSimple3DKernel.cpp b/src/core/GLES_COMPUTE/IGCSimple3DKernel.cpp new file mode 100644 index 0000000000..61225d8533 --- /dev/null +++ b/src/core/GLES_COMPUTE/IGCSimple3DKernel.cpp @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/core/GLES_COMPUTE/IGCSimple3DKernel.h" + +#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/core/Window.h" + +using namespace arm_compute; + +void IGCSimple3DKernel::run(const Window &window) +{ + ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); + ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); + + Window slice = window.first_slice_window_3D(); + + _kernel.use(); + + do + { + unsigned int idx = 0; + unsigned int binding = 1; // SSBO binding starts from 1. + add_3D_tensor_argument(idx, _input, binding++, slice); + add_3D_tensor_argument(idx, _output, binding++, slice); + _kernel.update_shader_params(); + enqueue(*this, slice); + } + while(window.slide_window_slice_3D(slice)); +} diff --git a/src/core/GLES_COMPUTE/IGCSimpleKernel.cpp b/src/core/GLES_COMPUTE/IGCSimpleKernel.cpp new file mode 100644 index 0000000000..459601e68b --- /dev/null +++ b/src/core/GLES_COMPUTE/IGCSimpleKernel.cpp @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/core/GLES_COMPUTE/IGCSimpleKernel.h" + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/IAccessWindow.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/core/Window.h" + +using namespace arm_compute; + +IGCSimpleKernel::IGCSimpleKernel() + : _input(nullptr), _output(nullptr) +{ +} + +void IGCSimpleKernel::configure(const IGCTensor *input, IGCTensor *output, unsigned int num_elems_processed_per_iteration, bool border_undefined, const BorderSize &border_size) +{ + _input = input; + _output = output; + + // Configure kernel window + Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration), border_undefined, border_size); + AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration); + + update_window_and_padding(win, + AccessWindowHorizontal(input->info(), 0, num_elems_processed_per_iteration), + output_access); + + output_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size); + + IGCKernel::configure(win); +} diff --git a/src/core/GLES_COMPUTE/IGCTensor.cpp b/src/core/GLES_COMPUTE/IGCTensor.cpp new file mode 100644 index 0000000000..5576665243 --- /dev/null +++ b/src/core/GLES_COMPUTE/IGCTensor.cpp @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h" + +using namespace arm_compute; + +IGCTensor::IGCTensor() + : _mapping(nullptr) +{ +} + +void IGCTensor::map(bool blocking) +{ + _mapping = do_map(blocking); +} + +void IGCTensor::unmap() +{ + do_unmap(); + _mapping = nullptr; +} + +void IGCTensor::clear() +{ + this->map(); + std::memset(static_cast(_mapping), 0, this->info()->total_size()); + this->unmap(); +} + +uint8_t *IGCTensor::buffer() const +{ + return _mapping; +} diff --git a/src/core/GLES_COMPUTE/OpenGLES.cpp b/src/core/GLES_COMPUTE/OpenGLES.cpp new file mode 100644 index 0000000000..fdfc085db2 --- /dev/null +++ b/src/core/GLES_COMPUTE/OpenGLES.cpp @@ -0,0 +1,820 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h" + +#include +#include +#include + +using eglGetProcAddress_func = __eglMustCastToProperFunctionPointerType EGLAPIENTRY (*)(const char *procname); +using eglBindAPI_func = EGLBoolean EGLAPIENTRY (*)(EGLenum api); +using eglChooseConfig_func = EGLBoolean EGLAPIENTRY (*)(EGLDisplay dpy, const EGLint *attrib_list, EGLConfig *configs, EGLint config_size, EGLint *num_config); +using eglCreateContext_func = EGLContext EGLAPIENTRY (*)(EGLDisplay dpy, EGLConfig config, EGLContext share_context, const EGLint *attrib_list); +using eglDestroyContext_func = EGLBoolean EGLAPIENTRY (*)(EGLDisplay dpy, EGLContext ctx); +using eglGetDisplay_func = EGLDisplay EGLAPIENTRY (*)(EGLNativeDisplayType display_id); +using eglInitialize_func = EGLBoolean EGLAPIENTRY (*)(EGLDisplay dpy, EGLint *major, EGLint *minor); +using eglMakeCurrent_func = EGLBoolean EGLAPIENTRY (*)(EGLDisplay dpy, EGLSurface draw, EGLSurface read, EGLContext ctx); +using eglTerminate_func = EGLBoolean EGLAPIENTRY (*)(EGLDisplay dpy); +using eglGetError_func = EGLint EGLAPIENTRY (*)(); +using eglQueryString_func = char const * EGLAPIENTRY (*)(EGLDisplay dpy, EGLint name); +using glAttachShader_func = void GL_APIENTRY (*)(GLuint program, GLuint shader); +using glCompileShader_func = void GL_APIENTRY (*)(GLuint shader); +using glCreateProgram_func = GLuint GL_APIENTRY (*)(); +using glCreateShader_func = GLuint GL_APIENTRY (*)(GLenum type); +using glDeleteProgram_func = void 
GL_APIENTRY (*)(GLuint program); +using glDeleteShader_func = void GL_APIENTRY (*)(GLuint shader); +using glDetachShader_func = void GL_APIENTRY (*)(GLuint program, GLuint shader); +using glGetProgramInfoLog_func = void GL_APIENTRY (*)(GLuint program, GLsizei bufsize, GLsizei *length, GLchar *infolog); +using glGetProgramiv_func = void GL_APIENTRY (*)(GLuint program, GLenum pname, GLint *params); +using glGetShaderInfoLog_func = void GL_APIENTRY (*)(GLuint shader, GLsizei bufsize, GLsizei *length, GLchar *infolog); +using glGetShaderiv_func = void GL_APIENTRY (*)(GLuint shader, GLenum pname, GLint *params); +using glLinkProgram_func = void GL_APIENTRY (*)(GLuint program); +using glShaderSource_func = void GL_APIENTRY (*)(GLuint shader, GLsizei count, const GLchar *const *string, const GLint *length); +using glUseProgram_func = void GL_APIENTRY (*)(GLuint program); +using glBindBuffer_func = void GL_APIENTRY (*)(GLenum target, GLuint buffer); +using glBindBufferBase_func = void GL_APIENTRY (*)(GLenum target, GLuint index, GLuint buffer); +using glBufferData_func = void GL_APIENTRY (*)(GLenum target, GLsizeiptr size, const GLvoid *data, GLenum usage); +using glDeleteBuffers_func = void GL_APIENTRY (*)(GLsizei n, const GLuint *buffers); +using glDispatchCompute_func = void GL_APIENTRY (*)(GLuint num_groups_x, GLuint num_groups_y, GLuint num_groups_z); +using glFlush_func = void GL_APIENTRY (*)(); +using glGenBuffers_func = void GL_APIENTRY (*)(GLsizei n, GLuint *buffers); +using glGetProgramResourceIndex_func = GLuint GL_APIENTRY (*)(GLuint program, GLenum programInterface, const GLchar *name); +using glGetUniformLocation_func = GLint GL_APIENTRY (*)(GLuint program, const GLchar *name); +using glMapBufferRange_func = void *GL_APIENTRY (*)(GLenum target, GLintptr offset, GLsizeiptr length, GLbitfield access); +using glMemoryBarrier_func = void GL_APIENTRY (*)(GLbitfield barriers); +using glUniform1ui_func = void GL_APIENTRY (*)(GLint location, GLuint v0); +using 
glUnmapBuffer_func = GLboolean GL_APIENTRY (*)(GLenum target); +using glGetError_func = GLenum GL_APIENTRY (*)(); +using glGetActiveUniformBlockiv_func = void GL_APIENTRY (*)(GLuint program, GLuint uniformBlockIndex, GLenum pname, GLint *params); +using glUniformBlockBinding_func = void GL_APIENTRY (*)(GLuint program, GLuint uniformBlockIndex, GLuint uniformBlockBinding); +using glGetUniformBlockIndex_func = GLuint GL_APIENTRY (*)(GLuint program, const GLchar *uniformBlockName); +using glGenTextures_func = void GL_APIENTRY (*)(GLsizei n, GLuint *textures); +using glDeleteTextures_func = void GL_APIENTRY (*)(GLsizei n, const GLuint *textures); +using glBindTexture_func = void GL_APIENTRY (*)(GLenum target, GLuint texture); +using glTexImage2D_func = void GL_APIENTRY (*)(GLenum target, GLint level, GLint internalformat, GLsizei width, GLsizei height, GLint border, GLenum format, GLenum type, + const GLvoid *pixels); +using glGenFramebuffers_func = void GL_APIENTRY (*)(GLsizei n, GLuint *framebuffers); +using glDeleteFramebuffers_func = void GL_APIENTRY (*)(GLsizei n, const GLuint *framebuffers); +using glBindFramebuffer_func = void GL_APIENTRY (*)(GLenum target, GLuint framebuffer); +using glFramebufferTexture2D_func = void GL_APIENTRY (*)(GLenum target, GLenum attachment, GLenum textarget, GLuint texture, GLint level); + +class GLESSymbols +{ +private: + void init() + { + void *egl_handle = dlopen("libEGL.so", RTLD_LAZY | RTLD_LOCAL); + void *glesv2_handle = dlopen("libGLESv2.so", RTLD_LAZY | RTLD_LOCAL); + void *glesv3_handle = dlopen("libGLESv3.so", RTLD_LAZY | RTLD_LOCAL); + if(egl_handle == nullptr) + { + std::cerr << "Can't load libEGL.so: " << dlerror() << std::endl; + } + else + { +#undef EGL_ENTRY +#define EGL_ENTRY(_api) _api = reinterpret_cast<_api##_func>(dlsym(egl_handle, #_api)); +#include "./egl_entries.in" +#undef EGL_ENTRY + + if(eglGetProcAddress != nullptr) + { +#undef EGL_ENTRY +#define EGL_ENTRY(_api) \ + if((_api) == nullptr) \ + (_api) = 
reinterpret_cast<_api##_func>(eglGetProcAddress(#_api)); +#include "./egl_entries.in" +#undef EGL_ENTRY + +#undef GL_ENTRY +#define GL_ENTRY(_api) _api = reinterpret_cast<_api##_func>(eglGetProcAddress(#_api)); +#include "./gl_entries.in" +#undef GL_ENTRY + } + + std::vector<void *> handles = { glesv3_handle, glesv2_handle }; + for(auto &handle : handles) + { + if(handle != nullptr) + { +#undef GL_ENTRY +#define GL_ENTRY(_api) \ + if((_api) == nullptr) \ + (_api) = reinterpret_cast<_api##_func>(dlsym(handle, #_api)); +#include "./gl_entries.in" +#undef GL_ENTRY + } + } + + if(glesv3_handle != nullptr) + { + dlclose(glesv3_handle); + } + if(glesv2_handle != nullptr) + { + dlclose(glesv2_handle); + } + dlclose(egl_handle); + } + } + bool _initialized = false; + +public: + static GLESSymbols &get() + { + static GLESSymbols symbols = GLESSymbols(); + if(!symbols._initialized) + { + symbols._initialized = true; + symbols.init(); + } + + return symbols; + } + +#undef EGL_ENTRY +#undef GL_ENTRY +#define EGL_ENTRY(_api) _api##_func _api = nullptr; +#define GL_ENTRY(_api) EGL_ENTRY(_api) +#include "./egl_entries.in" +#include "./gl_entries.in" +#undef EGL_ENTRY +#undef GL_ENTRY +}; + +bool arm_compute::opengles31_is_available() +{ + return GLESSymbols::get().glDispatchCompute != nullptr; +} + +__eglMustCastToProperFunctionPointerType EGLAPIENTRY eglGetProcAddress(const char *procname) +{ + auto func = GLESSymbols::get().eglGetProcAddress; + if(func != nullptr) + { + return func(procname); + } + else + { + return nullptr; + } +} + +EGLBoolean EGLAPIENTRY eglBindAPI(EGLenum api) +{ + auto func = GLESSymbols::get().eglBindAPI; + if(func != nullptr) + { + return func(api); + } + else + { + return EGL_FALSE; + } +} + +EGLBoolean EGLAPIENTRY eglChooseConfig(EGLDisplay dpy, const EGLint *attrib_list, EGLConfig *configs, EGLint config_size, EGLint *num_config) +{ + auto func = GLESSymbols::get().eglChooseConfig; + if(func != nullptr) + { + return func(dpy, attrib_list, configs,
config_size, num_config); + } + else + { + return EGL_FALSE; + } +} + +EGLContext EGLAPIENTRY eglCreateContext(EGLDisplay dpy, EGLConfig config, EGLContext share_context, const EGLint *attrib_list) +{ + auto func = GLESSymbols::get().eglCreateContext; + if(func != nullptr) + { + return func(dpy, config, share_context, attrib_list); + } + else + { + return nullptr; + } +} + +EGLBoolean EGLAPIENTRY eglDestroyContext(EGLDisplay dpy, EGLContext ctx) +{ + auto func = GLESSymbols::get().eglDestroyContext; + if(func != nullptr) + { + return func(dpy, ctx); + } + else + { + return EGL_FALSE; + } +} + +EGLDisplay EGLAPIENTRY eglGetDisplay(EGLNativeDisplayType display_id) +{ + auto func = GLESSymbols::get().eglGetDisplay; + if(func != nullptr) + { + return func(display_id); + } + else + { + return nullptr; + } +} + +EGLBoolean EGLAPIENTRY eglInitialize(EGLDisplay dpy, EGLint *major, EGLint *minor) +{ + auto func = GLESSymbols::get().eglInitialize; + if(func != nullptr) + { + return func(dpy, major, minor); + } + else + { + return EGL_FALSE; + } +} + +EGLBoolean EGLAPIENTRY eglMakeCurrent(EGLDisplay dpy, EGLSurface draw, EGLSurface read, EGLContext ctx) +{ + auto func = GLESSymbols::get().eglMakeCurrent; + if(func != nullptr) + { + return func(dpy, draw, read, ctx); + } + else + { + return EGL_FALSE; + } +} + +EGLBoolean EGLAPIENTRY eglTerminate(EGLDisplay dpy) +{ + auto func = GLESSymbols::get().eglTerminate; + if(func != nullptr) + { + return func(dpy); + } + else + { + return EGL_FALSE; + } +} + +EGLint EGLAPIENTRY eglGetError() +{ + auto func = GLESSymbols::get().eglGetError; + if(func != nullptr) + { + return func(); + } + else + { + return GL_NO_ERROR; + } +} + +char const *EGLAPIENTRY eglQueryString(EGLDisplay dpy, EGLint name) +{ + auto func = GLESSymbols::get().eglQueryString; + if(func != nullptr) + { + return func(dpy, name); + } + else + { + return nullptr; + } +} + +void GL_APIENTRY glAttachShader(GLuint program, GLuint shader) +{ + auto func = 
GLESSymbols::get().glAttachShader; + if(func != nullptr) + { + return func(program, shader); + } + else + { + return; + } +} + +void GL_APIENTRY glCompileShader(GLuint shader) +{ + auto func = GLESSymbols::get().glCompileShader; + if(func != nullptr) + { + return func(shader); + } + else + { + return; + } +} + +GLuint GL_APIENTRY glCreateProgram() +{ + auto func = GLESSymbols::get().glCreateProgram; + if(func != nullptr) + { + return func(); + } + else + { + return 0; + } +} + +GLuint GL_APIENTRY glCreateShader(GLenum type) +{ + auto func = GLESSymbols::get().glCreateShader; + if(func != nullptr) + { + return func(type); + } + else + { + return 0; + } +} + +void GL_APIENTRY glDeleteProgram(GLuint program) +{ + auto func = GLESSymbols::get().glDeleteProgram; + if(func != nullptr) + { + return func(program); + } + else + { + return; + } +} + +void GL_APIENTRY glDeleteShader(GLuint shader) +{ + auto func = GLESSymbols::get().glDeleteShader; + if(func != nullptr) + { + return func(shader); + } + else + { + return; + } +} + +void GL_APIENTRY glDetachShader(GLuint program, GLuint shader) +{ + auto func = GLESSymbols::get().glDetachShader; + if(func != nullptr) + { + return func(program, shader); + } + else + { + return; + } +} + +void GL_APIENTRY glGetProgramInfoLog(GLuint program, GLsizei bufSize, GLsizei *length, GLchar *infoLog) +{ + auto func = GLESSymbols::get().glGetProgramInfoLog; + if(func != nullptr) + { + return func(program, bufSize, length, infoLog); + } + else + { + return; + } +} + +void GL_APIENTRY glGetProgramiv(GLuint program, GLenum pname, GLint *params) +{ + auto func = GLESSymbols::get().glGetProgramiv; + if(func != nullptr) + { + return func(program, pname, params); + } + else + { + return; + } +} + +void GL_APIENTRY glGetShaderInfoLog(GLuint shader, GLsizei bufSize, GLsizei *length, GLchar *infoLog) +{ + auto func = GLESSymbols::get().glGetShaderInfoLog; + if(func != nullptr) + { + return func(shader, bufSize, length, infoLog); + } + else + { + 
return; + } +} + +void GL_APIENTRY glGetShaderiv(GLuint shader, GLenum pname, GLint *params) +{ + auto func = GLESSymbols::get().glGetShaderiv; + if(func != nullptr) + { + return func(shader, pname, params); + } + else + { + return; + } +} + +void GL_APIENTRY glLinkProgram(GLuint program) +{ + auto func = GLESSymbols::get().glLinkProgram; + if(func != nullptr) + { + return func(program); + } + else + { + return; + } +} + +void GL_APIENTRY glShaderSource(GLuint shader, GLsizei count, const GLchar *const *string, const GLint *length) +{ + auto func = GLESSymbols::get().glShaderSource; + if(func != nullptr) + { + return func(shader, count, string, length); + } + else + { + return; + } +} + +void GL_APIENTRY glUseProgram(GLuint program) +{ + auto func = GLESSymbols::get().glUseProgram; + if(func != nullptr) + { + return func(program); + } + else + { + return; + } +} + +void GL_APIENTRY glBindBuffer(GLenum target, GLuint buffer) +{ + auto func = GLESSymbols::get().glBindBuffer; + if(func != nullptr) + { + return func(target, buffer); + } + else + { + return; + } +} + +void GL_APIENTRY glBindBufferBase(GLenum target, GLuint index, GLuint buffer) +{ + auto func = GLESSymbols::get().glBindBufferBase; + if(func != nullptr) + { + return func(target, index, buffer); + } + else + { + return; + } +} + +void GL_APIENTRY glBufferData(GLenum target, GLsizeiptr size, const GLvoid *data, GLenum usage) +{ + auto func = GLESSymbols::get().glBufferData; + if(func != nullptr) + { + return func(target, size, data, usage); + } + else + { + return; + } +} + +void GL_APIENTRY glDeleteBuffers(GLsizei n, const GLuint *buffers) +{ + auto func = GLESSymbols::get().glDeleteBuffers; + if(func != nullptr) + { + return func(n, buffers); + } + else + { + return; + } +} + +void GL_APIENTRY glDispatchCompute(GLuint num_groups_x, GLuint num_groups_y, GLuint num_groups_z) +{ + auto func = GLESSymbols::get().glDispatchCompute; + if(func != nullptr) + { + return func(num_groups_x, num_groups_y, 
num_groups_z); + } + else + { + return; + } +} + +void GL_APIENTRY glFlush(void) +{ + auto func = GLESSymbols::get().glFlush; + if(func != nullptr) + { + return func(); + } + else + { + return; + } +} + +void GL_APIENTRY glGenBuffers(GLsizei n, GLuint *buffers) +{ + auto func = GLESSymbols::get().glGenBuffers; + if(func != nullptr) + { + return func(n, buffers); + } + else + { + return; + } +} + +GLuint GL_APIENTRY glGetProgramResourceIndex(GLuint program, GLenum programInterface, const GLchar *name) +{ + auto func = GLESSymbols::get().glGetProgramResourceIndex; + if(func != nullptr) + { + return func(program, programInterface, name); + } + else + { + return GL_INVALID_INDEX; + } +} + +GLint GL_APIENTRY glGetUniformLocation(GLuint program, const GLchar *name) +{ + auto func = GLESSymbols::get().glGetUniformLocation; + if(func != nullptr) + { + return func(program, name); + } + else + { + return -1; + } +} + +void *GL_APIENTRY glMapBufferRange(GLenum target, GLintptr offset, GLsizeiptr length, GLbitfield access) +{ + auto func = GLESSymbols::get().glMapBufferRange; + if(func != nullptr) + { + return func(target, offset, length, access); + } + else + { + return nullptr; + } +} + +void GL_APIENTRY glMemoryBarrier(GLbitfield barriers) +{ + auto func = GLESSymbols::get().glMemoryBarrier; + if(func != nullptr) + { + return func(barriers); + } + else + { + return; + } +} + +void GL_APIENTRY glUniform1ui(GLint location, GLuint v0) +{ + auto func = GLESSymbols::get().glUniform1ui; + if(func != nullptr) + { + return func(location, v0); + } + else + { + return; + } +} + +GLboolean GL_APIENTRY glUnmapBuffer(GLenum target) +{ + auto func = GLESSymbols::get().glUnmapBuffer; + if(func != nullptr) + { + return func(target); + } + else + { + return GL_FALSE; + } +} + +GLenum GL_APIENTRY glGetError(void) +{ + auto func = GLESSymbols::get().glGetError; + if(func != nullptr) + { + return func(); + } + else + { + return GL_NO_ERROR; + } +} + +void GL_APIENTRY 
glGetActiveUniformBlockiv(GLuint program, GLuint uniformBlockIndex, GLenum pname, GLint *params) +{ + auto func = GLESSymbols::get().glGetActiveUniformBlockiv; + if(func != nullptr) + { + return func(program, uniformBlockIndex, pname, params); + } + else + { + return; + } +} + +void GL_APIENTRY glUniformBlockBinding(GLuint program, GLuint uniformBlockIndex, GLuint uniformBlockBinding) +{ + auto func = GLESSymbols::get().glUniformBlockBinding; + if(func != nullptr) + { + return func(program, uniformBlockIndex, uniformBlockBinding); + } + else + { + return; + } +} + +GLuint GL_APIENTRY glGetUniformBlockIndex(GLuint program, const GLchar *uniformBlockName) +{ + auto func = GLESSymbols::get().glGetUniformBlockIndex; + if(func != nullptr) + { + return func(program, uniformBlockName); + } + else + { + return GL_INVALID_INDEX; + } +} + +void GL_APIENTRY glGenTextures(GLsizei n, GLuint *textures) +{ + auto func = GLESSymbols::get().glGenTextures; + if(func != nullptr) + { + return func(n, textures); + } + else + { + return; + } +} + +void GL_APIENTRY glDeleteTextures(GLsizei n, const GLuint *textures) +{ + auto func = GLESSymbols::get().glDeleteTextures; + if(func != nullptr) + { + return func(n, textures); + } + else + { + return; + } +} + +void GL_APIENTRY glBindTexture(GLenum target, GLuint texture) +{ + auto func = GLESSymbols::get().glBindTexture; + if(func != nullptr) + { + return func(target, texture); + } + else + { + return; + } +} + +void GL_APIENTRY glTexImage2D(GLenum target, GLint level, GLint internalformat, GLsizei width, GLsizei height, GLint border, GLenum format, GLenum type, const GLvoid *pixels) +{ + auto func = GLESSymbols::get().glTexImage2D; + if(func != nullptr) + { + return func(target, level, internalformat, width, height, border, format, type, pixels); + } + else + { + return; + } +} + +void GL_APIENTRY glGenFramebuffers(GLsizei n, GLuint *framebuffers) +{ + auto func = GLESSymbols::get().glGenFramebuffers; + if(func != nullptr) + { + return 
func(n, framebuffers); + } + else + { + return; + } +} + +void GL_APIENTRY glDeleteFramebuffers(GLsizei n, const GLuint *framebuffers) +{ + auto func = GLESSymbols::get().glDeleteFramebuffers; + if(func != nullptr) + { + return func(n, framebuffers); + } + else + { + return; + } +} + +void GL_APIENTRY glBindFramebuffer(GLenum target, GLuint framebuffer) +{ + auto func = GLESSymbols::get().glBindFramebuffer; + if(func != nullptr) + { + return func(target, framebuffer); + } + else + { + return; + } +} + +void GL_APIENTRY glFramebufferTexture2D(GLenum target, GLenum attachment, GLenum textarget, GLuint texture, GLint level) +{ + auto func = GLESSymbols::get().glFramebufferTexture2D; + if(func != nullptr) + { + return func(target, attachment, textarget, texture, level); + } + else + { + return; + } +} diff --git a/src/core/GLES_COMPUTE/cs_shaders/absdiff.cs b/src/core/GLES_COMPUTE/cs_shaders/absdiff.cs new file mode 100644 index 0000000000..f6113e13eb --- /dev/null +++ b/src/core/GLES_COMPUTE/cs_shaders/absdiff.cs @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +layout(local_size_x = LOCAL_SIZE_X, local_size_y = LOCAL_SIZE_Y, local_size_z = LOCAL_SIZE_Z) in; +#include "helpers.h" + +layout(std140) uniform shader_params +{ + IMAGE_PARAM_DECLARATION(src1); + IMAGE_PARAM_DECLARATION(src2); + IMAGE_PARAM_DECLARATION(dst); +}; + +BUFFER_DECLARATION(src1, 1, uint, readonly); +BUFFER_DECLARATION(src2, 2, uint, readonly); +BUFFER_DECLARATION(dst, 3, uint, writeonly); + +/** Calculate the absolute difference of two input images. + * + * @param[in] src1_ptr Pointer to the first source image. Supported data types: U8 + * @param[in] src1_stride_x Stride of the first source image in X dimension (in bytes) + * @param[in] src1_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src1_stride_y Stride of the first source image in Y dimension (in bytes) + * @param[in] src1_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src1_offset_first_element_in_bytes The offset of the first element in the first source image + * @param[in] src2_ptr Pointer to the second source image. Supported data types: Same as @p in1_ptr + * @param[in] src2_stride_x Stride of the second source image in X dimension (in bytes) + * @param[in] src2_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src2_stride_y Stride of the second source image in Y dimension (in bytes) + * @param[in] src2_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src2_offset_first_element_in_bytes The offset of the first element in the second source image + * @param[out] dst_ptr Pointer to the destination image. 
Supported data types: Same as @p in1_ptr + * @param[in] dst_stride_x Stride of the destination image in X dimension (in bytes) + * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] dst_stride_y Stride of the destination image in Y dimension (in bytes) + * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination image + */ +void main(void) +{ + Image src1 = CONVERT_TO_IMAGE_STRUCT(src1); + Image src2 = CONVERT_TO_IMAGE_STRUCT(src2); + Image dst = CONVERT_TO_IMAGE_STRUCT(dst); + + uvec4 tmp1 = UNPACK(LOAD4(src1, CURRENT_OFFSET(src1)), uint, uvec4); + uvec4 tmp2 = UNPACK(LOAD4(src2, CURRENT_OFFSET(src2)), uint, uvec4); + uvec4 diff = uvec4(abs(ivec4(tmp1 - tmp2))); + + STORE4(dst, CURRENT_OFFSET(dst), PACK(diff, uvec4, uint)); +} diff --git a/src/core/GLES_COMPUTE/cs_shaders/activation_layer.cs b/src/core/GLES_COMPUTE/cs_shaders/activation_layer.cs new file mode 100644 index 0000000000..fc9da114f7 --- /dev/null +++ b/src/core/GLES_COMPUTE/cs_shaders/activation_layer.cs @@ -0,0 +1,262 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +layout(local_size_x = LOCAL_SIZE_X, local_size_y = LOCAL_SIZE_Y, local_size_z = LOCAL_SIZE_Z) in; + +#include "helpers.h" + +#ifdef DATA_TYPE_FP32 +precision highp float; +#elif defined(DATA_TYPE_FP16) +#if defined(LOGISTIC) || defined(TANH) || defined(SRELU) || defined(SQRT) +precision highp float; +#else /*LOGISTIC_TANH_SRELU_SQRT*/ +precision mediump float; +#endif /*LOGISTIC_TANH_SRELU_SQRT*/ +#endif /*DATA_TYPE_FP32*/ + +#define ABS_OP(a) abs((a)) +#define ADD_OP(a, b) ((a) + (b)) +#define SUB_OP(a, b) ((a) - (b)) +#define MUL_OP(a, b) ((a) * (b)) +#define MLA_OP(a, b, c) ((b) * (c) + (a)) +#define DIV_OP(a, b) ((a) / (b)) +#define EXP_OP(a) exp((a)) +#define LOG_OP(a) log((a)) +#define SQRT_OP(a) sqrt((a)) +#define CONST_ONE (1.f) + +// Logistic Activation +float logistic_op(float x) +{ + return DIV_OP(CONST_ONE, ADD_OP(CONST_ONE, EXP_OP(-x))); +} +// Hyperbolic Tangent Activation +float tanh_op(float x) +{ + float tmp = float(B_VAL) * x; + if(tmp > 10.f) + { + return MUL_OP(float(A_VAL), 1.f); + } + else if(tmp < -10.f) + { + return MUL_OP(float(A_VAL), -1.f); + } + else + { + return MUL_OP(float(A_VAL), tanh(tmp + 0.000001f)); + } +} +// RELU Tangent Activation +float relu_op(float x) +{ + return max(0.f, x); +} +// Bounded RELU Activation +float brelu_op(float x) +{ + return min(float(A_VAL), max(float(0.0), x)); +} +// Lower Upper Bounded RELU Activation +float lu_brelu_op(float x) +{ + return min(max(x, float(B_VAL)), float(A_VAL)); +} +// Leaky RELU Activation 
+float lrelu_op(float x) +{ + return (x > float(0.0)) ? x : MUL_OP(float(A_VAL), x); +} +// Soft RELU Activation +float srelu_op(float x) +{ + return LOG_OP(ADD_OP(CONST_ONE, EXP_OP(x))); +} +// Absolute Activation +float abs_op(float x) +{ + return ABS_OP(x); +} +// Square Activation +float square_op(float x) +{ + return MUL_OP(x, x); +} +// Square-root Activation +float sqrt_op(float x) +{ + return SQRT_OP(x); +} +// Linear Activation +float linear_op(float x) +{ + return MLA_OP(float(B_VAL), float(A_VAL), x); +} + +layout(std140) uniform shader_params +{ + TENSOR3D_PARAM_DECLARATION(src); + TENSOR3D_PARAM_DECLARATION(dst); +}; + +#ifdef DATA_TYPE_FP32 +BUFFER_DECLARATION(src, 1, float, readonly); +BUFFER_DECLARATION(dst, 2, float, writeonly); + +/** This performs an activation function floating point inputs. + * + * @note Activation function should be given as a preprocessor argument using "#define act_name". e.g. "#define TANH" + * @note A, B variables required by some activation functions are set using A_VAL= and B_VAL= respectively. + * + * @param[in] src_ptr Pointer to the source image. Supported data types: F32 + * @param[in] src_stride_x Stride of the source image in X dimension (in bytes) + * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src_stride_y Stride of the source image in Y dimension (in bytes) + * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] src_step_z src_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source image + * @param[out] dst_ptr Pointer to the destination image. 
Supported data types: same as @p src_ptr + * @param[in] dst_stride_x Stride of the destination image in X dimension (in bytes) + * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] dst_stride_y ride of the destination image in Y dimension (in bytes) + * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] dst_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination image + */ +void main(void) +{ + Tensor3D src = CONVERT_TO_TENSOR3D_STRUCT(src); + Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT(dst); + + float data = src_ptr[src.current_offset]; + float data_out = 0.f; + // Perform activation + +#ifdef LOGISTIC + data_out = logistic_op(data); +#elif defined(TANH) /*LOGISTIC*/ + data_out = tanh_op(data); +#elif defined(RELU) /*RELU*/ + data_out = relu_op(data); +#elif defined(BRELU) /*BRELU*/ + data_out = brelu_op(data); +#elif defined(LU_BRELU) /*LU_BRELU*/ + data_out = lu_brelu_op(data); +#elif defined(LRELU) /*LRELU*/ + data_out = lrelu_op(data); +#elif defined(SRELU) /*SRELU*/ + data_out = srelu_op(data); +#elif defined(ABS) /*ABS*/ + data_out = abs_op(data); +#elif defined(SQUARE) /*SQUARE*/ + data_out = square_op(data); +#elif defined(SQRT) /*SQRT*/ + data_out = sqrt_op(data); +#elif defined(LINEAR) /*LINEAR*/ + data_out = linear_op(data); +#else /*LOGISTIC*/ +#error Activation function not provided +#endif /*LOGISTIC*/ + + dst_ptr[dst.current_offset] = data_out; +} + +#elif defined(DATA_TYPE_FP16) +BUFFER_DECLARATION(src, 1, uint, readonly); +BUFFER_DECLARATION(dst, 2, uint, writeonly); + +/** This performs an activation function floating point inputs. 
+ * + * @note Activation function should be given as a preprocessor argument using "#define act_name". e.g. "#define TANH" + * @note A, B variables required by some activation functions are set using A_VAL= and B_VAL= respectively. + * + * @param[in] src_ptr Pointer to the source image. Supported data types: F16 + * @param[in] src_stride_x Stride of the source image in X dimension (in bytes) + * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src_stride_y Stride of the source image in Y dimension (in bytes) + * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] src_step_z src_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source image + * @param[out] dst_ptr Pointer to the destination image. 
Supported data types: same as @p src_ptr + * @param[in] dst_stride_x Stride of the destination image in X dimension (in bytes) + * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] dst_stride_y ride of the destination image in Y dimension (in bytes) + * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] dst_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination image + */ +void main(void) +{ + Tensor3D src = CONVERT_TO_TENSOR3D_STRUCT_FP16(src); + Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT_FP16(dst); + + uint data = src_ptr[src.current_offset >> 2]; + // Perform activation + float a = unpackHalf2x16(data).x; + float b = unpackHalf2x16(data).y; + vec2 data_out; +#ifdef LOGISTIC /*LOGISTIC*/ + data_out.x = logistic_op(a); + data_out.y = logistic_op(b); +#elif defined(TANH) /*TANH*/ + data_out.x = tanh_op(a); + data_out.y = tanh_op(b); +#elif defined(RELU) /*RELU*/ + data_out.x = relu_op(a); + data_out.y = relu_op(b); +#elif defined(BRELU) /*BRELU*/ + data_out.x = brelu_op(a); + data_out.y = brelu_op(b); +#elif defined(LU_BRELU) /*LU_BRELU*/ + data_out.x = lu_brelu_op(a); + data_out.y = lu_brelu_op(b); +#elif defined(LRELU) /*LRELU*/ + data_out.x = lrelu_op(a); + data_out.y = lrelu_op(b); +#elif defined(SRELU) /*SRELU*/ + data_out.x = srelu_op(a); + data_out.y = srelu_op(b); +#elif defined(ABS) /*ABS*/ + data_out.x = abs_op(a); + data_out.y = abs_op(b); +#elif defined(SQUARE) /*SQUARE*/ + data_out.x = square_op(a); + data_out.y = square_op(b); +#elif defined(SQRT) /*SQRT*/ + data_out.x = sqrt_op(a); + data_out.y = sqrt_op(b); +#elif defined(LINEAR) /*LINEAR*/ + data_out.x = linear_op(a); + data_out.y = linear_op(b); +#else /*LOGISTIC*/ +#error 
Activation function not provided +#endif /*LOGISTIC*/ + + dst_ptr[dst.current_offset >> 2] = packHalf2x16(data_out); +} +#endif /*DATA_TYPE_FP32*/ diff --git a/src/core/GLES_COMPUTE/cs_shaders/batchnormalization_layer.cs b/src/core/GLES_COMPUTE/cs_shaders/batchnormalization_layer.cs new file mode 100644 index 0000000000..54880926cc --- /dev/null +++ b/src/core/GLES_COMPUTE/cs_shaders/batchnormalization_layer.cs @@ -0,0 +1,222 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +layout(local_size_x = LOCAL_SIZE_X, local_size_y = LOCAL_SIZE_Y, local_size_z = LOCAL_SIZE_Z) in; + +#include "helpers.h" + +#ifdef DATA_TYPE_FP32 +precision highp float; +#elif defined(DATA_TYPE_FP16) +precision mediump float; +#endif /*DATA_TYPE_FP32*/ + +#define ADD_OP(a, b) ((a) + (b)) +#define SUB_OP(a, b) ((a) - (b)) +#define MUL_OP(a, b) ((a) * (b)) +#define INVSQRT_OP(a) inversesqrt((a)) +#define SQCVT_SAT(a) (a) + +layout(std140) uniform shader_params +{ + TENSOR3D_PARAM_DECLARATION(src); + TENSOR3D_PARAM_DECLARATION(dst); + VECTOR_PARAM_DECLARATION(mean); + VECTOR_PARAM_DECLARATION(var); + VECTOR_PARAM_DECLARATION(beta); + VECTOR_PARAM_DECLARATION(gamma); +}; + +#ifdef DATA_TYPE_FP32 +BUFFER_DECLARATION(src, 1, float, readonly); +BUFFER_DECLARATION(dst, 2, float, writeonly); +BUFFER_DECLARATION(mean, 3, float, readonly); +BUFFER_DECLARATION(var, 4, float, readonly); +BUFFER_DECLARATION(beta, 5, float, readonly); +BUFFER_DECLARATION(gamma, 6, float, readonly); + +/** Apply batch normalization. + * + * @note Epsilon parameter in the batch normalization equation should be given as a preprocessor argument using "#define EPSILON". e.g. "#define EPSILON 0.1" + * + * @param[in] src_ptr Pointer to the first source tensor. 
Supported data types: F32 + * @param[in] src_stride_x Stride of the first source tensor in X dimension (in bytes) + * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src_stride_y Stride of the first source tensor in Y dimension (in bytes) + * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src_stride_z Stride of the first source tensor in Z dimension (in bytes) + * @param[in] src_step_z src_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] src_offset_first_element_in_bytes The offset of the first element in the first source tensor + * @param[out] dst_ptr Pointer to the destination tensor. Supported data types: same as @p src_ptr + * @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes) + * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes) + * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in bytes) + * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor + * @param[in] mean_ptr Pointer to the mean source tensor. Supported data types: same as @p src_ptr + * @param[in] mean_stride_x Stride of the mean source tensor in X dimension (in bytes) + * @param[in] mean_step_x mean_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] mean_offset_first_element_in_bytes The offset of the first element in the mean source tensor + * @param[in] var_ptr Pointer to the var tensor. 
Supported data types: same as @p src_ptr
+ * @param[in]  var_stride_x                        Stride of the var tensor in X dimension (in bytes)
+ * @param[in]  var_step_x                          var_stride_x * number of elements along X processed per workitem(in bytes)
+ * @param[in]  var_offset_first_element_in_bytes   The offset of the first element in the var source tensor
+ * @param[in]  beta_ptr                            Pointer to the beta source tensor. Supported data types: same as @p src_ptr
+ * @param[in]  beta_stride_x                       Stride of the beta source tensor in X dimension (in bytes)
+ * @param[in]  beta_step_x                         beta_stride_x * number of elements along X processed per workitem(in bytes)
+ * @param[in]  beta_offset_first_element_in_bytes  The offset of the first element in the beta source tensor
+ * @param[in]  gamma_ptr                           Pointer to the gamma source tensor. Supported data types: same as @p src_ptr
+ * @param[in]  gamma_stride_x                      Stride of the gamma source tensor in X dimension (in bytes)
+ * @param[in]  gamma_step_x                        gamma_stride_x * number of elements along X processed per workitem(in bytes)
+ * @param[in]  gamma_offset_first_element_in_bytes The offset of the first element in the gamma source tensor
+ */
+void main(void)
+{
+    Tensor3D src = CONVERT_TO_TENSOR3D_STRUCT(src);
+    Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT(dst);
+    Vector   mean = CONVERT_TO_VECTOR_STRUCT(mean);
+    Vector   var = CONVERT_TO_VECTOR_STRUCT(var);
+    Vector   beta = CONVERT_TO_VECTOR_STRUCT(beta);
+    Vector   gamma = CONVERT_TO_VECTOR_STRUCT(gamma);
+
+    float input_value = 0.f;
+    float denominator = 0.f;
+    float numerator   = 0.f;
+    float x_bar       = 0.f;
+    float gamma_param = 0.f;
+    float beta_param  = 0.f;
+
+    uint current_slice = gl_GlobalInvocationID.z;
+
+    input_value = src_ptr[src.current_offset];
+    denominator = var_ptr[var.current_offset + (current_slice * var.stride_x) >> 2];
+    denominator = INVSQRT_OP(ADD_OP(denominator, SQCVT_SAT(float(ESPILON))));
+
+    // Calculate x bar and store results
+    numerator = mean_ptr[mean.current_offset + (current_slice * mean.stride_x) >> 2];
+    numerator = SUB_OP(input_value, numerator);
+    x_bar     = MUL_OP(numerator, denominator);
+
+    // Fix: index gamma with its own stride (was a beta.stride_x copy-paste; identical layout today, wrong tensor)
+    gamma_param = gamma_ptr[gamma.current_offset + (current_slice * gamma.stride_x) >> 2];
+    beta_param  = beta_ptr[beta.current_offset + (current_slice * beta.stride_x) >> 2];
+
+    dst_ptr[dst.current_offset] = ADD_OP(MUL_OP(gamma_param, x_bar), beta_param);
+}
+
+#elif defined(DATA_TYPE_FP16)
+BUFFER_DECLARATION(src, 1, uint, );
+BUFFER_DECLARATION(dst, 2, uint, writeonly);
+BUFFER_DECLARATION(mean, 3, uint, );
+BUFFER_DECLARATION(var, 4, uint, );
+BUFFER_DECLARATION(beta, 5, uint, );
+BUFFER_DECLARATION(gamma, 6, uint, );
+
+/** Apply batch normalization.
+ *
+ * @note Epsilon parameter in the batch normalization equation should be given as a preprocessor argument using "#define EPSILON". e.g. "#define EPSILON 0.1"
+ *
+ * @param[in]  src_ptr                             Pointer to the first source tensor. Supported data types: F16
+ * @param[in]  src_stride_x                        Stride of the first source tensor in X dimension (in bytes)
+ * @param[in]  src_step_x                          src_stride_x * number of elements along X processed per workitem(in bytes)
+ * @param[in]  src_stride_y                        Stride of the first source tensor in Y dimension (in bytes)
+ * @param[in]  src_step_y                          src_stride_y * number of elements along Y processed per workitem(in bytes)
+ * @param[in]  src_stride_z                        Stride of the first source tensor in Z dimension (in bytes)
+ * @param[in]  src_step_z                          src_stride_z * number of elements along Z processed per workitem(in bytes)
+ * @param[in]  src_offset_first_element_in_bytes   The offset of the first element in the first source tensor
+ * @param[out] dst_ptr                             Pointer to the destination tensor.
Supported data types: same as @p src_ptr + * @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes) + * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes) + * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in bytes) + * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor + * @param[in] mean_ptr Pointer to the mean source tensor. Supported data types: same as @p src_ptr + * @param[in] mean_stride_x Stride of the mean source tensor in X dimension (in bytes) + * @param[in] mean_step_x mean_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] mean_offset_first_element_in_bytes The offset of the first element in the mean source tensor + * @param[in] var_ptr Pointer to the var tensor. Supported data types: same as @p src_ptr + * @param[in] var_stride_x Stride of the var tensor in X dimension (in bytes) + * @param[in] var_step_x var_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] var_offset_first_element_in_bytes The offset of the first element in the var source tensor + * @param[in] beta_ptr Pointer to the beta source tensor. Supported data types: same as @p src_ptr + * @param[in] beta_stride_x Stride of the beta source tensor in X dimension (in bytes) + * @param[in] beta_step_x beta_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] beta_offset_first_element_in_bytes The offset of the first element in the beta source tensor + * @param[in] gamma_ptr Pointer to the gamma source tensor. 
Supported data types: same as @p src_ptr
+ * @param[in]  gamma_stride_x                      Stride of the gamma source tensor in X dimension (in bytes)
+ * @param[in]  gamma_step_x                        gamma_stride_x * number of elements along X processed per workitem(in bytes)
+ * @param[in]  gamma_offset_first_element_in_bytes The offset of the first element in the gamma source tensor
+ */
+void main(void)
+{
+    Tensor3D src = CONVERT_TO_TENSOR3D_STRUCT_FP16(src);
+    Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT_FP16(dst);
+    Vector   mean = CONVERT_TO_VECTOR_STRUCT_FP16(mean);
+    Vector   var = CONVERT_TO_VECTOR_STRUCT_FP16(var);
+    Vector   beta = CONVERT_TO_VECTOR_STRUCT_FP16(beta);
+    Vector   gamma = CONVERT_TO_VECTOR_STRUCT_FP16(gamma);
+
+    vec2  input_value;
+    float denominator;
+    float numerator;
+    vec2  x_bar;
+    float gamma_param;
+    float beta_param;
+
+    uint current_slice = gl_GlobalInvocationID.z;
+    if((current_slice % uint(2)) == uint(0))
+    {
+        input_value = unpackHalf2x16(src_ptr[src.current_offset >> 2]);
+        denominator = unpackHalf2x16(var_ptr[(var.current_offset + current_slice * var.stride_x) >> 2]).x;
+        denominator = INVSQRT_OP(ADD_OP(denominator, SQCVT_SAT(float(ESPILON))));
+
+        //Calculate x bar and store results
+        numerator = unpackHalf2x16(mean_ptr[(mean.current_offset + current_slice * mean.stride_x) >> 2]).x;
+        x_bar     = MUL_OP(SUB_OP(input_value, numerator), denominator);
+
+        // Fix: index gamma with its own stride (was a beta.stride_x copy-paste; identical layout today, wrong tensor)
+        gamma_param = unpackHalf2x16(gamma_ptr[(gamma.current_offset + current_slice * gamma.stride_x) >> 2]).x;
+        beta_param  = unpackHalf2x16(beta_ptr[(beta.current_offset + current_slice * beta.stride_x) >> 2]).x;
+
+        dst_ptr[dst.current_offset >> 2] = packHalf2x16(ADD_OP(MUL_OP(gamma_param, x_bar), beta_param));
+    }
+    else
+    {
+        input_value = unpackHalf2x16(src_ptr[src.current_offset >> 2]);
+        denominator = unpackHalf2x16(var_ptr[(var.current_offset + current_slice * var.stride_x) >> 2]).y;
+        denominator = INVSQRT_OP(ADD_OP(denominator, SQCVT_SAT(float(ESPILON))));
+
+        //Calculate x bar and store results
+        numerator = unpackHalf2x16(mean_ptr[(mean.current_offset + current_slice * mean.stride_x) >> 2]).y;
+        x_bar     = MUL_OP(SUB_OP(input_value, numerator), denominator);
+
+        // Fix: index gamma with its own stride (was a beta.stride_x copy-paste; identical layout today, wrong tensor)
+        gamma_param = unpackHalf2x16(gamma_ptr[(gamma.current_offset + current_slice * gamma.stride_x) >> 2]).y;
+        beta_param  = unpackHalf2x16(beta_ptr[(beta.current_offset + current_slice * beta.stride_x) >> 2]).y;
+
+        dst_ptr[dst.current_offset >> 2] = packHalf2x16(ADD_OP(MUL_OP(gamma_param, x_bar), beta_param));
+    }
+}
+#endif /*DATA_TYPE_FP32*/
diff --git a/src/core/GLES_COMPUTE/cs_shaders/concatenate.cs b/src/core/GLES_COMPUTE/cs_shaders/concatenate.cs
new file mode 100644
index 0000000000..65000f2de2
--- /dev/null
+++ b/src/core/GLES_COMPUTE/cs_shaders/concatenate.cs
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */ +layout(local_size_x = LOCAL_SIZE_X, local_size_y = LOCAL_SIZE_Y, local_size_z = LOCAL_SIZE_Z) in; +#include "helpers.h" + +#ifdef DATA_TYPE_FP32 +precision highp float; + +layout(std140) uniform shader_params +{ + TENSOR3D_PARAM_DECLARATION(src); + TENSOR3D_PARAM_DECLARATION(dst); +}; + +BUFFER_DECLARATION(src, 1, float, readonly); +BUFFER_DECLARATION(dst, 2, float, writeonly); + +/** This kernel concatenates the input tensor into the output tensor along the third dimension + * + * @param[in] src_ptr Pointer to the source tensor. Supported data types: F32 + * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes) + * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes) + * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] src_step_z src_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor + * @param[out] dst_ptr Pointer to the destination tensor. 
Supported data types: same as @p src_ptr + * @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes) + * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes) + * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] dst_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor + */ +void main(void) +{ + Tensor3D src = CONVERT_TO_TENSOR3D_STRUCT(src); + Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT(dst); + + dst_ptr[dst.current_offset + uint(OFFSETS_Z >> 2)] = src_ptr[tensor3D_offset(src, -OFFSETS_X, -OFFSETS_Y, 0)]; +} + +#elif defined(DATA_TYPE_FP16) +precision mediump float; + +layout(std140) uniform shader_params +{ + TENSOR3D_PARAM_DECLARATION(src); + TENSOR3D_PARAM_DECLARATION(dst); +}; + +BUFFER_DECLARATION(src, 1, uvec2, readonly); +BUFFER_DECLARATION(dst, 2, uvec2, writeonly); + +/** This kernel concatenates the input tensor into the output tensor along the third dimension + * + * @param[in] src_ptr Pointer to the source tensor. 
Supported data types: F16 + * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes) + * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes) + * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] src_step_z src_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor + * @param[out] dst_ptr Pointer to the destination tensor. Supported data types: same as @p src_ptr + * @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes) + * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes) + * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] dst_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor + */ +void main(void) +{ + Tensor3D src = GC_CONVERT_TO_TENSOR3D_STRUCT(src); + Tensor3D dst = GC_CONVERT_TO_TENSOR3D_STRUCT(dst); + + uvec2 packed_s; + GC_LOAD1_3D_OFFSET(packed_s, src, -OFFSETS_X, -OFFSETS_Y, 0); + dst_ptr[(dst.current_offset + uint(OFFSETS_Z)) >> 3] = packed_s; +} +#endif /*DATA_TYPE_FP32*/ \ No newline at end of file diff --git a/src/core/GLES_COMPUTE/cs_shaders/convolution_layer.cs b/src/core/GLES_COMPUTE/cs_shaders/convolution_layer.cs new file mode 100644 index 0000000000..1a0c9f1d30 --- /dev/null +++ 
b/src/core/GLES_COMPUTE/cs_shaders/convolution_layer.cs @@ -0,0 +1,302 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +layout(local_size_x = LOCAL_SIZE_X, local_size_y = LOCAL_SIZE_Y, local_size_z = LOCAL_SIZE_Z) in; +#include "helpers.h" + +#ifdef DATA_TYPE_FP16 +BUFFER_DECLARATION(src, 1, uint, readonly); +BUFFER_DECLARATION(dst, 2, uint, restrict); +#else // DATA_TYPE_FP16 +BUFFER_DECLARATION(src, 1, float, readonly); +BUFFER_DECLARATION(dst, 2, float, restrict); +#endif // DATA_TYPE_FP16 + +layout(std140) uniform shader_params +{ +#ifdef IM2COL_GENERIC + TENSOR3D_PARAM_DECLARATION(src); + IMAGE_PARAM_DECLARATION(dst); + uint filter_depth; + uint src_stride_w; + uint dst_stride_w; +#endif // IM2COL_GENERIC + +#ifdef IM2COL_REDUCED + TENSOR3D_PARAM_DECLARATION(src); + VECTOR_PARAM_DECLARATION(dst); + uint width; + uint height; +#endif // IM2COL_REDUCED + +#ifdef COL2IM + IMAGE_PARAM_DECLARATION(src); + TENSOR3D_PARAM_DECLARATION(dst); + uint width; +#endif // COL2IM +}; + +#ifdef DATA_TYPE_FP16 + +precision mediump float; + +#ifdef IM2COL_REDUCED +/** This kernel reshapes the tensor's low three dimensions to single row for GEMM operation + * + * @note The data type must be passed at compile time using "#define DATA_TYPE_FP16" + * @note In case biases will be added in late stage, "#define HAS_BIAS" has to be passed to append the final matrix with 1 in each row. + * + * @param[in] src_ptr Pointer to the source tensor. 
Supported data types: F16 + * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes) + * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes) + * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] src_step_z src_stride_z * number of elements along Y processed per workitem(in bytes) + * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor + * @param[out] dst_ptr Pointer to the destination tensor. Same as @p src_ptr + * @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes) + * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor + * @param[in] width The width of the input tensor + * @param[in] height The height of the input tensor + */ +void main(void) +{ + uvec3 pos = uvec3(gl_GlobalInvocationID.xyz); + uvec3 size = uvec3(gl_WorkGroupSize.xyz); + Tensor3D src = CONVERT_TO_TENSOR3D_STRUCT_FP16(src); + Tensor3D src_nostep = CONVERT_TO_TENSOR3D_STRUCT_NO_STEP_FP16(src); + Vector dst = CONVERT_TO_VECTOR_STRUCT_NO_STEP_FP16(dst); + uint image_size = width * height; + uint element_count = src_step_x / src_stride_x; + uint tmp_out_offset = dst.current_offset + ((pos.x * element_count + pos.y * width + pos.z * image_size) * dst.stride_x); + uint width_fp16 = ((width + uint(1)) >> uint(1)); + uint tmp; + + // odd width + if(width % uint(2) != uint(0)) + { + // even row + if((pos.y + pos.z * height) % uint(2) == uint(0)) + { + LOAD1(tmp, src, src.current_offset >> uint(2)); + STORE1(dst, tmp_out_offset >> uint(2), tmp); + } + else + { + // special op + uint tmpleft = 
uint(0); + uint tmpright = uint(0); + LOAD1(tmpright, src, src.current_offset >> uint(2)); // right half + if(pos.x == uint(0)) + { + LOAD1(tmpleft, src, tensor3D_offset_fp16(src_nostep, int(width), int(pos.y) - 1, int(pos.z)) >> uint(2)); // left half + tmpright = (tmpleft & uint(0xffff)) + (tmpright << uint(16)); + } + else + { + LOAD1(tmpleft, src, tensor3D_offset_fp16(src_nostep, (int(pos.x) - 1) * int(element_count), int(pos.y), int(pos.z)) >> uint(2)); // left half + tmpright = ((tmpleft >> uint(16)) + (tmpright << uint(16))); + } + STORE1(dst, tmp_out_offset >> uint(2), tmpright); + } + } + else + { + LOAD1(tmp, src, src.current_offset >> uint(2)); + STORE1(dst, tmp_out_offset >> uint(2), tmp); + } + +#ifdef HAS_BIAS + // If it is the last thread in the 3 dimensional workgroup + if(pos.x == (size.x - 1) && pos.y == (size.y - 1) && pos.z == (size.z - 1)) + { + tmp_out_offset += dst.stride_x; + + // FIXME: need odd/even detection for tmp_out_offset? + mediump vec2 bias_vec = vec2(1.0f, 1.0f); + uint bias_u = packHalf2x16(bias_vec); + STORE1(dst, tmp_out_offset >> uint(2), bias_u); + } +#endif // HAS_BIAS +} +#endif // IM2COL_REDUCED + +#elif defined(DATA_TYPE_FP32) + +#ifdef IM2COL_GENERIC +/** This kernel performs a reshaping of the input tensor to a tensor used to perform convolution using GEMM. + * + * @note The data type must be passed at compile time using "#define DATA_TYPE_FP32" + * @note In case biases will be added to the convolution "#define HAS_BIAS" has to be passed to append the final matrix with 1 in each row. + * + * @param[in] src_ptr Pointer to the source tensor. 
Supported data types: F32 + * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes) + * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes) + * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] src_step_z src_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor + * @param[out] dst_ptr Pointer to the destination tensor. Supported data types: same as @p src_ptr + * @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes) + * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes) + * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor + * @param[in] filter_depth The depth of the used filter + * @param[in] src_stride_w Stride of the source tensor in W dimension (in bytes). + * @param[in] dst_stride_w Stride of the destination tensor in W dimension (in bytes). 
+ */
+void main(void)
+{
+    uint xc    = gl_GlobalInvocationID.x;                // x coordinate in the convolved tensor
+    uint yc    = gl_GlobalInvocationID.y;                // y coordinate in the convolved tensor
+    uint ch    = gl_GlobalInvocationID.z % filter_depth; // input feature map
+    uint batch = gl_GlobalInvocationID.z / filter_depth; // the batch
+
+    // Calculate input indeces
+    uint xi           = xc * uint(STRIDE_X) - uint(PAD_X);
+    uint yi           = yc * uint(STRIDE_Y) - uint(PAD_Y);
+    uint input_offset = (src_offset_first_element_in_bytes + (ch * src_stride_z) + (batch * src_stride_w)) >> uint(2);
+
+    // Calculate output indeces
+    uint xo            = ch * uint(KERNEL_WIDTH) * uint(KERNEL_HEIGHT);
+    uint yo            = xc + yc * uint(CONVOLVED_WIDTH); // Index of the convolution
+    uint output_offset = (dst_offset_first_element_in_bytes + (yo * dst_stride_y) + (batch * dst_stride_w) + xo) >> uint(2);
+
+    // Linearize convolution elements
+    for(uint y = yi, y_e = yi + uint(KERNEL_HEIGHT); y < y_e; ++y)
+    {
+        for(uint x = xi, x_e = xi + uint(KERNEL_WIDTH); x < x_e; ++x)
+        {
+#if PAD_X == 0 && PAD_Y == 0
+            output_offset = input_offset + ((x * src_stride_x + y * src_stride_y) >> uint(2));
+            STORE4(dst, output_offset, LOAD4(src, input_offset));
+#else // PAD_X == 0 && PAD_Y == 0
+            // NOTE(review): x/y are uint, so 'x < 0' is always false; out-of-bounds padding is caught
+            // by 'x >= SRC_WIDTH' via unsigned wraparound of the PAD subtraction — confirm intended.
+            if(x < 0 || x >= SRC_WIDTH || y < 0 || y >= SRC_HEIGHT)
+            {
+                STORE4(dst, output_offset, 0.0f);
+            }
+            else
+            {
+                // Fix: unbalanced parenthesis — '(... ) >> uint(2));' did not compile; match the #if branch.
+                output_offset = input_offset + ((x * src_stride_x + y * src_stride_y) >> uint(2));
+                STORE4(dst, output_offset, LOAD4(src, input_offset));
+            }
+#endif // PAD_X == 0 && PAD_Y == 0
+        }
+    }
+
+#ifdef HAS_BIAS
+    if(ch == (uint(KERNEL_DEPTH) - 1))
+    {
+        STORE4(dst, output_offset, 1.0f);
+    }
+#endif // HAS_BIAS
+}
+#endif // IM2COL_GENERIC
+
+#ifdef IM2COL_REDUCED
+/** This kernel reshapes the tensor's low three dimensions to single row for GEMM operation
+ *
+ * @note The data type must be passed at compile time using "#define DATA_TYPE_FP32"
+ * @note In case biases will be added in late stage, "#define HAS_BIAS" has to be passed to append the final
matrix with 1 in each row. + * + * @param[in] src_ptr Pointer to the source tensor. Supported data types: F32 + * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes) + * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes) + * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] src_step_z src_stride_z * number of elements along Y processed per workitem(in bytes) + * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor + * @param[out] dst_ptr Pointer to the destination tensor. Same as @p src_ptr + * @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes) + * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor + * @param[in] width The width of the input tensor + * @param[in] height The height of the input tensor + */ +void main(void) +{ + uvec3 pos = uvec3(gl_GlobalInvocationID.xyz); + uvec3 size = uvec3(gl_WorkGroupSize.xyz); + Tensor3D src = CONVERT_TO_TENSOR3D_STRUCT(src); + Vector dst = CONVERT_TO_VECTOR_STRUCT_NO_STEP(dst); + uint image_size = width * height; + uint tmp_out_offset = dst.current_offset + (((pos.x + pos.y * width + pos.z * image_size) * dst.stride_x) >> 2); + + STORE4(dst, tmp_out_offset, LOAD4(src, src.current_offset)); + +#ifdef HAS_BIAS + // If it is the last thread in the 3 dimensional workgroup + if(pos.x == (size.x - 1) && pos.y == (size.y - 1) && pos.z == (size.z - 1)) + { + tmp_out_offset += (dst.stride_x >> uint(2)); + STORE4(dst, tmp_out_offset, 1.f); + } +#endif // HAS_BIAS +} +#endif // IM2COL_REDUCED + +#ifdef COL2IM +/** This 
kernel performs a reshaping of the output of the convolution layer. + * + * @note The data type must be passed at compile time using "#define DATA_TYPE_FP32" + * + * @param[in] src_ptr Pointer to the source tensor. Supported data types: F32 + * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes) + * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes) + * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] src_step_z src_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor + * @param[out] dst_ptr Pointer to the destination tensor. Supported data types: same as @p src_ptr + * @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes) + * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes) + * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in bytes) + * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor + * @param[in] dst_stride_w Stride of the destination tensor in W dimension (in bytes) + */ +void main(void) +{ + uvec2 pos = uvec2(gl_GlobalInvocationID.xy); + Image src = CONVERT_TO_IMAGE_STRUCT(src); + Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT(dst); + + uint idx = pos.x * dst.stride_z + (pos.y / width) * dst.stride_y + (pos.y % 
width) * dst.stride_x; + uint tmp_out_offset = dst.current_offset + (idx >> 2); + + STORE4(dst, tmp_out_offset, LOAD4(src, src.current_offset)); +} +#endif // COL2IM + +#else // DATA_TYPE_FP16 +#error Data type not supported +#endif // DATA_TYPE_FP16 diff --git a/src/core/GLES_COMPUTE/cs_shaders/direct_convolution1x1.cs b/src/core/GLES_COMPUTE/cs_shaders/direct_convolution1x1.cs new file mode 100644 index 0000000000..3a31cb80a7 --- /dev/null +++ b/src/core/GLES_COMPUTE/cs_shaders/direct_convolution1x1.cs @@ -0,0 +1,275 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +layout(local_size_x = LOCAL_SIZE_X, local_size_y = LOCAL_SIZE_Y, local_size_z = LOCAL_SIZE_Z) in; + +#include "helpers.h" + +layout(std140) uniform shader_params +{ + TENSOR3D_PARAM_DECLARATION(src); + TENSOR3D_PARAM_DECLARATION(dst); + TENSOR3D_PARAM_DECLARATION(weights); +#ifdef BIAS + VECTOR_PARAM_DECLARATION(biases); +#endif /* BIAS */ + uint weights_stride_w; + uint weights_depth; +}; + +#if defined(DATA_TYPE_FP32) +precision highp float; + +BUFFER_DECLARATION(src, 1, float, readonly); +BUFFER_DECLARATION(dst, 2, float, writeonly); +BUFFER_DECLARATION(weights, 3, float, readonly); +#ifdef BIAS +BUFFER_DECLARATION(biases, 4, float, readonly); +#endif /* BIAS */ + +/** This kernel performs a direct convolution to convolve the low three dimensions. + * + * @note The data type must be passed at compile time using "#define DATA_TYPE_FP32" + * @note The convolution stride x must be passed at compile time using "#define STRIDE_X" e.g. "#define STRIDE_X 1" + * @note In case biases will be added to the convolution "#define HAS_BIAS" has to be passed to append the final matrix with 1 in each row. + * + * @param[in] src_ptr Pointer to the source tensor. Supported data types: F32 + * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes) + * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes) + * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] src_step_z src_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor + * @param[out] dst_ptr Pointer to the destination tensor. 
Supported data types: same as @p src_ptr
+ * @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes)
+ * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes)
+ * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes)
+ * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in bytes)
+ * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor
+ * @param[in] weights_ptr Pointer to the weights tensor. Supported data types: same as @p src_ptr
+ * @param[in] weights_stride_x Stride of the weights tensor in X dimension (in bytes)
+ * @param[in] weights_step_x weights_stride_x * number of elements along X processed per workitem(in bytes)
+ * @param[in] weights_stride_y Stride of the weights tensor in Y dimension (in bytes)
+ * @param[in] weights_step_y weights_stride_y * number of elements along Y processed per workitem(in bytes)
+ * @param[in] weights_stride_z Stride of the weights tensor in Z dimension (in bytes)
+ * @param[in] weights_step_z weights_stride_z * number of elements along Z processed per workitem(in bytes)
+ * @param[in] weights_offset_first_element_in_bytes The offset of the first element in the weights tensor
+ * @param[in] biases_ptr Pointer to the biases tensor.
Same as @p src_ptr + * @param[in] biases_stride_x Stride of the biases tensor in X dimension (in bytes) + * @param[in] biases_step_x biases_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] biases_offset_first_element_in_bytes The offset of the first element in the biases tensor + * @param[in] weights_stride_w Stride of the weights tensor in the 4th dimension + * @param[in] weights_depth The third dimensions of the weights tensors + */ +void main() +{ + Image src = CONVERT_TO_IMAGE_STRUCT(src); + Tensor3D weights = CONVERT_TO_TENSOR3D_STRUCT_NO_STEP(weights); + Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT(dst); + +#ifdef BIAS + Vector biases = CONVERT_TO_VECTOR_STRUCT_NO_STEP(biases); +#endif /* BIAS */ + + float pixels = CONVERT(0, float); + uint z_index = gl_GlobalInvocationID.z; + weights.current_offset += z_index * weights_stride_w >> 2; + float temp; + float temp_weight; + + for(int d = 0; d < int(weights_depth); ++d) + { + temp = LOAD4(src, CURRENT_OFFSET(src)); + temp_weight = LOAD4(weights, CURRENT_OFFSET(weights)); + pixels += temp * temp_weight; + + src.current_offset += (src_stride_z >> 2); + weights.current_offset += (weights_stride_z >> 2); + } + +#ifdef BIAS + pixels += LOAD4(biases, vector_offset(biases, int(z_index))); +#endif /* BIAS */ + + STORE4(dst, CURRENT_OFFSET(dst), pixels); +} +#elif defined(DATA_TYPE_FP16) +precision mediump float; + +BUFFER_DECLARATION(src, 1, uvec4, readonly); +BUFFER_DECLARATION(dst, 2, uvec4, writeonly); +BUFFER_DECLARATION(weights, 3, uint, readonly); +#ifdef BIAS +BUFFER_DECLARATION(biases, 4, uint, readonly); +#endif /* BIAS */ + +#if STRIDE_X == 2 +#define CONVOLVE(s, w) convolve_stride2(s, w) +#elif STRIDE_X == 1 /* STRIDE_X == 1 */ +#define CONVOLVE(s, w) convolve_stride1(s, w) +#else /* STRIDE_X not equals 1 or 2 */ +#error STRIDE_X larger than 2 is not supported +#endif /* STRIDE_X == 2 */ + +vec4[2] convolve_stride1(Image src, float w) +{ + uvec4 packed_s; + vec4 s[2]; + + 
GC_LOAD1_2D_OFFSET(packed_s, src, 0, 0); + + s[0] = vec4(unpackHalf2x16(packed_s.x), unpackHalf2x16(packed_s.y)); + s[1] = vec4(unpackHalf2x16(packed_s.z), unpackHalf2x16(packed_s.w)); + + s[0] *= w; + s[1] *= w; + + return s; +} + +vec4[2] convolve_stride2(Image src, float w) +{ + uvec4 packed_s; + vec4 s[2]; + vec4 r[2]; + + GC_LOAD1_2D_OFFSET(packed_s, src, 0, 0); + s[0] = vec4(unpackHalf2x16(packed_s.x), unpackHalf2x16(packed_s.y)); + s[1] = vec4(unpackHalf2x16(packed_s.z), unpackHalf2x16(packed_s.w)); + + r[0] = vec4(s[0].xz, s[1].xz); + + GC_LOAD1_2D_OFFSET(packed_s, src, 8, 0); + s[0] = vec4(unpackHalf2x16(packed_s.x), unpackHalf2x16(packed_s.y)); + s[1] = vec4(unpackHalf2x16(packed_s.z), unpackHalf2x16(packed_s.w)); + + r[1] = vec4(s[0].xz, s[1].xz); + + r[0] *= w; + r[1] *= w; + + return r; +} + +/** This kernel performs a direct convolution to convolve the low three dimensions. + * + * @note The data type must be passed at compile time using "#define DATA_TYPE_FP16" + * @note The convolution stride x must be passed at compile time using "#define STRIDE_X" e.g. "#define STRIDE_X 1" + * @note In case biases will be added to the convolution "#define HAS_BIAS" has to be passed to append the final matrix with 1 in each row. + * + * @param[in] src_ptr Pointer to the source tensor. 
Supported data types: F16
+ * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)
+ * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
+ * @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes)
+ * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
+ * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes)
+ * @param[in] src_step_z src_stride_z * number of elements along Z processed per workitem(in bytes)
+ * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor
+ * @param[out] dst_ptr Pointer to the destination tensor. Supported data types: same as @p src_ptr
+ * @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes)
+ * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes)
+ * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes)
+ * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in bytes)
+ * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor
+ * @param[in] weights_ptr Pointer to the weights tensor.
Supported data types: same as @p src_ptr + * @param[in] weights_stride_x Stride of the weights tensor in X dimension (in bytes) + * @param[in] weights_step_x weights_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] weights_stride_y Stride of the weights tensor in Y dimension (in bytes) + * @param[in] weights_step_y weights_stride_y * number of elements along y processed per workitem(in bytes) + * @param[in] weights_stride_z Stride of the weights tensor in Z dimension (in bytes) + * @param[in] weights_step_z weights_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] weights_offset_first_element_in_bytes The offset of the first element in the weights tensor + * @param[in] biases_ptr Pointer to the biases tensor. Same as @p src_ptr + * @param[in] biases_stride_x Stride of the biases tensor in X dimension (in bytes) + * @param[in] biases_step_x biases_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] biases_offset_first_element_in_bytes The offset of the first element in the biases tensor + * @param[in] weights_stride_w Stride of the weights tensor in the 4th dimension + * @param[in] weights_depth The third dimensions of the weights tensors + */ +void main() +{ + Image src = GC_CONVERT_TO_IMAGE_STRUCT(src); + Tensor3D weights = GC_CONVERT_TO_TENSOR3D_STRUCT_NO_STEP(weights); + Tensor3D dst = GC_CONVERT_TO_TENSOR3D_STRUCT(dst); + +#ifdef BIAS + Vector biases = GC_CONVERT_TO_VECTOR_STRUCT_NO_STEP(biases); +#endif /* BIAS */ + + vec4 pixels[2]; + pixels[0] = vec4(0.f); + pixels[1] = vec4(0.f); + + uint z_index = gl_GlobalInvocationID.z; + + weights.current_offset += z_index * weights_stride_w; + + uint packed_w; + float w; + + for(int d = 0; d < int(weights_depth); ++d) + { + GC_LOAD1_3D_OFFSET(packed_w, weights, 0, 0, 0); + w = unpackHalf2x16(packed_w).x; + + vec4 r[2] = CONVOLVE(src, w); + pixels[0] += r[0]; + pixels[1] += r[1]; + + src.current_offset += 
src_stride_z; + weights.current_offset += weights_stride_z; + } + +#ifdef BIAS + uint packed_b; + float b; + + GC_LOAD1_1D_OFFSET(packed_b, biases, z_index); + + if(z_index % uint(2) == uint(0)) + { + b = unpackHalf2x16(packed_b).x; + } + else + { + b = unpackHalf2x16(packed_b).y; + } + + pixels[0] += vec4(b); + pixels[1] += vec4(b); +#endif /* BIAS */ + + uvec4 packed_d; + packed_d = uvec4(packHalf2x16(pixels[0].xy), packHalf2x16(pixels[0].zw), + packHalf2x16(pixels[1].xy), packHalf2x16(pixels[1].zw)); + GC_STORE1_3D_OFFSET(packed_d, dst, 0, 0, 0); +} +#else /* DATA_TYPE_FP32 */ +#error Data type not supported +#endif /* DATA_TYPE_FP32 */ diff --git a/src/core/GLES_COMPUTE/cs_shaders/direct_convolution3x3.cs b/src/core/GLES_COMPUTE/cs_shaders/direct_convolution3x3.cs new file mode 100644 index 0000000000..67b92cb8cf --- /dev/null +++ b/src/core/GLES_COMPUTE/cs_shaders/direct_convolution3x3.cs @@ -0,0 +1,1583 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +layout(local_size_x = LOCAL_SIZE_X, local_size_y = LOCAL_SIZE_Y, local_size_z = LOCAL_SIZE_Z) in; + +#include "helpers.h" + +layout(std140) uniform shader_params +{ + TENSOR3D_PARAM_DECLARATION(src); + TENSOR3D_PARAM_DECLARATION(dst); + TENSOR3D_PARAM_DECLARATION(weights); +#ifdef BIAS + VECTOR_PARAM_DECLARATION(biases); +#endif /* BIAS */ + uint weights_stride_w; + uint weights_depth; +}; + +#define LOAD12(r, name, offset) \ + r.x = LOAD4(name, offset); \ + r.y = LOAD4(name, offset + uint(1)); \ + r.z = LOAD4(name, offset + uint(2)) + +#define LOAD3X3(r, name) \ + r[0] = LOAD4(name, tensor3D_offset(name, 0, 0, 0)); \ + r[1] = LOAD4(name, tensor3D_offset(name, 1, 0, 0)); \ + r[2] = LOAD4(name, tensor3D_offset(name, 2, 0, 0)); \ + r[3] = LOAD4(name, tensor3D_offset(name, 0, 1, 0)); \ + r[4] = LOAD4(name, tensor3D_offset(name, 1, 1, 0)); \ + r[5] = LOAD4(name, tensor3D_offset(name, 2, 1, 0)); \ + r[6] = LOAD4(name, tensor3D_offset(name, 0, 2, 0)); \ + r[7] = LOAD4(name, tensor3D_offset(name, 1, 2, 0)); \ + r[8] = LOAD4(name, tensor3D_offset(name, 2, 2, 0)) + +#if defined(PROCESS_1_ELEMENT) +BUFFER_DECLARATION(src, 1, float, readonly); +BUFFER_DECLARATION(dst, 2, float, writeonly); +BUFFER_DECLARATION(weights, 3, float, readonly); +#ifdef BIAS +BUFFER_DECLARATION(biases, 4, float, readonly); +#endif /* BIAS */ + +/** This kernel performs a direct convolution to convolve the low three dimensions. + * + * @note The data type must be passed at compile time using "#define DATA_TYPE_FP32" + * @note If biases are used then "define HAS_BIAS" has to be passed at compile time + * + * @param[in] src_ptr Pointer to the source tensor. 
Supported data types: F32
+ * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)
+ * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
+ * @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes)
+ * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
+ * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes)
+ * @param[in] src_step_z src_stride_z * number of elements along Z processed per workitem(in bytes)
+ * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor
+ * @param[out] dst_ptr Pointer to the destination tensor. Supported data types: same as @p src_ptr
+ * @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes)
+ * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes)
+ * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes)
+ * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in bytes)
+ * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor
+ * @param[in] weights_ptr Pointer to the weights tensor.
Supported data types: same as @p src_ptr + * @param[in] weights_stride_x Stride of the weights tensor in X dimension (in bytes) + * @param[in] weights_step_x weights_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] weights_stride_y Stride of the weights tensor in Y dimension (in bytes) + * @param[in] weights_step_y weights_stride_y * number of elements along y processed per workitem(in bytes) + * @param[in] weights_stride_z Stride of the weights tensor in Z dimension (in bytes) + * @param[in] weights_step_z weights_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] weights_offset_first_element_in_bytes The offset of the first element in the weights tensor + * @param[in] biases_ptr Pointer to the biases tensor. Same as @p src_ptr + * @param[in] biases_stride_x Stride of the biases tensor in X dimension (in bytes) + * @param[in] biases_step_x biases_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] biases_offset_first_element_in_bytes The offset of the first element in the biases tensor + * @param[in] weights_stride_w Stride of the weights tensor in the 4th dimension + * @param[in] weights_depth The third dimensions of the weights tensors + */ +void main() +{ + Image src = CONVERT_TO_IMAGE_STRUCT(src); + Tensor3D weights = CONVERT_TO_TENSOR3D_STRUCT_NO_STEP(weights); + Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT(dst); + +#ifdef BIAS + Vector biases = CONVERT_TO_VECTOR_STRUCT_NO_STEP(biases); +#endif /* BIAS */ + + float pixels = CONVERT(0, float); + + uint z_index = gl_GlobalInvocationID.z; + + weights.current_offset += z_index * weights_stride_w >> 2; + + for(int d = 0; d < int(weights_depth); ++d) + { + vec3 temp; + vec3 w; + + LOAD12(temp, src, offset(src, 0, 0)); + LOAD12(w, weights, tensor3D_offset(weights, 0, 0, 0)); + + pixels += temp.x * w[0] + temp.y * w[1] + temp.z * w[2]; + + LOAD12(temp, src, offset(src, 0, 1)); + LOAD12(w, weights, 
tensor3D_offset(weights, 0, 1, 0)); + + pixels += temp.x * w[0] + temp.y * w[1] + temp.z * w[2]; + + LOAD12(temp, src, offset(src, 0, 2)); + LOAD12(w, weights, tensor3D_offset(weights, 0, 2, 0)); + + pixels += temp.x * w[0] + temp.y * w[1] + temp.z * w[2]; + + src.current_offset += src_stride_z >> 2; + weights.current_offset += weights_stride_z >> 2; + } + +#ifdef BIAS + pixels += LOAD4(biases, vector_offset(biases, int(z_index))); +#endif /* BIAS */ + + STORE4(dst, CURRENT_OFFSET(dst), pixels); +} +#elif defined(PROCESS_8_ELEMENT) +BUFFER_DECLARATION(src, 1, vec4, readonly); +BUFFER_DECLARATION(dst, 2, vec4, writeonly); +BUFFER_DECLARATION(weights, 3, float, readonly); +#ifdef BIAS +BUFFER_DECLARATION(biases, 4, float, readonly); +#endif /* BIAS */ + +#if STRIDE_X == 2 +#define CONVOLVE1x3(offset, w) convolve1x3_stride2(offset, w) +#elif STRIDE_X == 1 /* STRIDE_X == 1 */ +#define CONVOLVE1x3(offset, w) convolve1x3_stride1(offset, w) +#else /* STRIDE_X not equals 1 or 2 */ +#error STRIDE_X larger than 2 is not supported +#endif /* STRIDE_X == 2 */ + +vec4[2] convolve1x3_stride1(uint offset, vec3 w) +{ + vec4 middle; + vec4 right; + vec4 tmp[3]; + vec4 r[2]; + + LOAD3(tmp, src, offset); + + middle = vec4(tmp[0].yzw, tmp[1].x); + right = vec4(tmp[0].zw, tmp[1].xy); + + r[0] = tmp[0] * w[0] + middle * w[1] + right * w[2]; + + middle = vec4(tmp[1].yzw, tmp[2].x); + right = vec4(tmp[1].zw, tmp[2].xy); + + r[1] = tmp[1] * w[0] + middle * w[1] + right * w[2]; + + return r; +} + +vec4[2] convolve1x3_stride2(uint offset, vec3 w) +{ + vec4 left; + vec4 middle; + vec4 right; + vec4 tmp[3]; + vec4 r[2]; + + LOAD3(tmp, src, offset); + + left = vec4(tmp[0].xz, tmp[1].xz); + middle = vec4(tmp[0].yw, tmp[1].yw); + right = vec4(tmp[0].z, tmp[1].xz, tmp[2].x); + + r[0] = left * w[0] + middle * w[1] + right * w[2]; + + LOAD2(tmp, src, offset + ((uint(3) * src_stride_x) >> 2)); + + left = vec4(tmp[2].xz, tmp[0].xz); + middle = vec4(tmp[2].yw, tmp[0].yw); + right = vec4(tmp[2].z, 
tmp[0].xz, tmp[1].x); + + r[1] = left * w[0] + middle * w[1] + right * w[2]; + + return r; +} + +/** An optimized direct convolution 3x3 OpenGL ES compute shader for process 8 elements at once + * + * @note This OpenGL ES shader works with stride_x = 1 and 2 + * @note The data type must be passed at compile time using "#define DATA_TYPE_FP32" + * @note If biases are used then "define HAS_BIAS" has to be passed at compile time + * + * @param[in] src_ptr Pointer to the source tensor. Supported data types: F32 + * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes) + * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes) + * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] src_step_z src_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor + * @param[out] dst_ptr Pointer to the destination tensor. Supported data types: same as @p src_ptr + * @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes) + * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes) + * @param[in] dst_step_y dst_stride_y * number of elements along Z processed per workitem(in bytes) + * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in bytes) + * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor + * @param[out] weights_ptr Pointer to the weights tensor. 
Supported data types: same as @p src_ptr + * @param[in] weights_stride_x Stride of the weights tensor in X dimension (in bytes) + * @param[in] weights_step_x weights_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] weights_stride_y Stride of the weights tensor in Y dimension (in bytes) + * @param[in] weights_step_y weights_stride_y * number of elements along y processed per workitem(in bytes) + * @param[in] weights_stride_z Stride of the weights tensor in Z dimension (in bytes) + * @param[in] weights_step_z weights_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] weights_offset_first_element_in_bytes The offset of the first element in the weights tensor + * @param[in] biases_ptr Pointer to the biases tensor. Same as @p src_ptr + * @param[in] biases_stride_x Stride of the biases tensor in X dimension (in bytes) + * @param[in] biases_step_x biases_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] biases_offset_first_element_in_bytes The offset of the first element in the biases tensor + * @param[in] weights_stride_w Stride of the weights tensor in the 4th dimension + * @param[in] weights_depth The third dimensions of the weights tensors + */ +void main() +{ + Image src = CONVERT_TO_IMAGE_STRUCT(src); + Tensor3D weights = CONVERT_TO_TENSOR3D_STRUCT_NO_STEP(weights); + Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT(dst); + +#ifdef BIAS + Vector biases = CONVERT_TO_VECTOR_STRUCT_NO_STEP(biases); +#endif /* BIAS */ + + vec4 pixels[2]; + pixels[0] = vec4(0); + pixels[1] = vec4(0); + + uint z_index = gl_GlobalInvocationID.z; + + weights.current_offset += z_index * weights_stride_w >> 2; + + for(int d = 0; d < int(weights_depth); ++d) + { + // load 3 weights once + vec3 w; + vec4 r[2]; + + // first line + LOAD3(w, weights, tensor3D_offset(weights, 0, 0, 0)); + + r = CONVOLVE1x3(src.current_offset >> uint(2), w); + pixels[0] += r[0]; + pixels[1] += r[1]; + + // second 
line + LOAD3(w, weights, tensor3D_offset(weights, 0, 1, 0)); + + r = CONVOLVE1x3((src.current_offset + (src_stride_y >> 2)) >> uint(2), w); + pixels[0] += r[0]; + pixels[1] += r[1]; + + // third line + LOAD3(w, weights, tensor3D_offset(weights, 0, 2, 0)); + + r = CONVOLVE1x3((src.current_offset + (src_stride_y >> 1)) >> uint(2), w); + pixels[0] += r[0]; + pixels[1] += r[1]; + + src.current_offset += src_stride_z >> 2; + weights.current_offset += weights_stride_z >> 2; + } + +#ifdef BIAS + float b; + LOAD1(b, biases, vector_offset(biases, int(z_index))); + pixels[0] += vec4(b); + pixels[1] += vec4(b); +#endif /* BIAS */ + + STORE2(dst, dst.current_offset >> uint(2), pixels); +} +#elif defined(PROCESS_4_ELEMENT) +BUFFER_DECLARATION(src, 1, vec4, readonly); +BUFFER_DECLARATION(dst, 2, vec4, writeonly); +BUFFER_DECLARATION(weights, 3, float, readonly); +#ifdef BIAS +BUFFER_DECLARATION(biases, 4, float, readonly); +#endif /* BIAS */ + +#if STRIDE_X == 2 +#define CONVOLVE1x3(offset, w) convolve1x3_stride2(offset, w) +#elif STRIDE_X == 1 /* STRIDE_X == 1 */ +#define CONVOLVE1x3(offset, w) convolve1x3_stride1(offset, w) +#else /* STRIDE_X not equals 1 or 2 */ +#error STRIDE_X larger than 2 is not supported +#endif /* STRIDE_X == 2 */ + +vec4 convolve1x3_stride1(uint offset, vec3 w) +{ + vec4 tmp[2]; + vec4 middle; + vec4 right; + + LOAD2(tmp, src, offset); + + middle = vec4(tmp[0].yzw, tmp[1].x); + right = vec4(tmp[0].zw, tmp[1].xy); + + tmp[1] = tmp[0] * w[0] + middle * w[1] + right * w[2]; + + return tmp[1]; +} + +vec4 convolve1x3_stride2(uint offset, vec3 w) +{ + vec4 left; + vec4 middle; + vec4 right; + + vec4 tmp[3]; + + LOAD3(tmp, src, offset); + + left = vec4(tmp[0].xz, tmp[1].xz); + middle = vec4(tmp[0].yw, tmp[1].yw); + right = vec4(tmp[0].z, tmp[1].xz, tmp[2].x); + + tmp[0] = left * w[0] + middle * w[1] + right * w[2]; + + return tmp[0]; +} + +/** An optimized direct convolution 3x3 OpenGL ES compute shader for process 4 elements at once + * + * @note This OpenGL 
ES shader works with stride_x = 1 and 2 + * @note The data type must be passed at compile time using "#define DATA_TYPE_FP32" + * @note If biases are used then "define HAS_BIAS" has to be passed at compile time + * + * @param[in] src_ptr Pointer to the source tensor. Supported data types: F32 + * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes) + * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes) + * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] src_step_z src_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor + * @param[out] dst_ptr Pointer to the destination tensor. Supported data types: same as @p src_ptr + * @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes) + * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes) + * @param[in] dst_step_y dst_stride_y * number of elements along Z processed per workitem(in bytes) + * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in bytes) + * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor + * @param[out] weights_ptr Pointer to the weights tensor. 
Supported data types: same as @p src_ptr + * @param[in] weights_stride_x Stride of the weights tensor in X dimension (in bytes) + * @param[in] weights_step_x weights_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] weights_stride_y Stride of the weights tensor in Y dimension (in bytes) + * @param[in] weights_step_y weights_stride_y * number of elements along y processed per workitem(in bytes) + * @param[in] weights_stride_z Stride of the weights tensor in Z dimension (in bytes) + * @param[in] weights_step_z weights_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] weights_offset_first_element_in_bytes The offset of the first element in the weights tensor + * @param[in] biases_ptr Pointer to the biases tensor. Same as @p src_ptr + * @param[in] biases_stride_x Stride of the biases tensor in X dimension (in bytes) + * @param[in] biases_step_x biases_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] biases_offset_first_element_in_bytes The offset of the first element in the biases tensor + * @param[in] weights_stride_w Stride of the weights tensor in the 4th dimension + * @param[in] weights_depth The third dimensions of the weights tensors + */ +void main() +{ + Image src = CONVERT_TO_IMAGE_STRUCT(src); + Tensor3D weights = CONVERT_TO_TENSOR3D_STRUCT_NO_STEP(weights); + Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT(dst); + +#ifdef BIAS + Vector biases = CONVERT_TO_VECTOR_STRUCT_NO_STEP(biases); +#endif /* BIAS */ + + vec4 pixels; + pixels = vec4(0); + + uint z_index = gl_GlobalInvocationID.z; + + weights.current_offset += z_index * weights_stride_w >> 2; + + for(int d = 0; d < int(weights_depth); ++d) + { + // load 3 weights once + vec3 w; + + // first line + LOAD3(w, weights, tensor3D_offset(weights, 0, 0, 0)); + + pixels += CONVOLVE1x3(src.current_offset >> uint(2), w); + + // second line + LOAD3(w, weights, tensor3D_offset(weights, 0, 1, 0)); + + pixels += 
CONVOLVE1x3((src.current_offset + (src_stride_y >> 2)) >> uint(2), w); + + // third line + LOAD3(w, weights, tensor3D_offset(weights, 0, 2, 0)); + + pixels += CONVOLVE1x3((src.current_offset + (src_stride_y >> 1)) >> uint(2), w); + + src.current_offset += src_stride_z >> 2; + weights.current_offset += weights_stride_z >> 2; + } + +#ifdef BIAS + float b; + LOAD1(b, biases, vector_offset(biases, int(z_index))); + pixels += vec4(b); +#endif /* BIAS */ + + STORE1(dst, dst.current_offset >> uint(2), pixels); +} +#elif defined(PROCESS_X_4ELEMENTS_Y_3ELEMENTS) +BUFFER_DECLARATION(src, 1, vec4, readonly); +BUFFER_DECLARATION(dst, 2, vec4, writeonly); +BUFFER_DECLARATION(weights, 3, float, readonly); +#ifdef BIAS +BUFFER_DECLARATION(biases, 4, float, readonly); +#endif /* BIAS */ + +#define CONVOLVE1x3(left, middle, right, w) convolve1x3_stride1(left, middle, right, w) + +vec4 convolve1x3_stride1(vec4 left, vec4 middle, vec4 right, vec3 w) +{ + vec4 r; + + r = left * w[0] + middle * w[1] + right * w[2]; + + return r; +} + +/** An optimized direct convolution 3x3 OpenGL ES compute shader for process 4x3 elements at once + * + * @note This OpenGL ES shader works with stride_x = 1 and 2 + * @note The data type must be passed at compile time using "#define DATA_TYPE_FP32" + * @note If biases are used then "define HAS_BIAS" has to be passed at compile time + * + * @param[in] src_ptr Pointer to the source tensor. 
Supported data types: F32 + * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes) + * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes) + * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] src_step_z src_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor + * @param[out] dst_ptr Pointer to the destination tensor. Supported data types: same as @p src_ptr + * @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes) + * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes) + * @param[in] dst_step_y dst_stride_y * number of elements along Z processed per workitem(in bytes) + * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in bytes) + * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor + * @param[out] weights_ptr Pointer to the weights tensor. 
Supported data types: same as @p src_ptr + * @param[in] weights_stride_x Stride of the weights tensor in X dimension (in bytes) + * @param[in] weights_step_x weights_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] weights_stride_y Stride of the weights tensor in Y dimension (in bytes) + * @param[in] weights_step_y weights_stride_y * number of elements along y processed per workitem(in bytes) + * @param[in] weights_stride_z Stride of the weights tensor in Z dimension (in bytes) + * @param[in] weights_step_z weights_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] weights_offset_first_element_in_bytes The offset of the first element in the weights tensor + * @param[in] biases_ptr Pointer to the biases tensor. Same as @p src_ptr + * @param[in] biases_stride_x Stride of the biases tensor in X dimension (in bytes) + * @param[in] biases_step_x biases_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] biases_offset_first_element_in_bytes The offset of the first element in the biases tensor + * @param[in] weights_stride_w Stride of the weights tensor in the 4th dimension + * @param[in] weights_depth The third dimensions of the weights tensors + */ +void main() +{ + Image src = CONVERT_TO_IMAGE_STRUCT(src); + Tensor3D weights = CONVERT_TO_TENSOR3D_STRUCT_NO_STEP(weights); + Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT(dst); + +#ifdef BIAS + Vector biases = CONVERT_TO_VECTOR_STRUCT_NO_STEP(biases); +#endif /* BIAS */ + + vec4 pixels[3]; + pixels[0] = vec4(0); + pixels[1] = vec4(0); + pixels[2] = vec4(0); + + uint z_index = gl_GlobalInvocationID.z; + + weights.current_offset += z_index * weights_stride_w >> 2; + + for(int d = 0; d < int(weights_depth); ++d) + { + // load 3 weights once + vec3 w[3]; + + LOAD3(w[0], weights, tensor3D_offset(weights, 0, 0, 0)); + LOAD3(w[1], weights, tensor3D_offset(weights, 0, 1, 0)); + LOAD3(w[2], weights, tensor3D_offset(weights, 0, 2, 
0)); + + vec4 s[2]; + vec4 middle; + vec4 right; + // first line + LOAD2(s, src, src.current_offset >> uint(2)); + middle = vec4(s[0].yzw, s[1].x); + right = vec4(s[0].zw, s[1].xy); + pixels[0] += CONVOLVE1x3(s[0], middle, right, w[0]); + + // second line + LOAD2(s, src, (src.current_offset + (src_stride_y >> 2)) >> uint(2)); + middle = vec4(s[0].yzw, s[1].x); + right = vec4(s[0].zw, s[1].xy); + pixels[0] += CONVOLVE1x3(s[0], middle, right, w[1]); + pixels[1] += CONVOLVE1x3(s[0], middle, right, w[0]); + + // third line + LOAD2(s, src, (src.current_offset + (src_stride_y >> 1)) >> uint(2)); + middle = vec4(s[0].yzw, s[1].x); + right = vec4(s[0].zw, s[1].xy); + pixels[0] += CONVOLVE1x3(s[0], middle, right, w[2]); + pixels[1] += CONVOLVE1x3(s[0], middle, right, w[1]); + pixels[2] += CONVOLVE1x3(s[0], middle, right, w[0]); + + // forth line + LOAD2(s, src, (src.current_offset + (uint(3) * (src_stride_y >> 2))) >> uint(2)); + middle = vec4(s[0].yzw, s[1].x); + right = vec4(s[0].zw, s[1].xy); + pixels[1] += CONVOLVE1x3(s[0], middle, right, w[2]); + pixels[2] += CONVOLVE1x3(s[0], middle, right, w[1]); + + // fifth line + LOAD2(s, src, (src.current_offset + (src_stride_y)) >> uint(2)); + middle = vec4(s[0].yzw, s[1].x); + right = vec4(s[0].zw, s[1].xy); + pixels[2] += CONVOLVE1x3(s[0], middle, right, w[2]); + + src.current_offset += src_stride_z >> 2; + weights.current_offset += weights_stride_z >> 2; + } + +#ifdef BIAS + float b; + LOAD1(b, biases, vector_offset(biases, int(z_index))); + + pixels[0] += vec4(b); + pixels[1] += vec4(b); + pixels[2] += vec4(b); +#endif /* BIAS */ + + STORE1(dst, dst.current_offset >> uint(2), pixels[0]); + STORE1(dst, (dst.current_offset + (dst_stride_y >> 2)) >> uint(2), pixels[1]); + STORE1(dst, (dst.current_offset + (dst_stride_y >> 1)) >> uint(2), pixels[2]); +} +#elif defined(PROCESS_X_8ELEMENTS_Y_3ELEMENTS_FP16) +precision mediump float; + +BUFFER_DECLARATION(src, 1, uvec4, readonly); +BUFFER_DECLARATION(dst, 2, uvec4, writeonly); 
+BUFFER_DECLARATION(weights, 3, uint, readonly); +#ifdef BIAS +BUFFER_DECLARATION(biases, 4, uint, readonly); +#endif /* BIAS */ + +#define CONVOLVE1x3(s, w) convolve1x3_stride1(s, w) + +vec4[2] convolve1x3_stride1(vec4 tmp[3], vec3 w) +{ + vec4 middle; + vec4 right; + vec4 r[2]; + + middle = vec4(tmp[0].yzw, tmp[1].x); + right = vec4(tmp[0].zw, tmp[1].xy); + + r[0] = tmp[0] * w[0] + middle * w[1] + right * w[2]; + + middle = vec4(tmp[1].yzw, tmp[2].x); + right = vec4(tmp[1].zw, tmp[2].xy); + + r[1] = tmp[1] * w[0] + middle * w[1] + right * w[2]; + + return r; +} + +vec4[3] load_and_unpack(uint offset) +{ + uvec4 packed_s[2]; + vec4 s[3]; + + LOAD1(packed_s[0], src, offset); + LOAD1(packed_s[1], src, offset + uint(1)); + ; + + s[0] = vec4(unpackHalf2x16(packed_s[0].x), unpackHalf2x16(packed_s[0].y)); + s[1] = vec4(unpackHalf2x16(packed_s[0].z), unpackHalf2x16(packed_s[0].w)); + s[2] = vec4(unpackHalf2x16(packed_s[1].x), unpackHalf2x16(packed_s[1].y)); + + return s; +} + +/** An optimized direct convolution 3x3 OpenGL ES compute shader for process 8x3 elements at once + * + * @note This OpenGL ES shader works with stride_x = 1 and 2 + * @note The data type must be passed at compile time using "#define DATA_TYPE_FP16" + * @note If biases are used then "define HAS_BIAS" has to be passed at compile time + * + * @param[in] src_ptr Pointer to the source tensor. 
Supported data types: F16 + * @param[in]  src_stride_x                          Stride of the source tensor in X dimension (in bytes) + * @param[in]  src_step_x                            src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in]  src_stride_y                          Stride of the source tensor in Y dimension (in bytes) + * @param[in]  src_step_y                            src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in]  src_stride_z                          Stride of the source tensor in Z dimension (in bytes) + * @param[in]  src_step_z                            src_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in]  src_offset_first_element_in_bytes     The offset of the first element in the source tensor + * @param[out] dst_ptr                               Pointer to the destination tensor. Supported data types: same as @p src_ptr + * @param[in]  dst_stride_x                          Stride of the destination tensor in X dimension (in bytes) + * @param[in]  dst_step_x                            dst_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in]  dst_stride_y                          Stride of the destination tensor in Y dimension (in bytes) + * @param[in]  dst_step_y                            dst_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in]  dst_stride_z                          Stride of the destination tensor in Z dimension (in bytes) + * @param[in]  dst_step_z                            dst_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in]  dst_offset_first_element_in_bytes     The offset of the first element in the destination tensor + * @param[out] weights_ptr                           Pointer to the weights tensor. 
Supported data types: same as @p src_ptr + * @param[in] weights_stride_x Stride of the weights tensor in X dimension (in bytes) + * @param[in] weights_step_x weights_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] weights_stride_y Stride of the weights tensor in Y dimension (in bytes) + * @param[in] weights_step_y weights_stride_y * number of elements along y processed per workitem(in bytes) + * @param[in] weights_stride_z Stride of the weights tensor in Z dimension (in bytes) + * @param[in] weights_step_z weights_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] weights_offset_first_element_in_bytes The offset of the first element in the weights tensor + * @param[in] biases_ptr Pointer to the biases tensor. Same as @p src_ptr + * @param[in] biases_stride_x Stride of the biases tensor in X dimension (in bytes) + * @param[in] biases_step_x biases_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] biases_offset_first_element_in_bytes The offset of the first element in the biases tensor + * @param[in] weights_stride_w Stride of the weights tensor in the 4th dimension + * @param[in] weights_depth The third dimensions of the weights tensors + */ +void main() +{ + Image src = CONVERT_TO_IMAGE_STRUCT_FP16(src); + Tensor3D weights = CONVERT_TO_TENSOR3D_STRUCT_NO_STEP_FP16(weights); + Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT_FP16(dst); + +#ifdef BIAS + Vector biases = CONVERT_TO_VECTOR_STRUCT_NO_STEP_FP16(biases); +#endif /* BIAS */ + + uvec2 packed_d[2]; + uvec4 vd; + + vec4 pixels[3][2]; + int i, j; + for(i = 0; i < 3; i++) + { + for(j = 0; j < 2; j++) + { + pixels[i][j] = vec4(0); + } + } + + uint z_index = gl_GlobalInvocationID.z; + + weights.current_offset += z_index * weights_stride_w; + + for(int d = 0; d < int(weights_depth); ++d) + { + // load 3 weights once + uvec2 packed_w[3]; + + LOAD2(packed_w[0], weights, tensor3D_offset_fp16(weights, 0, 0, 0) >> 2); + 
LOAD2(packed_w[1], weights, tensor3D_offset_fp16(weights, 0, 1, 0) >> 2); + LOAD2(packed_w[2], weights, tensor3D_offset_fp16(weights, 0, 2, 0) >> 2); + + vec3 w[3]; + w[0] = vec3(unpackHalf2x16(packed_w[0].x), unpackHalf2x16(packed_w[0].y).x); + w[1] = vec3(unpackHalf2x16(packed_w[1].x), unpackHalf2x16(packed_w[1].y).x); + w[2] = vec3(unpackHalf2x16(packed_w[2].x), unpackHalf2x16(packed_w[2].y).x); + + uvec4 packed_s[2]; + vec4 s[3]; + vec4 r[2]; + uint offset; + // first line + offset = src.current_offset >> uint(4); + s = load_and_unpack(offset); + + r = CONVOLVE1x3(s, w[0]); + pixels[0][0] += r[0]; + pixels[0][1] += r[1]; + + // second line + offset = (src.current_offset + src_stride_y) >> uint(4); + s = load_and_unpack(offset); + + r = CONVOLVE1x3(s, w[1]); + pixels[0][0] += r[0]; + pixels[0][1] += r[1]; + r = CONVOLVE1x3(s, w[0]); + pixels[1][0] += r[0]; + pixels[1][1] += r[1]; + + // third line + offset = (src.current_offset + (src_stride_y << 1)) >> uint(4); + s = load_and_unpack(offset); + + r = CONVOLVE1x3(s, w[2]); + pixels[0][0] += r[0]; + pixels[0][1] += r[1]; + r = CONVOLVE1x3(s, w[1]); + pixels[1][0] += r[0]; + pixels[1][1] += r[1]; + r = CONVOLVE1x3(s, w[0]); + pixels[2][0] += r[0]; + pixels[2][1] += r[1]; + + // forth line + offset = (src.current_offset + uint(3) * (src_stride_y)) >> uint(4); + s = load_and_unpack(offset); + + r = CONVOLVE1x3(s, w[2]); + pixels[1][0] += r[0]; + pixels[1][1] += r[1]; + r = CONVOLVE1x3(s, w[1]); + pixels[2][0] += r[0]; + pixels[2][1] += r[1]; + + // fifth line + offset = (src.current_offset + (src_stride_y << 2)) >> uint(4); + s = load_and_unpack(offset); + + r = CONVOLVE1x3(s, w[2]); + pixels[2][0] += r[0]; + pixels[2][1] += r[1]; + + src.current_offset += src_stride_z; + weights.current_offset += weights_stride_z; + } + +#ifdef BIAS + uint packed_b; + float b; + LOAD1(packed_b, biases, vector_offset_fp16(biases, int(z_index)) >> 2); + + if(z_index % uint(2) == uint(0)) + { + b = unpackHalf2x16(packed_b).x; + } + 
else + { + b = unpackHalf2x16(packed_b).y; + } + + for(i = 0; i < 3; i++) + { + for(j = 0; j < 2; j++) + { + pixels[i][j] += vec4(b); + } + } +#endif /* BIAS */ + + packed_d[0] = uvec2(packHalf2x16(pixels[0][0].xy), packHalf2x16(pixels[0][0].zw)); + packed_d[1] = uvec2(packHalf2x16(pixels[0][1].xy), packHalf2x16(pixels[0][1].zw)); + vd = uvec4(packed_d[0], packed_d[1]); + STORE1(dst, dst.current_offset >> uint(4), vd); + + packed_d[0] = uvec2(packHalf2x16(pixels[1][0].xy), packHalf2x16(pixels[1][0].zw)); + packed_d[1] = uvec2(packHalf2x16(pixels[1][1].xy), packHalf2x16(pixels[1][1].zw)); + vd = uvec4(packed_d[0], packed_d[1]); + STORE1(dst, (dst.current_offset + dst_stride_y) >> uint(4), vd); + + packed_d[0] = uvec2(packHalf2x16(pixels[2][0].xy), packHalf2x16(pixels[2][0].zw)); + packed_d[1] = uvec2(packHalf2x16(pixels[2][1].xy), packHalf2x16(pixels[2][1].zw)); + vd = uvec4(packed_d[0], packed_d[1]); + STORE1(dst, (dst.current_offset + (dst_stride_y << 1)) >> uint(4), vd); +} +#elif defined(PROCESS_X_4ELEMENTS_FP16) +precision mediump float; + +BUFFER_DECLARATION(src, 1, uvec2, readonly); +BUFFER_DECLARATION(dst, 2, uvec2, writeonly); +BUFFER_DECLARATION(weights, 3, uint, readonly); +#ifdef BIAS +BUFFER_DECLARATION(biases, 4, uint, readonly); +#endif /* BIAS */ + +#if STRIDE_X == 2 +#define CONVOLVE1x3(s, w) convolve1x3_stride2(s, w) +#define LOAD_AND_UNPACK(offset) load_and_unpack_stride2(offset) +#elif STRIDE_X == 1 /* STRIDE_X == 1 */ +#define CONVOLVE1x3(s, w) convolve1x3_stride1(s, w) +#define LOAD_AND_UNPACK(offset) load_and_unpack_stride1(offset) +#else /* STRIDE_X not equals 1 or 2 */ +#error STRIDE_X larger than 2 is not supported +#endif /* STRIDE_X == 2 */ + +vec4 convolve1x3_stride1(vec4 tmp[2], vec3 w) +{ + vec4 middle; + vec4 right; + vec4 r; + + middle = vec4(tmp[0].yzw, tmp[1].x); + right = vec4(tmp[0].zw, tmp[1].xy); + + r = tmp[0] * w[0] + middle * w[1] + right * w[2]; + + return r; +} + +vec4 convolve1x3_stride2(vec4 tmp[3], vec3 w) +{ + vec4 
left; + vec4 middle; + vec4 right; + vec4 r; + + left = vec4(tmp[0].xz, tmp[1].xz); + middle = vec4(tmp[0].yw, tmp[1].yw); + right = vec4(tmp[0].z, tmp[1].xz, tmp[2].x); + + r = left * w[0] + middle * w[1] + right * w[2]; + + return r; +} + +vec4[2] load_and_unpack_stride1(uint offset) +{ + uvec2 packed_s[2]; + vec4 s[2]; + + LOAD1(packed_s[0], src, offset); + LOAD1(packed_s[1], src, offset + uint(1)); + + s[0] = vec4(unpackHalf2x16(packed_s[0].x), unpackHalf2x16(packed_s[0].y)); + s[1] = vec4(unpackHalf2x16(packed_s[1].x), unpackHalf2x16(packed_s[1].y)); + + return s; +} + +vec4[3] load_and_unpack_stride2(uint offset) +{ + uvec2 packed_s[3]; + vec4 s[3]; + + LOAD1(packed_s[0], src, offset); + LOAD1(packed_s[1], src, offset + uint(1)); + LOAD1(packed_s[2], src, offset + uint(2)); + + s[0] = vec4(unpackHalf2x16(packed_s[0].x), unpackHalf2x16(packed_s[0].y)); + s[1] = vec4(unpackHalf2x16(packed_s[1].x), unpackHalf2x16(packed_s[1].y)); + s[2] = vec4(unpackHalf2x16(packed_s[2].x), unpackHalf2x16(packed_s[2].y)); + + return s; +} + +/** An optimized direct convolution 3x3 OpenGL ES compute shader for process 4 elements at once + * + * @note This OpenGL ES shader works with stride_x = 1 and 2 + * @note The data type must be passed at compile time using "#define DATA_TYPE_FP16" + * @note If biases are used then "define HAS_BIAS" has to be passed at compile time + * + * @param[in] src_ptr Pointer to the source tensor. 
Supported data types: F16 + * @param[in]  src_stride_x                          Stride of the source tensor in X dimension (in bytes) + * @param[in]  src_step_x                            src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in]  src_stride_y                          Stride of the source tensor in Y dimension (in bytes) + * @param[in]  src_step_y                            src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in]  src_stride_z                          Stride of the source tensor in Z dimension (in bytes) + * @param[in]  src_step_z                            src_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in]  src_offset_first_element_in_bytes     The offset of the first element in the source tensor + * @param[out] dst_ptr                               Pointer to the destination tensor. Supported data types: same as @p src_ptr + * @param[in]  dst_stride_x                          Stride of the destination tensor in X dimension (in bytes) + * @param[in]  dst_step_x                            dst_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in]  dst_stride_y                          Stride of the destination tensor in Y dimension (in bytes) + * @param[in]  dst_step_y                            dst_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in]  dst_stride_z                          Stride of the destination tensor in Z dimension (in bytes) + * @param[in]  dst_step_z                            dst_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in]  dst_offset_first_element_in_bytes     The offset of the first element in the destination tensor + * @param[out] weights_ptr                           Pointer to the weights tensor. 
Supported data types: same as @p src_ptr + * @param[in] weights_stride_x Stride of the weights tensor in X dimension (in bytes) + * @param[in] weights_step_x weights_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] weights_stride_y Stride of the weights tensor in Y dimension (in bytes) + * @param[in] weights_step_y weights_stride_y * number of elements along y processed per workitem(in bytes) + * @param[in] weights_stride_z Stride of the weights tensor in Z dimension (in bytes) + * @param[in] weights_step_z weights_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] weights_offset_first_element_in_bytes The offset of the first element in the weights tensor + * @param[in] biases_ptr Pointer to the biases tensor. Same as @p src_ptr + * @param[in] biases_stride_x Stride of the biases tensor in X dimension (in bytes) + * @param[in] biases_step_x biases_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] biases_offset_first_element_in_bytes The offset of the first element in the biases tensor + * @param[in] weights_stride_w Stride of the weights tensor in the 4th dimension + * @param[in] weights_depth The third dimensions of the weights tensors + */ +void main() +{ + Image src = CONVERT_TO_IMAGE_STRUCT_FP16(src); + Tensor3D weights = CONVERT_TO_TENSOR3D_STRUCT_NO_STEP_FP16(weights); + Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT_FP16(dst); + +#ifdef BIAS + Vector biases = CONVERT_TO_VECTOR_STRUCT_NO_STEP_FP16(biases); +#endif /* BIAS */ + + uvec2 packed_d; + + vec4 pixels = vec4(0); + + uint z_index = gl_GlobalInvocationID.z; + + weights.current_offset += z_index * weights_stride_w; + + for(int d = 0; d < int(weights_depth); ++d) + { + // load 3 weights once + uvec2 packed_w[3]; + + LOAD2(packed_w[0], weights, tensor3D_offset_fp16(weights, 0, 0, 0) >> 2); + LOAD2(packed_w[1], weights, tensor3D_offset_fp16(weights, 0, 1, 0) >> 2); + LOAD2(packed_w[2], weights, 
tensor3D_offset_fp16(weights, 0, 2, 0) >> 2); + + vec3 w[3]; + w[0] = vec3(unpackHalf2x16(packed_w[0].x), unpackHalf2x16(packed_w[0].y).x); + w[1] = vec3(unpackHalf2x16(packed_w[1].x), unpackHalf2x16(packed_w[1].y).x); + w[2] = vec3(unpackHalf2x16(packed_w[2].x), unpackHalf2x16(packed_w[2].y).x); + +#if STRIDE_X == 2 + vec4 s[3]; +#elif STRIDE_X == 1 /* STRIDE_X == 1 */ + vec4 s[2]; +#else /* STRIDE_X not equals 1 or 2 */ +#error STRIDE_X larger than 2 is not supported +#endif /* STRIDE_X == 2 */ + vec4 r; + uint offset; + // first line + offset = src.current_offset >> uint(3); + s = LOAD_AND_UNPACK(offset); + + pixels += CONVOLVE1x3(s, w[0]); + + // second line + offset = (src.current_offset + src_stride_y) >> uint(3); + s = LOAD_AND_UNPACK(offset); + + pixels += CONVOLVE1x3(s, w[1]); + + // third line + offset = (src.current_offset + (src_stride_y << 1)) >> uint(3); + s = LOAD_AND_UNPACK(offset); + + pixels += CONVOLVE1x3(s, w[2]); + + src.current_offset += src_stride_z; + weights.current_offset += weights_stride_z; + } + +#ifdef BIAS + uint packed_b; + float b; + LOAD1(packed_b, biases, vector_offset_fp16(biases, int(z_index)) >> 2); + + if(z_index % uint(2) == uint(0)) + { + b = unpackHalf2x16(packed_b).x; + } + else + { + b = unpackHalf2x16(packed_b).y; + } + + pixels += vec4(b); +#endif /* BIAS */ + + packed_d = uvec2(packHalf2x16(pixels.xy), packHalf2x16(pixels.zw)); + STORE1(dst, dst.current_offset >> uint(3), packed_d); +} +#elif defined(PROCESS_X_4ELEMENTS_Y_3ELEMENTS_FP16) +precision mediump float; + +BUFFER_DECLARATION(src, 1, uvec2, readonly); +BUFFER_DECLARATION(dst, 2, uvec2, writeonly); +BUFFER_DECLARATION(weights, 3, uint, readonly); +#ifdef BIAS +BUFFER_DECLARATION(biases, 4, uint, readonly); +#endif /* BIAS */ + +#define CONVOLVE1x3(s, w) convolve1x3_stride1(s, w) + +vec4 convolve1x3_stride1(vec4 tmp[2], vec3 w) +{ + vec4 middle; + vec4 right; + vec4 r; + + middle = vec4(tmp[0].yzw, tmp[1].x); + right = vec4(tmp[0].zw, tmp[1].xy); + + r = tmp[0] 
* w[0] + middle * w[1] + right * w[2]; + + return r; +} + +vec4[2] load_and_unpack(uint offset) +{ + uvec2 packed_s[2]; + vec4 s[2]; + + LOAD1(packed_s[0], src, offset); + LOAD1(packed_s[1], src, offset + uint(1)); + + s[0] = vec4(unpackHalf2x16(packed_s[0].x), unpackHalf2x16(packed_s[0].y)); + s[1] = vec4(unpackHalf2x16(packed_s[1].x), unpackHalf2x16(packed_s[1].y)); + + return s; +} + +/** An optimized direct convolution 3x3 OpenGL ES compute shader for process 4x3 elements at once + * + * @note This OpenGL ES shader works with stride_x = 1 and 2 + * @note The data type must be passed at compile time using "#define DATA_TYPE_FP16" + * @note If biases are used then "define HAS_BIAS" has to be passed at compile time + * + * @param[in] src_ptr Pointer to the source tensor. Supported data types: F16 + * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes) + * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes) + * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] src_step_z src_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor + * @param[out] dst_ptr Pointer to the destination tensor. 
Supported data types: same as @p src_ptr + * @param[in]  dst_stride_x                          Stride of the destination tensor in X dimension (in bytes) + * @param[in]  dst_step_x                            dst_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in]  dst_stride_y                          Stride of the destination tensor in Y dimension (in bytes) + * @param[in]  dst_step_y                            dst_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in]  dst_stride_z                          Stride of the destination tensor in Z dimension (in bytes) + * @param[in]  dst_step_z                            dst_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in]  dst_offset_first_element_in_bytes     The offset of the first element in the destination tensor + * @param[out] weights_ptr                           Pointer to the weights tensor. Supported data types: same as @p src_ptr + * @param[in]  weights_stride_x                      Stride of the weights tensor in X dimension (in bytes) + * @param[in]  weights_step_x                        weights_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in]  weights_stride_y                      Stride of the weights tensor in Y dimension (in bytes) + * @param[in]  weights_step_y                        weights_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in]  weights_stride_z                      Stride of the weights tensor in Z dimension (in bytes) + * @param[in]  weights_step_z                        weights_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in]  weights_offset_first_element_in_bytes The offset of the first element in the weights tensor + * @param[in]  biases_ptr                            Pointer to the biases tensor. 
Same as @p src_ptr + * @param[in] biases_stride_x Stride of the biases tensor in X dimension (in bytes) + * @param[in] biases_step_x biases_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] biases_offset_first_element_in_bytes The offset of the first element in the biases tensor + * @param[in] weights_stride_w Stride of the weights tensor in the 4th dimension + * @param[in] weights_depth The third dimensions of the weights tensors + */ +void main() +{ + Image src = CONVERT_TO_IMAGE_STRUCT_FP16(src); + Tensor3D weights = CONVERT_TO_TENSOR3D_STRUCT_NO_STEP_FP16(weights); + Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT_FP16(dst); + +#ifdef BIAS + Vector biases = CONVERT_TO_VECTOR_STRUCT_NO_STEP_FP16(biases); +#endif /* BIAS */ + + uvec2 packed_d; + + vec4 pixels[3]; + int i; + + for(i = 0; i < 3; i++) + { + pixels[i] = vec4(0); + } + + uint z_index = gl_GlobalInvocationID.z; + + weights.current_offset += z_index * weights_stride_w; + + for(int d = 0; d < int(weights_depth); ++d) + { + // load 3 weights once + uvec2 packed_w[3]; + + LOAD2(packed_w[0], weights, tensor3D_offset_fp16(weights, 0, 0, 0) >> 2); + LOAD2(packed_w[1], weights, tensor3D_offset_fp16(weights, 0, 1, 0) >> 2); + LOAD2(packed_w[2], weights, tensor3D_offset_fp16(weights, 0, 2, 0) >> 2); + + vec3 w[3]; + w[0] = vec3(unpackHalf2x16(packed_w[0].x), unpackHalf2x16(packed_w[0].y).x); + w[1] = vec3(unpackHalf2x16(packed_w[1].x), unpackHalf2x16(packed_w[1].y).x); + w[2] = vec3(unpackHalf2x16(packed_w[2].x), unpackHalf2x16(packed_w[2].y).x); + + vec4 s[2]; + vec4 r; + uint offset; + // first line + offset = src.current_offset >> uint(3); + s = load_and_unpack(offset); + + pixels[0] += CONVOLVE1x3(s, w[0]); + + // second line + offset = (src.current_offset + src_stride_y) >> uint(3); + s = load_and_unpack(offset); + + pixels[0] += CONVOLVE1x3(s, w[1]); + pixels[1] += CONVOLVE1x3(s, w[0]); + + // third line + offset = (src.current_offset + (src_stride_y << 1)) >> uint(3); + s = 
load_and_unpack(offset); + + pixels[0] += CONVOLVE1x3(s, w[2]); + pixels[1] += CONVOLVE1x3(s, w[1]); + pixels[2] += CONVOLVE1x3(s, w[0]); + + // forth line + offset = (src.current_offset + uint(3) * (src_stride_y)) >> uint(3); + s = load_and_unpack(offset); + + pixels[1] += CONVOLVE1x3(s, w[2]); + pixels[2] += CONVOLVE1x3(s, w[1]); + + // fifth line + offset = (src.current_offset + (src_stride_y << 2)) >> uint(3); + s = load_and_unpack(offset); + + pixels[2] += CONVOLVE1x3(s, w[2]); + + src.current_offset += src_stride_z; + weights.current_offset += weights_stride_z; + } + +#ifdef BIAS + uint packed_b; + float b; + LOAD1(packed_b, biases, vector_offset_fp16(biases, int(z_index)) >> 2); + + if(z_index % uint(2) == uint(0)) + { + b = unpackHalf2x16(packed_b).x; + } + else + { + b = unpackHalf2x16(packed_b).y; + } + + for(i = 0; i < 3; i++) + { + pixels[i] += vec4(b); + } +#endif /* BIAS */ + + packed_d = uvec2(packHalf2x16(pixels[0].xy), packHalf2x16(pixels[0].zw)); + STORE1(dst, dst.current_offset >> uint(3), packed_d); + + packed_d = uvec2(packHalf2x16(pixels[1].xy), packHalf2x16(pixels[1].zw)); + STORE1(dst, (dst.current_offset + dst_stride_y) >> uint(3), packed_d); + + packed_d = uvec2(packHalf2x16(pixels[2].xy), packHalf2x16(pixels[2].zw)); + STORE1(dst, (dst.current_offset + (dst_stride_y << 1)) >> uint(3), packed_d); +} +#elif defined(PROCESS_X_4ELEMENTS_Y_4ELEMENTS_FP16) +precision mediump float; + +BUFFER_DECLARATION(src, 1, uvec2, readonly); +BUFFER_DECLARATION(dst, 2, uvec2, writeonly); +BUFFER_DECLARATION(weights, 3, uint, readonly); +#ifdef BIAS +BUFFER_DECLARATION(biases, 4, uint, readonly); +#endif /* BIAS */ + +#define CONVOLVE1x3(s, w) convolve1x3_stride1(s, w) + +vec4 convolve1x3_stride1(vec4 tmp[2], vec3 w) +{ + vec4 middle; + vec4 right; + vec4 r; + + middle = vec4(tmp[0].yzw, tmp[1].x); + right = vec4(tmp[0].zw, tmp[1].xy); + + r = tmp[0] * w[0] + middle * w[1] + right * w[2]; + + return r; +} + +vec4[2] load_and_unpack(uint offset) +{ + uvec2 
packed_s[2]; + vec4 s[2]; + + LOAD1(packed_s[0], src, offset); + LOAD1(packed_s[1], src, offset + uint(1)); + + s[0] = vec4(unpackHalf2x16(packed_s[0].x), unpackHalf2x16(packed_s[0].y)); + s[1] = vec4(unpackHalf2x16(packed_s[1].x), unpackHalf2x16(packed_s[1].y)); + + return s; +} + +/** An optimized direct convolution 3x3 OpenGL ES compute shader for process 4x4 elements at once + * + * @note This OpenGL ES shader works with stride_x = 1 and 2 + * @note The data type must be passed at compile time using "#define DATA_TYPE_FP16" + * @note If biases are used then "define HAS_BIAS" has to be passed at compile time + * + * @param[in] src_ptr Pointer to the source tensor. Supported data types: F16 + * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes) + * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes) + * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] src_step_z src_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor + * @param[out] dst_ptr Pointer to the destination tensor. 
Supported data types: same as @p src_ptr + * @param[in]  dst_stride_x                          Stride of the destination tensor in X dimension (in bytes) + * @param[in]  dst_step_x                            dst_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in]  dst_stride_y                          Stride of the destination tensor in Y dimension (in bytes) + * @param[in]  dst_step_y                            dst_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in]  dst_stride_z                          Stride of the destination tensor in Z dimension (in bytes) + * @param[in]  dst_step_z                            dst_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in]  dst_offset_first_element_in_bytes     The offset of the first element in the destination tensor + * @param[out] weights_ptr                           Pointer to the weights tensor. Supported data types: same as @p src_ptr + * @param[in]  weights_stride_x                      Stride of the weights tensor in X dimension (in bytes) + * @param[in]  weights_step_x                        weights_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in]  weights_stride_y                      Stride of the weights tensor in Y dimension (in bytes) + * @param[in]  weights_step_y                        weights_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in]  weights_stride_z                      Stride of the weights tensor in Z dimension (in bytes) + * @param[in]  weights_step_z                        weights_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in]  weights_offset_first_element_in_bytes The offset of the first element in the weights tensor + * @param[in]  biases_ptr                            Pointer to the biases tensor. 
Same as @p src_ptr + * @param[in] biases_stride_x Stride of the biases tensor in X dimension (in bytes) + * @param[in] biases_step_x biases_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] biases_offset_first_element_in_bytes The offset of the first element in the biases tensor + * @param[in] weights_stride_w Stride of the weights tensor in the 4th dimension + * @param[in] weights_depth The third dimensions of the weights tensors + */ +void main() +{ + Image src = CONVERT_TO_IMAGE_STRUCT_FP16(src); + Tensor3D weights = CONVERT_TO_TENSOR3D_STRUCT_NO_STEP_FP16(weights); + Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT_FP16(dst); + +#ifdef BIAS + Vector biases = CONVERT_TO_VECTOR_STRUCT_NO_STEP_FP16(biases); +#endif /* BIAS */ + + uvec2 packed_d; + + vec4 pixels[4]; + int i; + + for(i = 0; i < 4; i++) + { + pixels[i] = vec4(0); + } + + uint z_index = gl_GlobalInvocationID.z; + + weights.current_offset += z_index * weights_stride_w; + + for(int d = 0; d < int(weights_depth); ++d) + { + // load 3 weights once + uvec2 packed_w[3]; + + LOAD2(packed_w[0], weights, tensor3D_offset_fp16(weights, 0, 0, 0) >> 2); + LOAD2(packed_w[1], weights, tensor3D_offset_fp16(weights, 0, 1, 0) >> 2); + LOAD2(packed_w[2], weights, tensor3D_offset_fp16(weights, 0, 2, 0) >> 2); + + vec3 w[3]; + w[0] = vec3(unpackHalf2x16(packed_w[0].x), unpackHalf2x16(packed_w[0].y).x); + w[1] = vec3(unpackHalf2x16(packed_w[1].x), unpackHalf2x16(packed_w[1].y).x); + w[2] = vec3(unpackHalf2x16(packed_w[2].x), unpackHalf2x16(packed_w[2].y).x); + + vec4 s[2]; + vec4 r; + uint offset; + // first line + offset = src.current_offset >> uint(3); + s = load_and_unpack(offset); + + pixels[0] += CONVOLVE1x3(s, w[0]); + + // second line + offset = (src.current_offset + src_stride_y) >> uint(3); + s = load_and_unpack(offset); + + pixels[0] += CONVOLVE1x3(s, w[1]); + pixels[1] += CONVOLVE1x3(s, w[0]); + + // third line + offset = (src.current_offset + (src_stride_y << 1)) >> uint(3); + s = 
load_and_unpack(offset); + + pixels[0] += CONVOLVE1x3(s, w[2]); + pixels[1] += CONVOLVE1x3(s, w[1]); + pixels[2] += CONVOLVE1x3(s, w[0]); + + // forth line + offset = (src.current_offset + uint(3) * (src_stride_y)) >> uint(3); + s = load_and_unpack(offset); + + pixels[1] += CONVOLVE1x3(s, w[2]); + pixels[2] += CONVOLVE1x3(s, w[1]); + pixels[3] += CONVOLVE1x3(s, w[0]); + + // fifth line + offset = (src.current_offset + (src_stride_y << 2)) >> uint(3); + s = load_and_unpack(offset); + + pixels[2] += CONVOLVE1x3(s, w[2]); + pixels[3] += CONVOLVE1x3(s, w[1]); + + // sixth line + offset = (src.current_offset + uint(5) * (src_stride_y)) >> uint(3); + s = load_and_unpack(offset); + + pixels[3] += CONVOLVE1x3(s, w[2]); + + src.current_offset += src_stride_z; + weights.current_offset += weights_stride_z; + } + +#ifdef BIAS + uint packed_b; + float b; + LOAD1(packed_b, biases, vector_offset_fp16(biases, int(z_index)) >> 2); + + if(z_index % uint(2) == uint(0)) + { + b = unpackHalf2x16(packed_b).x; + } + else + { + b = unpackHalf2x16(packed_b).y; + } + + for(i = 0; i < 4; i++) + { + pixels[i] += vec4(b); + } +#endif /* BIAS */ + + packed_d = uvec2(packHalf2x16(pixels[0].xy), packHalf2x16(pixels[0].zw)); + STORE1(dst, dst.current_offset >> uint(3), packed_d); + + packed_d = uvec2(packHalf2x16(pixels[1].xy), packHalf2x16(pixels[1].zw)); + STORE1(dst, (dst.current_offset + dst_stride_y) >> uint(3), packed_d); + + packed_d = uvec2(packHalf2x16(pixels[2].xy), packHalf2x16(pixels[2].zw)); + STORE1(dst, (dst.current_offset + (dst_stride_y << 1)) >> uint(3), packed_d); + + packed_d = uvec2(packHalf2x16(pixels[3].xy), packHalf2x16(pixels[3].zw)); + STORE1(dst, (dst.current_offset + uint(3) * (dst_stride_y)) >> uint(3), packed_d); +} +#elif defined(PROCESS_X_4ELEMENTS_Y_3ELEMENTS_Z_2ELEMENTS_FP16) +precision mediump float; + +BUFFER_DECLARATION(src, 1, uvec2, readonly); +BUFFER_DECLARATION(dst, 2, uvec2, writeonly); +BUFFER_DECLARATION(weights, 3, uint, readonly); +#ifdef BIAS 
+BUFFER_DECLARATION(biases, 4, uint, readonly); +#endif /* BIAS */ + +#define CONVOLVE1x3(s, w) convolve1x3_stride1(s, w) + +vec4 convolve1x3_stride1(vec4 tmp[2], vec3 w) +{ + vec4 middle; + vec4 right; + vec4 r; + + middle = vec4(tmp[0].yzw, tmp[1].x); + right = vec4(tmp[0].zw, tmp[1].xy); + + r = tmp[0] * w[0] + middle * w[1] + right * w[2]; + + return r; +} + +vec4[2] load_and_unpack(uint offset) +{ + uvec2 packed_s[2]; + vec4 s[2]; + + LOAD1(packed_s[0], src, offset); + LOAD1(packed_s[1], src, offset + uint(1)); + + s[0] = vec4(unpackHalf2x16(packed_s[0].x), unpackHalf2x16(packed_s[0].y)); + s[1] = vec4(unpackHalf2x16(packed_s[1].x), unpackHalf2x16(packed_s[1].y)); + + return s; +} + +/** An optimized direct convolution 3x3 OpenGL ES compute shader for process 4x3x2 elements at once + * + * @note This OpenGL ES shader works with stride_x = 1 and 2 + * @note The data type must be passed at compile time using "#define DATA_TYPE_FP16" + * @note If biases are used then "define HAS_BIAS" has to be passed at compile time + * + * @param[in] src_ptr Pointer to the source tensor. Supported data types: F16 + * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes) + * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes) + * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] src_step_z src_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor + * @param[out] dst_ptr Pointer to the destination tensor. 
Supported data types: same as @p src_ptr + * @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes) + * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes) + * @param[in] dst_step_y dst_stride_y * number of elements along Z processed per workitem(in bytes) + * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in bytes) + * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor + * @param[out] weights_ptr Pointer to the weights tensor. Supported data types: same as @p src_ptr + * @param[in] weights_stride_x Stride of the weights tensor in X dimension (in bytes) + * @param[in] weights_step_x weights_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] weights_stride_y Stride of the weights tensor in Y dimension (in bytes) + * @param[in] weights_step_y weights_stride_y * number of elements along y processed per workitem(in bytes) + * @param[in] weights_stride_z Stride of the weights tensor in Z dimension (in bytes) + * @param[in] weights_step_z weights_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] weights_offset_first_element_in_bytes The offset of the first element in the weights tensor + * @param[in] biases_ptr Pointer to the biases tensor. 
Same as @p src_ptr + * @param[in] biases_stride_x Stride of the biases tensor in X dimension (in bytes) + * @param[in] biases_step_x biases_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] biases_offset_first_element_in_bytes The offset of the first element in the biases tensor + * @param[in] weights_stride_w Stride of the weights tensor in the 4th dimension + * @param[in] weights_depth The third dimensions of the weights tensors + */ +void main() +{ + Image src = CONVERT_TO_IMAGE_STRUCT_FP16(src); + Tensor3D weights = CONVERT_TO_TENSOR3D_STRUCT_NO_STEP_FP16(weights); + Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT_FP16(dst); + +#ifdef BIAS + Vector biases = CONVERT_TO_VECTOR_STRUCT_NO_STEP_FP16(biases); +#endif /* BIAS */ + + uvec2 packed_d; + + vec4 pixels[3]; + int i; + + uint z_base_index = gl_GlobalInvocationID.z << 1; + + // store orginal src current offset + uint s_offset = src.current_offset; + + weights.current_offset += z_base_index * weights_stride_w; + + for(int z = 0; z < 2; ++z) + { + uint z_index = z_base_index + uint(z); + + src.current_offset = s_offset; + //weights.current_offset = z_index * weights_stride_w; + + for(i = 0; i < 3; i++) + { + pixels[i] = vec4(0); + } + + for(int d = 0; d < int(weights_depth); ++d) + { + // load 3 weights once + uvec2 packed_w[3]; + + LOAD2(packed_w[0], weights, tensor3D_offset_fp16(weights, 0, 0, 0) >> 2); + LOAD2(packed_w[1], weights, tensor3D_offset_fp16(weights, 0, 1, 0) >> 2); + LOAD2(packed_w[2], weights, tensor3D_offset_fp16(weights, 0, 2, 0) >> 2); + + vec3 w[3]; + w[0] = vec3(unpackHalf2x16(packed_w[0].x), unpackHalf2x16(packed_w[0].y).x); + w[1] = vec3(unpackHalf2x16(packed_w[1].x), unpackHalf2x16(packed_w[1].y).x); + w[2] = vec3(unpackHalf2x16(packed_w[2].x), unpackHalf2x16(packed_w[2].y).x); + + vec4 s[2]; + vec4 r; + uint offset; + // first line + offset = src.current_offset >> uint(3); + s = load_and_unpack(offset); + + pixels[0] += CONVOLVE1x3(s, w[0]); + + // second 
line + offset = (src.current_offset + src_stride_y) >> uint(3); + s = load_and_unpack(offset); + + pixels[0] += CONVOLVE1x3(s, w[1]); + pixels[1] += CONVOLVE1x3(s, w[0]); + + // third line + offset = (src.current_offset + (src_stride_y << 1)) >> uint(3); + s = load_and_unpack(offset); + + pixels[0] += CONVOLVE1x3(s, w[2]); + pixels[1] += CONVOLVE1x3(s, w[1]); + pixels[2] += CONVOLVE1x3(s, w[0]); + + // forth line + offset = (src.current_offset + uint(3) * (src_stride_y)) >> uint(3); + s = load_and_unpack(offset); + + pixels[1] += CONVOLVE1x3(s, w[2]); + pixels[2] += CONVOLVE1x3(s, w[1]); + + // fifth line + offset = (src.current_offset + (src_stride_y << 2)) >> uint(3); + s = load_and_unpack(offset); + + pixels[2] += CONVOLVE1x3(s, w[2]); + + src.current_offset += src_stride_z; + weights.current_offset += weights_stride_z; + } + +#ifdef BIAS + uint packed_b; + float b; + LOAD1(packed_b, biases, vector_offset_fp16(biases, int(z_index)) >> 2); + + if(z_index % uint(2) == uint(0)) + { + b = unpackHalf2x16(packed_b).x; + } + else + { + b = unpackHalf2x16(packed_b).y; + } + + for(i = 0; i < 3; i++) + { + pixels[i] += vec4(b); + } +#endif /* BIAS */ + + packed_d = uvec2(packHalf2x16(pixels[0].xy), packHalf2x16(pixels[0].zw)); + STORE1(dst, dst.current_offset >> uint(3), packed_d); + + packed_d = uvec2(packHalf2x16(pixels[1].xy), packHalf2x16(pixels[1].zw)); + STORE1(dst, (dst.current_offset + dst_stride_y) >> uint(3), packed_d); + + packed_d = uvec2(packHalf2x16(pixels[2].xy), packHalf2x16(pixels[2].zw)); + STORE1(dst, (dst.current_offset + (dst_stride_y << 1)) >> uint(3), packed_d); + + dst.current_offset += dst_stride_z; + } +} +#endif /* PROCESS_1_ELEMENT */ diff --git a/src/core/GLES_COMPUTE/cs_shaders/direct_convolution5x5.cs b/src/core/GLES_COMPUTE/cs_shaders/direct_convolution5x5.cs new file mode 100644 index 0000000000..4fdbf0d19e --- /dev/null +++ b/src/core/GLES_COMPUTE/cs_shaders/direct_convolution5x5.cs @@ -0,0 +1,313 @@ +/* + * Copyright (c) 2017 ARM 
Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +layout(local_size_x = LOCAL_SIZE_X, local_size_y = LOCAL_SIZE_Y, local_size_z = LOCAL_SIZE_Z) in; + +#include "helpers.h" + +layout(std140) uniform shader_params +{ + TENSOR3D_PARAM_DECLARATION(src); + TENSOR3D_PARAM_DECLARATION(dst); + TENSOR3D_PARAM_DECLARATION(weights); +#ifdef BIAS + VECTOR_PARAM_DECLARATION(biases); +#endif /* BIAS */ + uint weights_stride_w; + uint weights_depth; +}; + +#ifdef DATA_TYPE_FP32 + +precision highp float; + +BUFFER_DECLARATION(src, 1, float, readonly); +BUFFER_DECLARATION(dst, 2, float, writeonly); +BUFFER_DECLARATION(weights, 3, float, readonly); +#ifdef BIAS +BUFFER_DECLARATION(biases, 4, float, readonly); +#endif /* BIAS */ + +#define LOAD20(r, name, offset) \ + r[0] = LOAD4(name, offset); \ + r[1] = LOAD4(name, offset + uint(1)); \ + r[2] = LOAD4(name, offset + uint(2)); \ + r[3] = LOAD4(name, offset + uint(3)); \ + r[4] = LOAD4(name, offset + uint(4)) + +/** This kernel performs a direct convolution to convolve the low three dimensions. + * + * @note The data type must be passed at compile time using "#define DATA_TYPE_FP32" + * @note If biases are used then "define HAS_BIAS" has to be passed at compile time + * + * @param[in] src_ptr Pointer to the source tensor. Supported data types: F32 + * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes) + * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes) + * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] src_step_z src_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor + * @param[out] dst_ptr Pointer to the destination tensor. 
Supported data types: same as @p src_ptr + * @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes) + * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes) + * @param[in] dst_step_y dst_stride_y * number of elements along Z processed per workitem(in bytes) + * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in bytes) + * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor + * @param[out] weights_ptr Pointer to the weights tensor. Supported data types: same as @p src_ptr + * @param[in] weights_stride_x Stride of the weights tensor in X dimension (in bytes) + * @param[in] weights_step_x weights_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] weights_stride_y Stride of the weights tensor in Y dimension (in bytes) + * @param[in] weights_step_y weights_stride_y * number of elements along y processed per workitem(in bytes) + * @param[in] weights_stride_z Stride of the weights tensor in Z dimension (in bytes) + * @param[in] weights_step_z weights_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] weights_offset_first_element_in_bytes The offset of the first element in the weights tensor + * @param[in] biases_ptr Pointer to the biases tensor. 
Same as @p src_ptr + * @param[in] biases_stride_x Stride of the biases tensor in X dimension (in bytes) + * @param[in] biases_step_x biases_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] biases_offset_first_element_in_bytes The offset of the first element in the biases tensor + * @param[in] weights_stride_w Stride of the weights tensor in the 4th dimension + * @param[in] weights_depth The third dimensions of the weights tensors + */ +void main() +{ + Image src = CONVERT_TO_IMAGE_STRUCT(src); + Tensor3D weights = CONVERT_TO_TENSOR3D_STRUCT_NO_STEP(weights); + Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT(dst); + +#ifdef BIAS + Vector biases = CONVERT_TO_VECTOR_STRUCT_NO_STEP(biases); +#endif /* BIAS */ + + float pixels = CONVERT(0, float); + uint z_index = gl_GlobalInvocationID.z; + weights.current_offset += z_index * weights_stride_w >> 2; + float temp[5]; + float temp_weight[5]; + + for(int d = 0; d < int(weights_depth); ++d) + { + LOAD20(temp, src, offset(src, 0, 0)); + LOAD20(temp_weight, weights, tensor3D_offset(weights, 0, 0, 0)); + pixels += temp[0] * temp_weight[0] + temp[1] * temp_weight[1] + temp[2] * temp_weight[2] + temp[3] * temp_weight[3] + temp[4] * temp_weight[4]; + + LOAD20(temp, src, offset(src, 0, 1)); + LOAD20(temp_weight, weights, tensor3D_offset(weights, 0, 1, 0)); + pixels += temp[0] * temp_weight[0] + temp[1] * temp_weight[1] + temp[2] * temp_weight[2] + temp[3] * temp_weight[3] + temp[4] * temp_weight[4]; + + LOAD20(temp, src, offset(src, 0, 2)); + LOAD20(temp_weight, weights, tensor3D_offset(weights, 0, 2, 0)); + pixels += temp[0] * temp_weight[0] + temp[1] * temp_weight[1] + temp[2] * temp_weight[2] + temp[3] * temp_weight[3] + temp[4] * temp_weight[4]; + + LOAD20(temp, src, offset(src, 0, 3)); + LOAD20(temp_weight, weights, tensor3D_offset(weights, 0, 3, 0)); + pixels += temp[0] * temp_weight[0] + temp[1] * temp_weight[1] + temp[2] * temp_weight[2] + temp[3] * temp_weight[3] + temp[4] * temp_weight[4]; + 
+ LOAD20(temp, src, offset(src, 0, 4)); + LOAD20(temp_weight, weights, tensor3D_offset(weights, 0, 4, 0)); + pixels += temp[0] * temp_weight[0] + temp[1] * temp_weight[1] + temp[2] * temp_weight[2] + temp[3] * temp_weight[3] + temp[4] * temp_weight[4]; + + src.current_offset += (src_stride_z >> 2); + weights.current_offset += (weights_stride_z >> 2); + } + +#ifdef BIAS + pixels += LOAD4(biases, vector_offset(biases, int(z_index))); +#endif /* BIAS */ + + STORE4(dst, CURRENT_OFFSET(dst), pixels); +} + +#elif defined(DATA_TYPE_FP16) + +precision mediump float; + +BUFFER_DECLARATION(src, 1, uvec2, readonly); +BUFFER_DECLARATION(dst, 2, uvec2, writeonly); +BUFFER_DECLARATION(weights, 3, uint, readonly); +#ifdef BIAS +BUFFER_DECLARATION(biases, 4, uint, readonly); +#endif /* BIAS */ + +#if STRIDE_X == 1 +#define LOAD_SRC(src, row) load_src_stride1(src, row) +#define CONVOLVE1x5(src, weight) convolve1x5_stride1(src, weight) +#elif STRIDE_X == 2 /* STRIDE_X == 1 */ +#define LOAD_SRC(src, row) load_src_stride2(src, row) +#define CONVOLVE1x5(src, weight) convolve1x5_stride2(src, weight) +#else /* STRDIDE_X == 1 */ +#error STRIDE_X larger than 2 is not supported +#endif /* STRIDE_X == 1 */ + +vec4[2] load_src_stride1(Image src, int row) +{ + uvec2 packed[2]; + vec4 ret[2]; + + GC_LOAD2_2D_OFFSET(packed, src, 0, row); + + ret[0] = vec4(unpackHalf2x16(packed[0].x), unpackHalf2x16(packed[0].y)); + ret[1] = vec4(unpackHalf2x16(packed[1].x), unpackHalf2x16(packed[1].y)); + + return ret; +} + +vec4[3] load_src_stride2(Image src, int row) +{ + uvec2 packed[3]; + vec4 ret[3]; + + GC_LOAD3_2D_OFFSET(packed, src, 0, row); + + ret[0] = vec4(unpackHalf2x16(packed[0].x), unpackHalf2x16(packed[0].y)); + ret[1] = vec4(unpackHalf2x16(packed[1].x), unpackHalf2x16(packed[1].y)); + ret[2] = vec4(unpackHalf2x16(packed[2].x), unpackHalf2x16(packed[2].y)); + + return ret; +} + +vec2[3] load_weight(Tensor3D weights, int row) +{ + uvec3 packed_w; + vec2 ret[3]; + + GC_LOAD3_3D_OFFSET(packed_w, 
weights, 0, row, 0); + + ret[0] = vec2(unpackHalf2x16(packed_w[0])); + ret[1] = vec2(unpackHalf2x16(packed_w[1])); + ret[2] = vec2(unpackHalf2x16(packed_w[2])); + + return ret; +} + +// output 4 element per thread +vec4 convolve1x5_stride1(vec4 tmp[2], vec2 w[3]) +{ + vec4 src0 = tmp[0]; + vec4 src1 = vec4(tmp[0].yzw, tmp[1].x); + vec4 src2 = vec4(tmp[0].zw, tmp[1].xy); + vec4 src3 = vec4(tmp[0].w, tmp[1].xyz); + vec4 src4 = tmp[1]; + vec4 ret = src0 * w[0].x + src1 * w[0].y + src2 * w[1].x + src3 * w[1].y + src4 * w[2].x; + + return ret; +} + +vec4 convolve1x5_stride2(vec4 tmp[3], vec2 w[3]) +{ + vec4 src0 = vec4(tmp[0].xz, tmp[1].xz); + vec4 src1 = vec4(tmp[0].yw, tmp[1].yw); + vec4 src2 = vec4(tmp[0].z, tmp[1].xz, tmp[2].x); + vec4 src3 = vec4(tmp[0].w, tmp[1].yw, tmp[2].y); + vec4 src4 = vec4(tmp[1].x, tmp[1].z, tmp[2].xz); + vec4 ret = src0 * w[0].x + src1 * w[0].y + src2 * w[1].x + src3 * w[1].y + src4 * w[2].x; + + return ret; +} + +/** This kernel performs a direct convolution to convolve the low three dimensions. + * + * @note The data type must be passed at compile time using "#define DATA_TYPE_FP16" + * @note If biases are used then "define HAS_BIAS" has to be passed at compile time + * + * @param[in] src_ptr Pointer to the source tensor. 
Supported data types: F16 + * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes) + * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes) + * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] src_step_z src_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor + * @param[out] dst_ptr Pointer to the destination tensor. Supported data types: same as @p src_ptr + * @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes) + * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes) + * @param[in] dst_step_y dst_stride_y * number of elements along Z processed per workitem(in bytes) + * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in bytes) + * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor + * @param[out] weights_ptr Pointer to the weights tensor. 
Supported data types: same as @p src_ptr + * @param[in] weights_stride_x Stride of the weights tensor in X dimension (in bytes) + * @param[in] weights_step_x weights_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] weights_stride_y Stride of the weights tensor in Y dimension (in bytes) + * @param[in] weights_step_y weights_stride_y * number of elements along y processed per workitem(in bytes) + * @param[in] weights_stride_z Stride of the weights tensor in Z dimension (in bytes) + * @param[in] weights_step_z weights_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] weights_offset_first_element_in_bytes The offset of the first element in the weights tensor + * @param[in] biases_ptr Pointer to the biases tensor. Same as @p src_ptr + * @param[in] biases_stride_x Stride of the biases tensor in X dimension (in bytes) + * @param[in] biases_step_x biases_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] biases_offset_first_element_in_bytes The offset of the first element in the biases tensor + * @param[in] weights_stride_w Stride of the weights tensor in the 4th dimension + * @param[in] weights_depth The third dimensions of the weights tensors + */ +void main() +{ + Image src = GC_CONVERT_TO_IMAGE_STRUCT(src); + Tensor3D weights = GC_CONVERT_TO_TENSOR3D_STRUCT_NO_STEP(weights); + Tensor3D dst = GC_CONVERT_TO_TENSOR3D_STRUCT(dst); + +#ifdef BIAS + Vector biases = GC_CONVERT_TO_VECTOR_STRUCT_NO_STEP(biases); +#endif /* BIAS */ + + vec4 res = vec4(0); + vec2 w[3]; + vec4 s[STRIDE_X + 1]; + uvec2 packed_d; + uint z_index = gl_GlobalInvocationID.z; + + weights.current_offset += z_index * weights_stride_w; + + for(int d = 0; d < int(weights_depth); ++d) + { + for(int row = 0; row < 5; row++) + { + w = load_weight(weights, row); + s = LOAD_SRC(src, row); + res += CONVOLVE1x5(s, w); + } + + src.current_offset += src_stride_z; + weights.current_offset += weights_stride_z; + 
} + +#ifdef BIAS + uint packed_b; + float b; + + GC_LOAD1_1D_OFFSET(packed_b, biases, z_index); + b = (z_index % uint(2) == uint(0)) ? unpackHalf2x16(packed_b).x : unpackHalf2x16(packed_b).y; + res += vec4(b); +#endif /* BIAS */ + + packed_d = uvec2(packHalf2x16(res.xy), packHalf2x16(res.zw)); + GC_STORE1_3D_OFFSET(packed_d, dst, 0, 0, 0); +} + +#else /* DATA_TYPE_FP16 */ +#error Data type not supported +#endif /* DATA_TYPE_FP16 */ diff --git a/src/core/GLES_COMPUTE/cs_shaders/dropout.cs b/src/core/GLES_COMPUTE/cs_shaders/dropout.cs new file mode 100644 index 0000000000..54e08b1306 --- /dev/null +++ b/src/core/GLES_COMPUTE/cs_shaders/dropout.cs @@ -0,0 +1,204 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +layout(local_size_x = LOCAL_SIZE_X, local_size_y = LOCAL_SIZE_Y, local_size_z = LOCAL_SIZE_Z) in; + +#include "helpers.h" + +layout(std140) uniform shader_params +{ + TENSOR3D_PARAM_DECLARATION(src); + TENSOR3D_PARAM_DECLARATION(mask); + TENSOR3D_PARAM_DECLARATION(dst); +}; + +uint hash(uint x) +{ + x += (x << 10u); + x ^= (x >> 6u); + x += (x << 3u); + x ^= (x >> 11u); + x += (x << 15u); + return x; +} + +uint hash(uvec3 v) +{ + return hash(v.x ^ hash(v.y) ^ hash(v.z)); +} + +float float_construct(uint m) +{ + const uint ieee_mantissa = 0x007FFFFFu; + const uint ieee_one = 0x3F800000u; + + m &= ieee_mantissa; + m |= ieee_one; + + float f = uintBitsToFloat(m); + return f - 1.0; +} + +float rand(vec3 v, float seed) +{ + return float_construct(hash(floatBitsToUint(v + seed))); +} + +#ifdef DATA_TYPE_FP32 + +precision highp float; + +BUFFER_DECLARATION(src, 1, float, readonly); +BUFFER_DECLARATION(mask, 2, float, ); +BUFFER_DECLARATION(dst, 3, float, writeonly); + +/** Dropout is used to improve over-fit on neural networks. + * + * @note The data type must be passed at compile time using "#define DATA_TYPE_FP32" + * + * @param[in] src_ptr Pointer to the source tensor. Supported data types: F32 + * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes) + * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes) + * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] src_step_z src_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor + * @param[out] mask_ptr Pointer to the mask tensor. 
Supported data types: same as @p src_ptr + * @param[in] mask_stride_x Stride of the mask tensor in X dimension (in bytes) + * @param[in] mask_step_x mask_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] mask_stride_y Stride of the mask tensor in Y dimension (in bytes) + * @param[in] mask_step_y mask_stride_y * number of elements along y processed per workitem(in bytes) + * @param[in] mask_stride_z Stride of the mask tensor in Z dimension (in bytes) + * @param[in] mask_step_z mask_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] mask_offset_first_element_in_bytes The offset of the first element in the mask tensor + * @param[out] dst_ptr Pointer to the destination tensor. Supported data types: same as @p src_ptr + * @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes) + * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes) + * @param[in] dst_step_y dst_stride_y * number of elements along Z processed per workitem(in bytes) + * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in bytes) + * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor + */ +void main(void) +{ + Tensor3D src = GC_CONVERT_TO_TENSOR3D_STRUCT(src); + Tensor3D mask = GC_CONVERT_TO_TENSOR3D_STRUCT(mask); + Tensor3D dst = GC_CONVERT_TO_TENSOR3D_STRUCT(dst); + + float random = 0.f; + float inputv = 0.f; + float maskv = 0.f; + float outputv = 0.f; + +#ifdef FORWARD + random = rand(vec3(gl_GlobalInvocationID.xyz), SEED); + maskv = (random > RATIO) ? 
1.f : 0.f; + GC_STORE1_3D_OFFSET(maskv, mask, 0, 0, 0); +#else /* FORWARD */ + GC_LOAD1_3D_OFFSET(maskv, mask, 0, 0, 0); +#endif /* FORWARD */ + + GC_LOAD1_3D_OFFSET(inputv, src, 0, 0, 0); + outputv = maskv * inputv * float(SCALE); + GC_STORE1_3D_OFFSET(outputv, dst, 0, 0, 0); +} + +#elif defined(DATA_TYPE_FP16) + +precision mediump float; + +BUFFER_DECLARATION(src, 1, uint, readonly); +BUFFER_DECLARATION(mask, 2, uint, ); +BUFFER_DECLARATION(dst, 3, uint, writeonly); + +/** Dropout is used to improve over-fit on neural networks. + * + * @note The data type must be passed at compile time using "#define DATA_TYPE_FP16" + * + * @param[in] src_ptr Pointer to the source tensor. Supported data types: F16 + * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes) + * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes) + * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] src_step_z src_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor + * @param[out] mask_ptr Pointer to the mask tensor. 
Supported data types: same as @p src_ptr + * @param[in] mask_stride_x Stride of the mask tensor in X dimension (in bytes) + * @param[in] mask_step_x mask_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] mask_stride_y Stride of the mask tensor in Y dimension (in bytes) + * @param[in] mask_step_y mask_stride_y * number of elements along y processed per workitem(in bytes) + * @param[in] mask_stride_z Stride of the mask tensor in Z dimension (in bytes) + * @param[in] mask_step_z mask_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] mask_offset_first_element_in_bytes The offset of the first element in the mask tensor + * @param[out] dst_ptr Pointer to the destination tensor. Supported data types: same as @p src_ptr + * @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes) + * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes) + * @param[in] dst_step_y dst_stride_y * number of elements along Z processed per workitem(in bytes) + * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in bytes) + * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor + */ +void main(void) +{ + Tensor3D src = GC_CONVERT_TO_TENSOR3D_STRUCT(src); + Tensor3D mask = GC_CONVERT_TO_TENSOR3D_STRUCT(mask); + Tensor3D dst = GC_CONVERT_TO_TENSOR3D_STRUCT(dst); + + float random1 = 0.f; + float random2 = 0.f; + uint inputv = uint(0); + uint outputv = uint(0); + uint maskv = uint(0); + vec2 input_vec = vec2(0, 0); + vec2 output_vec = vec2(0, 0); + vec2 mask_vec = vec2(0, 0); + +#ifdef FORWARD + random1 = rand(vec3(gl_GlobalInvocationID.xyz), SEED); + random2 = rand(vec3(float(gl_GlobalInvocationID.x) + 
0.5f, gl_GlobalInvocationID.yz), SEED); + mask_vec.x = (random1 > RATIO) ? 1.f : 0.f; + mask_vec.y = (random2 > RATIO) ? 1.f : 0.f; + maskv = packHalf2x16(mask_vec); + GC_STORE1_3D_OFFSET(maskv, mask, 0, 0, 0); +#else /* FORWARD */ + GC_LOAD1_3D_OFFSET(maskv, mask, 0, 0, 0); + mask_vec = unpackHalf2x16(maskv); +#endif /* FORWARD */ + + GC_LOAD1_3D_OFFSET(inputv, src, 0, 0, 0); + + input_vec = unpackHalf2x16(inputv); + output_vec = mask_vec * input_vec * float(SCALE); + outputv = packHalf2x16(output_vec); + + GC_STORE1_3D_OFFSET(outputv, dst, 0, 0, 0); +} + +#else /* DATA_TYPE_FP32 */ + +#endif /* DATA_TYPE_FP32 */ diff --git a/src/core/GLES_COMPUTE/cs_shaders/fill_border.cs b/src/core/GLES_COMPUTE/cs_shaders/fill_border.cs new file mode 100644 index 0000000000..01a39866c7 --- /dev/null +++ b/src/core/GLES_COMPUTE/cs_shaders/fill_border.cs @@ -0,0 +1,553 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +layout(local_size_x = LOCAL_SIZE_X, local_size_y = LOCAL_SIZE_Y, local_size_z = LOCAL_SIZE_Z) in; +#include "helpers.h" + +#if defined(DATA_TYPE_FP32) +#ifdef FILL_IMAGE_BORDERS_REPLICATE +BUFFER_DECLARATION(buf, 1, float, restrict); +layout(std140) uniform shader_params +{ + TENSOR3D_PARAM_DECLARATION(buf); + uint width; + uint height; + int start_pos_x; + int start_pos_y; +}; + +/** Fill N pixel of the padding edge of a single channel image by replicating the closest valid pixel. + * + * @attention The border size for top, bottom, left, right needs to be passed at the compile time. + * e.g. BORDER_SIZE_TOP=0 BORDER_SIZE_BOTTOM=2 BORDER_SIZE_LEFT=0 BORDER_SIZE_RIGHT=2 + * + * @param[in,out] buf_ptr Pointer to the source image. 
Supported data types: F32 + * @param[in] buf_stride_x Stride of the source image in X dimension (in bytes) + * @param[in] buf_step_x buf_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] buf_stride_y Stride of the source image in Y dimension (in bytes) + * @param[in] buf_step_y buf_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] buf_stride_z Stride between images if batching images (in bytes) + * @param[in] buf_step_z buf_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] buf_offset_first_element_in_bytes The offset of the first element in the source image + * @param[in] width Width of the valid region of the image + * @param[in] height Height of the valid region of the image + * @param[in] start_pos_x X coordinate indicating the start point of the valid region + * @param[in] start_pos_y Y coordinate indicating the start point of the valid region + */ +void main() +{ + Image buf = CONVERT_TENSOR3D_TO_IMAGE_STRUCT_NO_STEP(buf); + + // Update pointer to point to the starting point of the valid region + buf.current_offset = uint(int(buf.current_offset) + ((start_pos_y * int(buf_stride_y) + start_pos_x * int(buf_stride_x)) >> 2)); + + int total_width = BORDER_SIZE_LEFT + int(width) + BORDER_SIZE_RIGHT; + int gid0 = int(gl_GlobalInvocationID.x); + int gidH = gid0 - total_width; + int gidW = gid0 - BORDER_SIZE_LEFT; + + if(gidH >= 0) + { + // Handle left border + float left_val = LOAD4(buf, offset(buf, 0, gidH)); + for(int i = -BORDER_SIZE_LEFT; i < 0; ++i) + { + STORE4(buf, offset(buf, i, gidH), left_val); + } + // Handle right border + float right_val = LOAD4(buf, offset(buf, int(width) - 1, gidH)); + for(int i = 0; i < BORDER_SIZE_RIGHT; ++i) + { + STORE4(buf, offset(buf, int(width) + i, gidH), right_val); + } + } + else + { + // Get value for corners + int val_idx = gidW; + if(gidW < 0 || gidW > (int(width) - 1)) + { + val_idx = gidW < 0 ? 
0 : int(width) - 1; + } + + // Handle top border + float top_val = LOAD4(buf, offset(buf, val_idx, 0)); + for(int i = -BORDER_SIZE_TOP; i < 0; ++i) + { + STORE4(buf, offset(buf, gidW, i), top_val); + } + // Handle bottom border + float bottom_val = LOAD4(buf, offset(buf, val_idx, int(height) - 1)); + for(int i = 0; i < BORDER_SIZE_BOTTOM; ++i) + { + STORE4(buf, offset(buf, gidW, int(height) + i), bottom_val); + } + } +} +#endif /* FILL_IMAGE_BORDERS_REPLICATE */ + +#ifdef FILL_IMAGE_BORDERS_CONSTANT +BUFFER_DECLARATION(buf, 1, float, writeonly); +layout(std140) uniform shader_params +{ + TENSOR3D_PARAM_DECLARATION(buf); + uint width; + uint height; + int start_pos_x; + int start_pos_y; + float constant_value; +}; + +/** Fill N pixels of the padding edge of a single channel image with a constant value. + * + * @attention The border size for top, bottom, left, right needs to be passed at the compile time. + * e.g. BORDER_SIZE_TOP=0 BORDER_SIZE_BOTTOM=2 BORDER_SIZE_LEFT=0 BORDER_SIZE_RIGHT=2 + * + * @param[out] buf_ptr Pointer to the source image. 
Supported data types: F32 + * @param[in] buf_stride_x Stride of the source image in X dimension (in bytes) + * @param[in] buf_step_x buf_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] buf_stride_y Stride of the source image in Y dimension (in bytes) + * @param[in] buf_step_y buf_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] buf_offset_first_element_in_bytes The offset of the first element in the source image + * @param[in] width Width of the valid region of the image + * @param[in] height Height of the valid region of the image + * @param[in] start_pos_x X coordinate indicating the start point of the valid region + * @param[in] start_pos_y Y coordinate indicating the start point of the valid region + * @param[in] constant_value Constant value to use to fill the edges + */ +void main() +{ + Image buf = CONVERT_TENSOR3D_TO_IMAGE_STRUCT_NO_STEP(buf); + + // Update pointer to point to the starting point of the valid region + buf.current_offset = uint(int(buf.current_offset) + ((start_pos_y * int(buf_stride_y) + start_pos_x * int(buf_stride_x)) >> 2)); + + int total_width = BORDER_SIZE_LEFT + int(width) + BORDER_SIZE_RIGHT; + int gid0 = int(gl_GlobalInvocationID.x); + int gidH = gid0 - total_width; + int gidW = gid0 - BORDER_SIZE_LEFT; + + if(gidH >= 0) + { + // Handle left border + for(int i = -BORDER_SIZE_LEFT; i < 0; ++i) + { + STORE1(buf, offset(buf, i, gidH), constant_value); + } + // Handle right border + for(int i = 0; i < BORDER_SIZE_RIGHT; ++i) + { + STORE1(buf, offset(buf, int(width) + i, gidH), constant_value); + } + } + else + { + // Handle top border + for(int i = -BORDER_SIZE_TOP; i < 0; ++i) + { + STORE1(buf, offset(buf, gidW, i), constant_value); + } + // Handle bottom border + for(int i = 0; i < BORDER_SIZE_BOTTOM; ++i) + { + STORE1(buf, offset(buf, gidW, int(height) + i), constant_value); + } + } +} +#endif /* FILL_IMAGE_BORDERS_CONSTANT */ + +#elif defined(DATA_TYPE_FP16) 
+precision mediump float; + +#ifdef FILL_IMAGE_BORDERS_REPLICATE +BUFFER_DECLARATION(buf, 1, uint, restrict); +layout(std140) uniform shader_params +{ + TENSOR3D_PARAM_DECLARATION(buf); + uint width; + uint height; + int start_pos_x; + int start_pos_y; +}; + +void set_replicate(uint offset, int pos, uint replicate_value) +{ + uint packed_b; + LOAD1(packed_b, buf, offset); + + vec2 b = unpackHalf2x16(packed_b); + vec2 c = unpackHalf2x16(replicate_value); + + if(pos % 2 == 0) + { + b.x = c.y; + } + else + { + b.y = c.x; + } + + packed_b = packHalf2x16(b); + + STORE1(buf, offset, packed_b); +} + +/** Fill N pixel of the padding edge of a single channel image by replicating the closest valid pixel. + * + * @attention The border size for top, bottom, left, right needs to be passed at the compile time. + * e.g. BORDER_SIZE_TOP=0 BORDER_SIZE_BOTTOM=2 BORDER_SIZE_LEFT=0 BORDER_SIZE_RIGHT=2 + * + * @param[in,out] buf_ptr Pointer to the source image. Supported data types: F16 + * @param[in] buf_stride_x Stride of the source image in X dimension (in bytes) + * @param[in] buf_step_x buf_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] buf_stride_y Stride of the source image in Y dimension (in bytes) + * @param[in] buf_step_y buf_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] buf_stride_z Stride between images if batching images (in bytes) + * @param[in] buf_step_z buf_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] buf_offset_first_element_in_bytes The offset of the first element in the source image + * @param[in] width Width of the valid region of the image + * @param[in] height Height of the valid region of the image + * @param[in] start_pos_x X coordinate indicating the start point of the valid region + * @param[in] start_pos_y Y coordinate indicating the start point of the valid region + */ +void main() +{ + Image buf = 
CONVERT_TENSOR3D_TO_IMAGE_STRUCT_NO_STEP_FP16(buf); + + // Update pointer to point to the starting point of the valid region + buf.current_offset = uint(buf.current_offset + uint(start_pos_y) * buf_stride_y + uint(start_pos_x) * buf_stride_x); + + int total_width = BORDER_SIZE_LEFT + int(width) + BORDER_SIZE_RIGHT; + int gid0 = int(gl_GlobalInvocationID.x); + int gidH = gid0 - total_width; + int gidW = gid0 - BORDER_SIZE_LEFT; + + if(gidH >= 0) + { + // Handle left border + uint left_val; + LOAD1(left_val, buf, offset_fp16(buf, 0, gidH) >> uint(2)); + for(int i = -BORDER_SIZE_LEFT; i < 0; ++i) + { + uint offset = offset_fp16(buf, i, gidH) >> 2; + int pos = i + BORDER_SIZE_LEFT; + if(i == -1) + { + if(pos % 2 == 0) + { + set_replicate(offset, pos, left_val); + } + } + else + { + if(pos % 2 == 0) + { + vec2 a = unpackHalf2x16(left_val); + uint b = packHalf2x16(a.xx); + STORE1(buf, offset, b); + } + } + } + // Handle right border + uint right_val; + LOAD1(right_val, buf, offset_fp16(buf, int(width) - 1, gidH) >> uint(2)); + for(int i = 0; i < BORDER_SIZE_RIGHT; ++i) + { + uint offset = offset_fp16(buf, int(width) + i, gidH) >> 2; + int pos = i + BORDER_SIZE_LEFT + int(width); + + if(i == 0) + { + if(pos % 2 == 0) + { + vec2 a = unpackHalf2x16(right_val); + uint b = packHalf2x16(a.yy); + STORE1(buf, offset, b); + } + else + { + set_replicate(offset, pos, right_val); + } + } + else + { + if(pos % 2 == 0) + { + vec2 a = unpackHalf2x16(right_val); + uint b = packHalf2x16(a.yy); + STORE1(buf, offset, b); + } + } + } + } + else + { + // Get value for corners + int val_idx = gidW; + if(gidW < 0 || (gidW > (int(width) - 1))) + { + val_idx = gidW < 0 ? 
0 : (int(width) - 1); + } + + // Handle top border + uint top_val; + LOAD1(top_val, buf, offset_fp16(buf, val_idx, 0) >> uint(2)); + for(int i = -BORDER_SIZE_TOP; i < 0; ++i) + { + uint offset = offset_fp16(buf, gidW, i) >> 2; + + if(gid0 % 2 == 0) + { + if(gidW == (int(width) - 1)) + { + vec2 a = unpackHalf2x16(top_val); + uint b = packHalf2x16(a.xx); + STORE1(buf, offset, b); + } + else + { + if(gidW < 0) + { + vec2 a = unpackHalf2x16(top_val); + uint b; + if(BORDER_SIZE_LEFT % 2 == 0) + { + b = packHalf2x16(a.xx); + } + else + { + b = packHalf2x16(a.yy); + } + STORE1(buf, offset, b); + } + else if(gidW >= int(width)) + { + vec2 a = unpackHalf2x16(top_val); + uint b; + if((BORDER_SIZE_LEFT + int(width)) % 2 == 0) + { + b = packHalf2x16(a.yy); + } + STORE1(buf, offset, b); + } + else + { + STORE1(buf, offset, top_val); + } + } + } + } + // Handle bottom border + uint bottom_val; + LOAD1(bottom_val, buf, offset_fp16(buf, val_idx, int(height) - 1) >> uint(2)); + for(int i = 0; i < BORDER_SIZE_BOTTOM; ++i) + { + uint offset = offset_fp16(buf, gidW, int(height) + i) >> 2; + + if(gid0 % 2 == 0) + { + if(gidW == (int(width) - 1)) + { + vec2 a = unpackHalf2x16(bottom_val); + uint b = packHalf2x16(a.xx); + STORE1(buf, offset, b); + } + else + { + if(gidW < 0) + { + vec2 a = unpackHalf2x16(bottom_val); + uint b; + if(BORDER_SIZE_LEFT % 2 == 0) + { + b = packHalf2x16(a.xx); + } + else + { + b = packHalf2x16(a.yy); + } + STORE1(buf, offset, b); + } + else if(gidW >= int(width)) + { + vec2 a = unpackHalf2x16(bottom_val); + uint b; + if((BORDER_SIZE_LEFT + int(width)) % 2 == 0) + { + b = packHalf2x16(a.yy); + } + STORE1(buf, offset, b); + } + else + { + STORE1(buf, offset, bottom_val); + } + } + } + } + } +} +#endif /* FILL_IMAGE_BORDERS_REPLICATE */ + +#ifdef FILL_IMAGE_BORDERS_CONSTANT +BUFFER_DECLARATION(buf, 1, uint, restrict); + +layout(std140) uniform shader_params +{ + TENSOR3D_PARAM_DECLARATION(buf); + uint width; + uint height; + int start_pos_x; + int start_pos_y; + 
float constant_value; +}; + +void set_constant(uint offset, int pos) +{ + uint packed_b; + LOAD1(packed_b, buf, offset); + + vec2 b = unpackHalf2x16(packed_b); + + if(pos % 2 == 0) + { + b.x = constant_value; + } + else + { + b.y = constant_value; + } + + packed_b = packHalf2x16(b); + + STORE1(buf, offset, packed_b); +} + +/** Fill N pixels of the padding edge of a single channel image with a constant value. + * + * @attention The border size for top, bottom, left, right needs to be passed at the compile time. + * e.g. BORDER_SIZE_TOP=0 BORDER_SIZE_BOTTOM=2 BORDER_SIZE_LEFT=0 BORDER_SIZE_RIGHT=2 + * + * @param[out] buf_ptr Pointer to the source image. Supported data types: F16 + * @param[in] buf_stride_x Stride of the source image in X dimension (in bytes) + * @param[in] buf_step_x buf_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] buf_stride_y Stride of the source image in Y dimension (in bytes) + * @param[in] buf_step_y buf_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] buf_offset_first_element_in_bytes The offset of the first element in the source image + * @param[in] width Width of the valid region of the image + * @param[in] height Height of the valid region of the image + * @param[in] start_pos_x X coordinate indicating the start point of the valid region + * @param[in] start_pos_y Y coordinate indicating the start point of the valid region + * @param[in] constant_value Constant value to use to fill the edges + */ +void main() +{ + Image buf = CONVERT_TENSOR3D_TO_IMAGE_STRUCT_NO_STEP_FP16(buf); + + int total_width = BORDER_SIZE_LEFT + int(width) + BORDER_SIZE_RIGHT; + int gid0 = int(gl_GlobalInvocationID.x); + int gidH = gid0 - total_width; + int gidW = gid0 - BORDER_SIZE_LEFT; + + // Update pointer to point to the starting point of the valid region + buf.current_offset = uint(int(buf.current_offset) + ((start_pos_y * int(buf_stride_y) + start_pos_x * int(buf_stride_x)))); + + vec2 
b = vec2(constant_value, constant_value); + + uint packed_b = packHalf2x16(b); + + if(gidH >= 0) + { + // Handle left border + for(int i = -BORDER_SIZE_LEFT; i < 0; ++i) + { + uint offset = offset_fp16(buf, i, gidH) >> 2; + int pos = i + BORDER_SIZE_LEFT; + + if(i == -1) + { + if(pos % 2 == 0) + { + set_constant(offset, pos); + } + } + else + { + if(pos % 2 == 0) + { + STORE1(buf, offset, packed_b); + } + } + } + // Handle right border + for(int i = 0; i < BORDER_SIZE_RIGHT; ++i) + { + uint offset = offset_fp16(buf, int(width) + i, gidH) >> 2; + int pos = i + BORDER_SIZE_LEFT + int(width); + + if(i == 0) + { + if(pos % 2 == 0) + { + STORE1(buf, offset, packed_b); + } + else + { + set_constant(offset, pos); + } + } + else + { + if(pos % 2 == 0) + { + STORE1(buf, offset, packed_b); + } + } + } + } + else + { + // Handle top border + for(int i = -BORDER_SIZE_TOP; i < 0; ++i) + { + uint offset = offset_fp16(buf, gidW, i) >> 2; + + if(gid0 % 2 == 0) + { + STORE1(buf, offset, packed_b); + } + } + // Handle bottom border + for(int i = 0; i < BORDER_SIZE_BOTTOM; ++i) + { + uint offset = offset_fp16(buf, gidW, int(height) + i) >> 2; + + if(gid0 % 2 == 0) + { + STORE1(buf, offset, packed_b); + } + } + } +} +#endif /* FILL_IMAGE_BORDERS_CONSTANT */ +#endif /* DATA_TYPE_FP32 */ diff --git a/src/core/GLES_COMPUTE/cs_shaders/gemm.cs b/src/core/GLES_COMPUTE/cs_shaders/gemm.cs new file mode 100755 index 0000000000..3313b88718 --- /dev/null +++ b/src/core/GLES_COMPUTE/cs_shaders/gemm.cs @@ -0,0 +1,623 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +layout(local_size_x = LOCAL_SIZE_X, local_size_y = LOCAL_SIZE_Y, local_size_z = LOCAL_SIZE_Z) in; +#include "helpers.h" + +#if defined(DATA_TYPE_FP32) +#define LOAD8(r, name, offset) \ + r.x = LOAD4(name, offset); \ + r.y = LOAD4(name, offset + uint(1)) + +#define LOAD16(r, name, offset) \ + r.x = LOAD4(name, offset); \ + r.y = LOAD4(name, offset + uint(1)); \ + r.z = LOAD4(name, offset + uint(2)); \ + r.w = LOAD4(name, offset + uint(3)) + +#define STORE16(name, offset, r) \ + STORE4(name, offset, r.x); \ + STORE4(name, offset + uint(1), r.y); \ + STORE4(name, offset + uint(2), r.z); \ + STORE4(name, offset + uint(3), r.w) + +#ifdef GEMM_TRANSPOSE1xW +BUFFER_DECLARATION(src, 1, float, readonly); +BUFFER_DECLARATION(dst, 2, float, writeonly); + +layout(std140) uniform shader_params +{ + IMAGE_PARAM_DECLARATION(src); + IMAGE_PARAM_DECLARATION(dst); +}; + +/** This OpenGL ES kernel computes the "vector" 1x4 transposition of input matrix + * + * @param[in] src_ptr Pointer to the source matrix. 
Supported data types: F32 + * @param[in] src_stride_x Stride of the source matrix in X dimension (in bytes) + * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src_stride_y Stride of the source matrix in Y dimension (in bytes) + * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source matrix + * @param[out] dst_ptr Pointer to the destination matrix Supported data types: same as @p src_ptr + * @param[in] dst_stride_x Stride of the destination matrix in X dimension (in bytes) + * @param[in] dst_step_x dst_gx_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] dst_stride_y Stride of the destination matrix in Y dimension (in bytes) + * @param[in] dst_step_y dst_gx_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination matrix + */ +void main(void) +{ + /* Compute address for Matrix B - source */ + Image src = CONVERT_TO_IMAGE_STRUCT(src); + Image dst = CONVERT_TO_IMAGE_STRUCT(dst); + + /* Compute address for Matrix B transposed - destination. X and Y are swapped */ + uint dst_addr_in_bytes = (gl_GlobalInvocationID.y * uint(16) + gl_GlobalInvocationID.x * dst.stride_y + dst.offset_first_element_in_bytes) >> 2; + vec4 b0; + LOAD16(b0, src, offset(src, 0, 0)); + STORE16(dst, dst_addr_in_bytes, b0); +} +#endif /* GEMM_TRANSPOSE1xW */ + +#ifdef GEMM_INTERLEAVE4x4 +BUFFER_DECLARATION(src, 1, float, readonly); +BUFFER_DECLARATION(dst, 2, float, writeonly); + +layout(std140) uniform shader_params +{ + IMAGE_PARAM_DECLARATION(src); + IMAGE_PARAM_DECLARATION(dst); +}; + +/** This OpenGLES kernel reshapes the input matrix interleaving the values + * + * @param[in] src_ptr Pointer to the source matrix. 
Supported data types: F32 + * @param[in] src_stride_x Stride of the source matrix in X dimension (in bytes) + * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src_stride_y Stride of the source matrix in Y dimension (in bytes) + * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source matrix + * @param[out] dst_ptr Pointer to the destination matrix Supported data types: same as @p src_ptr + * @param[in] dst_stride_x Stride of the destination matrix in X dimension (in bytes) + * @param[in] dst_step_x dst_gx_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] dst_stride_y Stride of the destination matrix in Y dimension (in bytes) + * @param[in] dst_step_y dst_gx_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination matrix + */ +void main(void) +{ + /* Compute source and destination addresses */ + Image src = CONVERT_TO_IMAGE_STRUCT(src); + Image dst = CONVERT_TO_IMAGE_STRUCT(dst); + + int i; + int j; + + for(i = 0; i < 4; ++i) + { + for(j = 0; j < 4; ++j) + { + float res = LOAD4(src, offset(src, i, j)); + uint ofset0 = CURRENT_OFFSET(dst) + uint(i * 4 + j); + STORE4(dst, ofset0, res); + } + } +} +#endif /* GEMM_INTERLEAVE4x4 */ + +#ifdef GEMM_ACCUMULATE_BIASES +BUFFER_DECLARATION(accum, 1, float, restrict); +BUFFER_DECLARATION(biases, 2, float, readonly); + +layout(std140) uniform shader_params +{ + IMAGE_PARAM_DECLARATION(accum); + VECTOR_PARAM_DECLARATION(biases); +}; + +/** This kernel accumulates each row with the biases vector + * + * @param[in, out] accum_ptr Pointer to the accumulate tensor. 
Supported data type: F32 + * @param[in] accum_stride_x Stride of the accmulate tensor in X dimension (in bytes) + * @param[in] accum_step_x accum_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] accum_stride_y Stride of the accumlulate tensor in Y dimension (in bytes) + * @param[in] accum_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] accum_offset_first_element_in_bytes The offset of the first element in the accumulate tensor + * @param[in] biases_ptr Pointer to the biases vector. Same as @p accum_ptr + * @param[in] biases_stride_x Stride of the destination tensor in X dimension (in bytes) + * @param[in] biases_step_x dst_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] biases_offset_first_element_in_bytes The offset of the first element in the destination tensor + */ +void main(void) +{ + Image accum = CONVERT_TO_IMAGE_STRUCT(accum); + Vector biases = CONVERT_TO_VECTOR_STRUCT(biases); + + for(int i = 0; i < 16; ++i) + { + float accum_value = LOAD4(accum, CURRENT_OFFSET(accum) + uint(i)); + float biases_value = LOAD4(biases, CURRENT_OFFSET(biases) + uint(i)); + accum_value = biases_value + accum_value; + + // Store result in the accummulate buffer + STORE4(accum, CURRENT_OFFSET(accum) + uint(i), accum_value); + } +} +#endif /* GEMM_ACCUMULATE_BIASES */ + +#ifdef GEMM_MM_INTERLEAVED_TRANSPOSED /* unvalidate */ +BUFFER_DECLARATION(src0, 1, float, readonly); +BUFFER_DECLARATION(src1, 2, float, readonly); +BUFFER_DECLARATION(dst, 3, float, writeonly); + +layout(std140) uniform shader_params +{ + IMAGE_PARAM_DECLARATION(src0); + IMAGE_PARAM_DECLARATION(src1); + IMAGE_PARAM_DECLARATION(dst); +}; + +/** This OpenGL ES kernel is optimised for Midgard. 
It computes the matrix multiplication between matrix A (src0) and matrix B (src1) + * Matrix A and matrix B must be reshaped respectively with @ref gemm_interleave4x4_32bit and @ref gemm_transpose1x4 before running the matrix multiplication + * + * @attention The width of matrix B and the alpha's value need to be passed at compile time using WIDTH_MATRIX_B and ALPHA + * + * @param[in] src0_ptr Pointer to the source matrix. Supported data types: F32 + * @param[in] src0_stride_x Stride of the source matrix in X dimension (in bytes) + * @param[in] src0_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src0_stride_y Stride of the source matrix in Y dimension (in bytes) + * @param[in] src0_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src0_offset_first_element_in_bytes The offset of the first element in the source matrix + * @param[in] src1_ptr Pointer to the source matrix. Supported data types: same as @p src0_ptr + * @param[in] src1_stride_x Stride of the source matrix in X dimension (in bytes) + * @param[in] src1_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src1_stride_y Stride of the source matrix in Y dimension (in bytes) + * @param[in] src1_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src1_offset_first_element_in_bytes The offset of the first element in the source matrix + * @param[out] dst_ptr Pointer to the destination matrix Supported data types: same as @p src0_ptr + * @param[in] dst_stride_x Stride of the destination matrix in X dimension (in bytes) + * @param[in] dst_step_x dst_gx_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] dst_stride_y Stride of the destination matrix in Y dimension (in bytes) + * @param[in] dst_step_y dst_gx_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] 
dst_offset_first_element_in_bytes The offset of the first element in the destination matrix + */ +void main() +{ + Image src0 = CONVERT_TO_IMAGE_STRUCT(src0); + Image src1 = CONVERT_TO_IMAGE_STRUCT(src1); + Image dst = CONVERT_TO_IMAGE_STRUCT(dst); + + /* Compute address for matrix A and B */ + src0.current_offset = (src0.offset_first_element_in_bytes + (uint(gl_GlobalInvocationID.y) * uint(src0.stride_y))) >> uint(2); + src1.current_offset = (src1.offset_first_element_in_bytes + (uint(gl_GlobalInvocationID.x) * uint(src1.stride_y))) >> uint(2); + + /* Compute end row address for matrix B */ + int end_row_mtx_b = int(src1.current_offset) + int(COLS_B); + + /* Reset accumulators */ + vec4 c00 = vec4(0.0f); + vec4 c10 = vec4(0.0f); + vec4 c20 = vec4(0.0f); + vec4 c30 = vec4(0.0f); + + // FIXME: loop unrolling really needed for GLES? + for(; int(src1.current_offset) <= (end_row_mtx_b - 8); src0.current_offset += uint(8), src1.current_offset += uint(8)) + { + /* Load values from matrix A (interleaved) and matrix B (transposed) */ + vec4 a0; + vec4 b0; + LOAD16(a0, src0, src0.current_offset); + LOAD16(b0, src1, src1.current_offset); + + c00 += vec4(a0.x) * b0; + c10 += vec4(a0.y) * b0; + c20 += vec4(a0.z) * b0; + c30 += vec4(a0.w) * b0; + + /* Load values from matrix A (interleaved) and matrix B (transposed) */ + LOAD16(a0, src0, src0.current_offset + uint(4)); + LOAD16(b0, src1, src1.current_offset + uint(4)); + + c00 += vec4(a0.x) * b0; + c10 += vec4(a0.y) * b0; + c20 += vec4(a0.z) * b0; + c30 += vec4(a0.w) * b0; + } + + for(; int(src1.current_offset) < end_row_mtx_b; src0.current_offset += uint(4), src1.current_offset += uint(4)) + { + /* Load values from matrix A (interleaved) and matrix B (transposed) */ + vec4 a0; + vec4 b0; + LOAD16(a0, src0, src0.current_offset); + LOAD16(b0, src1, src1.current_offset); + + c00 += vec4(a0.x) * b0; + c10 += vec4(a0.y) * b0; + c20 += vec4(a0.z) * b0; + c30 += vec4(a0.w) * b0; + } + + /* Multiply by the weight of matrix product */ 
+ c00 = c00 * vec4(ALPHA); + c10 = c10 * vec4(ALPHA); + c20 = c20 * vec4(ALPHA); + c30 = c30 * vec4(ALPHA); + + /* Store 4x4 block */ + STORE16(dst, offset(dst, 0, 0), c00); + STORE16(dst, offset(dst, 0, 1), c10); + STORE16(dst, offset(dst, 0, 2), c20); + STORE16(dst, offset(dst, 0, 3), c30); +} +#endif /* GEMM_MM_INTERLEAVED_TRANSPOSED */ + +#ifdef GEMM_MM_FLOATING_POINT +BUFFER_DECLARATION(src0, 1, float, readonly); +BUFFER_DECLARATION(src1, 2, float, readonly); +BUFFER_DECLARATION(dst, 3, float, writeonly); + +layout(std140) uniform shader_params +{ + IMAGE_PARAM_DECLARATION(src0); + IMAGE_PARAM_DECLARATION(src1); + IMAGE_PARAM_DECLARATION(dst); +}; + +/** This OpenGL ES kernel computes the matrix multiplication between matrix A (src0) and matrix B (src1) + * Matrix A and matrix B must be reshaped respectively with @ref gemm_interleave4x4_32bit and @ref gemm_transpose1x4 before running the matrix multiplication + * + * @attention The width of matrix B and the alpha's value need to be passed at compile time using WIDTH_MATRIX_B and ALPHA + * + * @param[in] src0_ptr Pointer to the source matrix. Supported data types: F32 + * @param[in] src0_stride_x Stride of the source matrix in X dimension (in bytes) + * @param[in] src0_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src0_stride_y Stride of the source matrix in Y dimension (in bytes) + * @param[in] src0_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src0_offset_first_element_in_bytes The offset of the first element in the source matrix + * @param[in] src1_ptr Pointer to the source matrix. 
Supported data types: same as @p src0_ptr + * @param[in] src1_stride_x Stride of the source matrix in X dimension (in bytes) + * @param[in] src1_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src1_stride_y Stride of the source matrix in Y dimension (in bytes) + * @param[in] src1_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src1_offset_first_element_in_bytes The offset of the first element in the source matrix + * @param[out] dst_ptr Pointer to the destination matrix Supported data types: same as @p src0_ptr + * @param[in] dst_stride_x Stride of the destination matrix in X dimension (in bytes) + * @param[in] dst_step_x dst_gx_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] dst_stride_y Stride of the destination matrix in Y dimension (in bytes) + * @param[in] dst_step_y dst_gx_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination matrix + */ +void main() +{ + Image src0 = CONVERT_TO_IMAGE_STRUCT(src0); + Image src1 = CONVERT_TO_IMAGE_STRUCT(src1); + Image dst = CONVERT_TO_IMAGE_STRUCT(dst); + + int idx = int(gl_GlobalInvocationID.x) * int(NUM_ELEMS_PROCESSED_PER_THREAD_X); + /* Compute the address for the vector A and matrix B */ + src0.current_offset = (src0_offset_first_element_in_bytes + uint(gl_GlobalInvocationID.y) * src0_stride_y * uint(NUM_ELEMS_PROCESSED_PER_THREAD_Y)) >> uint(2); + src1.current_offset = (src1_offset_first_element_in_bytes + uint(idx * 4)) >> uint(2); + + /* Compute end row address for matrix A */ + int end_row_vec_a = int(src0.current_offset) + ((COLS_A * 4) >> 2); + + /* Reset accumulators */ + vec4 acc0 = vec4(0.0f); +#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1 + vec4 acc1 = vec4(0.0f); +#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1 +#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2 + vec4 acc2 = 
vec4(0.0f); +#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2 +#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3 + vec4 acc3 = vec4(0.0f); +#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3 + + for(; int(src0.current_offset) <= (end_row_vec_a - 2); src0.current_offset += uint(2), src1.current_offset += uint((2 * int(src1_stride_y)) >> 2)) + { + vec2 a0; + LOAD8(a0, src0, src0.current_offset); +#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1 + vec2 a1; + LOAD8(a1, src0, src0.current_offset + (src0_stride_y >> uint(2))); +#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1 +#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2 + vec2 a2; + LOAD8(a2, src0, src0.current_offset + ((uint(2) * src0_stride_y) >> uint(2))); +#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2 +#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3 + vec2 a3; + LOAD8(a3, src0, src0.current_offset + ((uint(3) * src0_stride_y) >> uint(2))); +#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3 + + vec4 b0; + vec4 b1; + LOAD16(b0, src1, src1.current_offset); + LOAD16(b1, src1, src1.current_offset + (src1_stride_y >> uint(2))); + + acc0 += b0 * vec4(a0.x); + acc0 += b1 * vec4(a0.y); +#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1 + acc1 += b0 * vec4(a1.x); + acc1 += b1 * vec4(a1.y); +#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1 +#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2 + acc2 += b0 * vec4(a2.x); + acc2 += b1 * vec4(a2.y); +#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2 +#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3 + acc3 += b0 * vec4(a3.x); + acc3 += b1 * vec4(a3.y); +#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3 + } + + for(; int(src0.current_offset) < end_row_vec_a; src0.current_offset += uint(1), src1.current_offset += uint(int(src1_stride_y) >> 2)) + { + // Load values from matrix A + float a0; + a0 = LOAD4(src0, src0.current_offset); +#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1 + float a1; + a1 = LOAD4(src0, src0.current_offset + ((uint(1) * src0_stride_y) >> uint(2))); +#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1 +#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2 + 
float a2; + a2 = LOAD4(src0, src0.current_offset + ((uint(2) * src0_stride_y) >> uint(2))); +#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2 +#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3 + float a3; + a3 = LOAD4(src0, src0.current_offset + ((uint(3) * src0_stride_y) >> uint(2))); +#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3 + + vec4 b0; + LOAD16(b0, src1, src1.current_offset); + + acc0 += b0 * vec4(a0); +#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1 + acc1 += b0 * vec4(a1); +#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1 +#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2 + acc2 += b0 * vec4(a2); +#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2 +#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3 + acc3 += b0 * vec4(a3); +#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3 + } + + /* Multiply by the weight of vector-matrix product */ + acc0 = acc0 * vec4(ALPHA); + STORE16(dst, offset(dst, 0, 0), acc0); +#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1 + acc1 = acc1 * vec4(ALPHA); + STORE16(dst, offset(dst, 0, 1), acc1); +#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1 +#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2 + acc2 = acc2 * vec4(ALPHA); + STORE16(dst, offset(dst, 0, 2), acc2); +#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2 +#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3 + acc3 = acc3 * vec4(ALPHA); + STORE16(dst, offset(dst, 0, 3), acc3); +#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3 +} +#endif /* GEMM_MM_FLOATING_POINT */ + +#ifdef GEMM_MATRIXADDITION +BUFFER_DECLARATION(src, 1, float, readonly); +BUFFER_DECLARATION(dst, 2, float, restrict); + +layout(std140) uniform shader_params +{ + IMAGE_PARAM_DECLARATION(src); + IMAGE_PARAM_DECLARATION(dst); +}; + +/** This OpenGL ES kernel performs the in-place matrix addition between 2 matrices taking into account that the second matrix might be weighted by a scalar value beta: + * + * @attention The beta's value need to be passed at compile time using BETA + * + * @param[in] src_ptr Pointer to the source matrix. 
Supported data types: F32 + * @param[in] src_stride_x Stride of the source matrix in X dimension (in bytes) + * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src_stride_y Stride of the source matrix in Y dimension (in bytes) + * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source matrix + * @param[out] dst_ptr Pointer to the destination matrix Supported data types: same as @p src_ptr + * @param[in] dst_stride_x Stride of the destination matrix in X dimension (in bytes) + * @param[in] dst_step_x dst_gx_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] dst_stride_y Stride of the destination matrix in Y dimension (in bytes) + * @param[in] dst_step_y dst_gx_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination matrix + */ +void main(void) +{ + /* Compute source and destination addresses */ + Image src = CONVERT_TO_IMAGE_STRUCT(src); + Image dst = CONVERT_TO_IMAGE_STRUCT(dst); + + /* Load values from A x B */ + vec4 alpha_ab; + vec4 c; + vec4 out1; + + LOAD16(alpha_ab, dst, dst.current_offset); + LOAD16(c, src, src.current_offset); + + /* Computes alpha * axb + beta * c */ + out1 = alpha_ab + vec4(BETA * c); + + /* Store final result in axb matrix */ + STORE16(dst, dst.current_offset, out1); +} +#endif /* GEMM_MATRIXADDITION */ +#elif defined(DATA_TYPE_FP16) +precision mediump float; +#ifdef GEMM_MM_FLOATING_POINT +BUFFER_DECLARATION(src0, 1, uint, readonly); +BUFFER_DECLARATION(src1, 2, uvec2, readonly); +BUFFER_DECLARATION(dst, 3, uvec2, writeonly); + +layout(std140) uniform shader_params +{ + IMAGE_PARAM_DECLARATION(src0); + IMAGE_PARAM_DECLARATION(src1); + IMAGE_PARAM_DECLARATION(dst); +}; + +/** This OpenGL ES 
kernel computes the matrix multiplication between matrix A (src0) and matrix B (src1) + * Matrix A and matrix B must be reshaped respectively with @ref gemm_interleave4x4_32bit and @ref gemm_transpose1x4 before running the matrix multiplication + * + * @attention The width of matrix B and the alpha's value need to be passed at compile time using WIDTH_MATRIX_B and ALPHA + * + * @param[in] src0_ptr Pointer to the source matrix. Supported data types: F16 + * @param[in] src0_stride_x Stride of the source matrix in X dimension (in bytes) + * @param[in] src0_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src0_stride_y Stride of the source matrix in Y dimension (in bytes) + * @param[in] src0_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src0_offset_first_element_in_bytes The offset of the first element in the source matrix + * @param[in] src1_ptr Pointer to the source matrix. Supported data types: same as @p src0_ptr + * @param[in] src1_stride_x Stride of the source matrix in X dimension (in bytes) + * @param[in] src1_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src1_stride_y Stride of the source matrix in Y dimension (in bytes) + * @param[in] src1_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src1_offset_first_element_in_bytes The offset of the first element in the source matrix + * @param[out] dst_ptr Pointer to the destination matrix Supported data types: same as @p src0_ptr + * @param[in] dst_stride_x Stride of the destination matrix in X dimension (in bytes) + * @param[in] dst_step_x dst_gx_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] dst_stride_y Stride of the destination matrix in Y dimension (in bytes) + * @param[in] dst_step_y dst_gx_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] 
dst_offset_first_element_in_bytes The offset of the first element in the destination matrix + */ +void main() +{ + Image src0 = GC_CONVERT_TO_IMAGE_STRUCT(src0); + Image src1 = GC_CONVERT_TO_IMAGE_STRUCT(src1); + Image dst = GC_CONVERT_TO_IMAGE_STRUCT(dst); + + int idx = int(gl_GlobalInvocationID.x) * int(NUM_ELEMS_PROCESSED_PER_THREAD_X); + /* Compute the address for the vector A and matrix B */ + src0.current_offset = (src0_offset_first_element_in_bytes + uint(gl_GlobalInvocationID.y) * src0_stride_y * uint(NUM_ELEMS_PROCESSED_PER_THREAD_Y)); + src1.current_offset = src1_offset_first_element_in_bytes + uint(idx) * src1_stride_x; + + /* Compute end row address for matrix A */ + uint end_row_vec_a = src0.current_offset + uint(COLS_A << 1); + + /* Reset accumulators */ + vec4 acc0 = vec4(0.0f); + + for(; src0.current_offset < (end_row_vec_a - uint(2)); src0.current_offset += uint(2 * 2), src1.current_offset += uint(2) * src1_stride_y) + { + uint packed_a0; + vec2 a0; + + GC_LOAD1_2D_OFFSET(packed_a0, src0, 0, 0); + a0 = vec2(unpackHalf2x16(packed_a0)); + + uvec2 packed_b0; + uvec2 packed_b1; + vec4 b0; + vec4 b1; + + GC_LOAD1_2D_OFFSET(packed_b0, src1, 0, 0); + GC_LOAD1_2D_OFFSET(packed_b1, src1, 0, 1); + + b0 = vec4(unpackHalf2x16(packed_b0.x), unpackHalf2x16(packed_b0.y)); + b1 = vec4(unpackHalf2x16(packed_b1.x), unpackHalf2x16(packed_b1.y)); + + acc0 += b0 * vec4(a0.x); + acc0 += b1 * vec4(a0.y); + } + + for(; src0.current_offset < end_row_vec_a; src0.current_offset += uint(2 * 2), src1.current_offset += src1_stride_y) + { + uint packed_a0; + vec2 a0; + + GC_LOAD1_2D_OFFSET(packed_a0, src0, 0, 0); + a0 = vec2(unpackHalf2x16(packed_a0)); + + uvec2 packed_b0; + vec4 b0; + + GC_LOAD1_2D_OFFSET(packed_b0, src1, 0, 0); + + b0 = vec4(unpackHalf2x16(packed_b0.x), unpackHalf2x16(packed_b0.y)); + + acc0 += b0 * (a0.x); + } + + /* Multiply by the weight of vector-matrix product */ + acc0 = acc0 * vec4(ALPHA); + + uvec2 packed_d; + packed_d = uvec2(packHalf2x16(acc0.xy), 
packHalf2x16(acc0.zw)); + GC_STORE1_2D_OFFSET(packed_d, dst, 0, 0); +} +#endif /* GEMM_MM_FLOATING_POINT */ + +#ifdef GEMM_ACCUMULATE_BIASES +BUFFER_DECLARATION(accum, 1, uvec2, restrict); +BUFFER_DECLARATION(biases, 2, uvec2, readonly); + +layout(std140) uniform shader_params +{ + IMAGE_PARAM_DECLARATION(accum); + VECTOR_PARAM_DECLARATION(biases); +}; + +/** This kernel accumulates each row with the biases vector + * + * @param[in, out] accum_ptr Pointer to the accumulate tensor. Supported data type: F16 + * @param[in] accum_stride_x Stride of the accumulate tensor in X dimension (in bytes) + * @param[in] accum_step_x accum_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] accum_stride_y Stride of the accumulate tensor in Y dimension (in bytes) + * @param[in] accum_step_y accum_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] accum_offset_first_element_in_bytes The offset of the first element in the accumulate tensor + * @param[in] biases_ptr Pointer to the biases vector. 
Same as @p accum_ptr + * @param[in] biases_stride_x Stride of the destination tensor in X dimension (in bytes) + * @param[in] biases_step_x dst_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] biases_offset_first_element_in_bytes The offset of the first element in the destination tensor + */ +void main(void) +{ + Image accum = GC_CONVERT_TO_IMAGE_STRUCT(accum); + Vector biases = GC_CONVERT_TO_VECTOR_STRUCT(biases); + + vec4 u[2]; + uvec2 packed_s[2]; + GC_LOAD1_2D_OFFSET(packed_s[0], accum, 0, 0); + GC_LOAD1_1D_OFFSET(packed_s[1], biases, 0); + u[0] = vec4(unpackHalf2x16(packed_s[0].x), unpackHalf2x16(packed_s[0].y)); + u[1] = vec4(unpackHalf2x16(packed_s[1].x), unpackHalf2x16(packed_s[1].y)); + + vec4 tmp; + tmp = u[0] + u[1]; + packed_s[0] = uvec2(packHalf2x16(tmp.xy), packHalf2x16(tmp.zw)); + GC_STORE1_2D_OFFSET(packed_s[0], accum, 0, 0); +} +#endif /* GEMM_ACCUMULATE_BIASES */ +#else /* DATA_TYPE_F32 */ +#error Data type not supported +#endif /* DATA_TYPE_F32 */ diff --git a/src/core/GLES_COMPUTE/cs_shaders/helpers.h b/src/core/GLES_COMPUTE/cs_shaders/helpers.h new file mode 100644 index 0000000000..86dedf5a9c --- /dev/null +++ b/src/core/GLES_COMPUTE/cs_shaders/helpers.h @@ -0,0 +1,582 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef ARM_COMPUTE_HELPER_H +#define ARM_COMPUTE_HELPER_H + +#define CLAMP(x, min_val, max_val) min(max(x, min_val), max_val) + +#define VEC_DATA_TYPE_STR(type, size) type##size +#define VEC_DATA_TYPE(type, size) VEC_DATA_TYPE_STR(type, size) + +#define CONVERT(x, type) type(x) + +#define PACK(value, stype, dtype) \ + pack_##stype##_##dtype(value) + +#define UNPACK(value, stype, dtype) \ + unpack_##stype##_##dtype(value) + +#define BUFFER_DECLARATION(name, location, type, access) \ + layout(std430, binding = location) access buffer name##Buffer \ + { \ + type name##_ptr[]; \ + } + +#define VECTOR_PARAM_DECLARATION(name) \ + uint name##_stride_x; \ + uint name##_step_x; \ + uint name##_offset_first_element_in_bytes; \ + uint name##_buffer_data_type_size + +#define IMAGE_PARAM_DECLARATION(name) \ + uint name##_stride_x; \ + uint name##_step_x; \ + uint name##_stride_y; \ + uint name##_step_y; \ + uint name##_offset_first_element_in_bytes; \ + uint name##_buffer_data_type_size + +#define TENSOR3D_PARAM_DECLARATION(name) \ + uint name##_stride_x; \ + uint name##_step_x; \ + uint name##_stride_y; \ + uint name##_step_y; \ + uint name##_stride_z; \ + uint name##_step_z; \ + uint name##_offset_first_element_in_bytes; \ + uint name##_buffer_data_type_size + +/** Structure to hold Vector information */ +struct Vector +{ + uint current_offset; /**< Current offset of vector */ + uint offset_first_element_in_bytes; /**< The offset of the first element in the source image */ + uint 
stride_x; /**< Stride of the image in X dimension (in bytes) */ +}; + +/** Structure to hold Image information */ +struct Image +{ + uint current_offset; /**< Current offset of image */ + uint offset_first_element_in_bytes; /**< The offset of the first element in the source image */ + uint stride_x; /**< Stride of the image in X dimension (in bytes) */ + uint stride_y; /**< Stride of the image in Y dimension (in bytes) */ +}; + +/** Structure to hold 3D tensor information */ +struct Tensor3D +{ + uint current_offset; /**< Current offset of tensor */ + uint offset_first_element_in_bytes; /**< The offset of the first element in the source image */ + uint stride_x; /**< Stride of the image in X dimension (in bytes) */ + uint stride_y; /**< Stride of the image in Y dimension (in bytes) */ + uint stride_z; /**< Stride of the image in Z dimension (in bytes) */ +}; + +///////////////////////////////////////////////////////////// +// TODO: old to be removed + +#define CONVERT_TO_VECTOR_STRUCT(name) \ + update_vector_workitem_offset(name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x) + +#define CONVERT_TO_VECTOR_STRUCT_FP16(name) \ + update_vector_workitem_offset_fp16(name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x) + +#define CONVERT_TO_VECTOR_STRUCT_NO_STEP(name) \ + update_vector_workitem_offset(name##_offset_first_element_in_bytes, name##_stride_x, uint(0)) + +#define CONVERT_TO_VECTOR_STRUCT_NO_STEP_FP16(name) \ + update_vector_workitem_offset_fp16(name##_offset_first_element_in_bytes, name##_stride_x, uint(0)) + +#define CONVERT_TO_IMAGE_STRUCT(name) \ + update_image_workitem_offset(name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y) + +#define CONVERT_TO_IMAGE_STRUCT_FP16(name) \ + update_image_workitem_offset_fp16(name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y) + +#define CONVERT_TO_IMAGE_STRUCT_NO_STEP(name) \ + 
update_image_workitem_offset(name##_offset_first_element_in_bytes, name##_stride_x, uint(0), name##_stride_y, uint(0)) + +#define CONVERT_TO_IMAGE_STRUCT_NO_STEP_FP16(name) \ + update_image_workitem_offset_fp16(name##_offset_first_element_in_bytes, name##_stride_x, uint(0), name##_stride_y, uint(0)) + +#define CONVERT_TENSOR3D_TO_IMAGE_STRUCT_NO_STEP(name) \ + update_image_from_tensor3D_workitem_offset(name##_offset_first_element_in_bytes, name##_stride_x, uint(0), name##_stride_y, uint(0), name##_stride_z, name##_step_z) + +#define CONVERT_TENSOR3D_TO_IMAGE_STRUCT_NO_STEP_FP16(name) \ + update_image_from_tensor3D_workitem_offset_fp16(name##_offset_first_element_in_bytes, name##_stride_x, uint(0), name##_stride_y, uint(0), name##_stride_z, name##_step_z) + +#define CONVERT_TENSOR3D_TO_IMAGE_STRUCT(name) \ + update_image_from_tensor3D_workitem_offset(name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y, name##_stride_z, name##_step_z) + +#define CONVERT_TENSOR3D_TO_IMAGE_STRUCT_FP16(name) \ + update_image_from_tensor3D_workitem_offset_fp16(name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y, name##_stride_z, name##_step_z) + +#define CONVERT_TO_TENSOR3D_STRUCT(name) \ + update_tensor3D_workitem_offset(name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y, \ + name##_stride_z, name##_step_z) + +#define CONVERT_TO_TENSOR3D_STRUCT_FP16(name) \ + update_tensor3D_workitem_offset_fp16(name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y, \ + name##_stride_z, name##_step_z) + +#define CONVERT_TO_TENSOR3D_STRUCT_NO_STEP(name) \ + update_tensor3D_workitem_offset(name##_offset_first_element_in_bytes, name##_stride_x, uint(0), name##_stride_y, uint(0), name##_stride_z, uint(0)) + +#define CONVERT_TO_TENSOR3D_STRUCT_NO_STEP_FP16(name) \ + 
update_tensor3D_workitem_offset_fp16(name##_offset_first_element_in_bytes, name##_stride_x, uint(0), name##_stride_y, uint(0), name##_stride_z, uint(0)) + +// FIXME: Redesign the macros if different data types are supported. +#define LOAD4(name, offset) \ + name##_ptr[offset] + +#define STORE4(name, offset, value) \ + name##_ptr[offset] = value + +// Load 1 element, which size is determined by ssbo type. +#define LOAD1(r, name, offset) \ + r = name##_ptr[offset] + +#define STORE1(name, offset, value) \ + name##_ptr[offset] = value + +#define LOAD2(r, name, offset) \ + LOAD1(r[0], name, offset); \ + LOAD1(r[1], name, (offset) + uint(1)) + +#define STORE2(name, offset, value) \ + name##_ptr[offset] = value[0]; \ + name##_ptr[(offset) + uint(1)] = value[1] + +#define LOAD3(r, name, offset) \ + LOAD1(r[0], name, offset); \ + LOAD1(r[1], name, (offset) + uint(1)); \ + LOAD1(r[2], name, (offset) + uint(2)) + +#define CURRENT_OFFSET(name) \ + name.current_offset + +/** Wrap vector information into an Vector structure, and make the offset to be this workitem's position. + * + * @param[in] offset_first_element_in_bytes The offset of the first element in the source vector + * @param[in] stride_x Stride of the vector in X dimension (in bytes) + * @param[in] step_x stride_x * number of elements along X processed per workitem(in bytes) + * + * @return An vector object + */ +Vector update_vector_workitem_offset(uint offset_first_element_in_bytes, uint stride_x, uint step_x) +{ + Vector vector; + vector.offset_first_element_in_bytes = offset_first_element_in_bytes; + vector.stride_x = stride_x; + vector.current_offset = (vector.offset_first_element_in_bytes + gl_GlobalInvocationID.x * step_x) >> 2; + + return vector; +} + +/** Wrap vector information into an Vector structure, and make the offset to be this workitem's position. 
+ * + * @param[in] offset_first_element_in_bytes The offset of the first element in the source vector + * @param[in] stride_x Stride of the vector in X dimension (in bytes) + * @param[in] step_x stride_x * number of elements along X processed per workitem(in bytes) + * + * @return An vector object + */ +Vector update_vector_workitem_offset_fp16(uint offset_first_element_in_bytes, uint stride_x, uint step_x) +{ + Vector vector; + vector.offset_first_element_in_bytes = offset_first_element_in_bytes; + vector.stride_x = stride_x; + vector.current_offset = vector.offset_first_element_in_bytes + gl_GlobalInvocationID.x * step_x; + + return vector; +} + +/** Wrap image information into an Image structure, and make the offset to be this workitem's position. + * + * @param[in] offset_first_element_in_bytes The offset of the first element in the source image + * @param[in] stride_x Stride of the image in X dimension (in bytes) + * @param[in] step_x stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] stride_y Stride of the image in Y dimension (in bytes) + * @param[in] step_y stride_y * number of elements along Y processed per workitem(in bytes) + * + * @return An image object + */ +Image update_image_workitem_offset(uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y) +{ + Image img; + img.offset_first_element_in_bytes = offset_first_element_in_bytes; + img.stride_x = stride_x; + img.stride_y = stride_y; + img.current_offset = (img.offset_first_element_in_bytes + gl_GlobalInvocationID.x * step_x + gl_GlobalInvocationID.y * step_y) >> 2; + + return img; +} + +/** Wrap image information into an Image structure, and make the offset to be this workitem's position. 
+ * + * @param[in] offset_first_element_in_bytes The offset of the first element in the source image + * @param[in] stride_x Stride of the image in X dimension (in bytes) + * @param[in] step_x stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] stride_y Stride of the image in Y dimension (in bytes) + * @param[in] step_y stride_y * number of elements along Y processed per workitem(in bytes) + * + * @return An image object + */ +Image update_image_workitem_offset_fp16(uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y) +{ + Image img; + img.offset_first_element_in_bytes = offset_first_element_in_bytes; + img.stride_x = stride_x; + img.stride_y = stride_y; + img.current_offset = img.offset_first_element_in_bytes + gl_GlobalInvocationID.x * step_x + gl_GlobalInvocationID.y * step_y; + + return img; +} + +/** Wrap 3D tensor information into an image structure, and make the offset to be this workitem's position. + * + * @param[in] offset_first_element_in_bytes The offset of the first element in the source image + * @param[in] stride_x Stride of the image in X dimension (in bytes) + * @param[in] step_x stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] stride_y Stride of the image in Y dimension (in bytes) + * @param[in] step_y stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] stride_z Stride of the image in Z dimension (in bytes) + * @param[in] step_z stride_z * number of elements along Z processed per workitem(in bytes) + * + * @return A 2D Image object + */ +Image update_image_from_tensor3D_workitem_offset(uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y, uint stride_z, uint step_z) +{ + Image img; + img.offset_first_element_in_bytes = offset_first_element_in_bytes; + img.stride_x = stride_x; + img.stride_y = stride_y; + img.current_offset = (img.offset_first_element_in_bytes + 
gl_GlobalInvocationID.x * step_x + gl_GlobalInvocationID.y * step_y + gl_GlobalInvocationID.z * step_z) >> 2; + + return img; +} + +/** Wrap 3D tensor information into an image structure, and make the offset to be this workitem's position. + * + * @param[in] offset_first_element_in_bytes The offset of the first element in the source image + * @param[in] stride_x Stride of the image in X dimension (in bytes) + * @param[in] step_x stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] stride_y Stride of the image in Y dimension (in bytes) + * @param[in] step_y stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] stride_z Stride of the image in Z dimension (in bytes) + * @param[in] step_z stride_z * number of elements along Z processed per workitem(in bytes) + * + * @return A 2D Image object + */ +Image update_image_from_tensor3D_workitem_offset_fp16(uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y, uint stride_z, uint step_z) +{ + Image img; + img.offset_first_element_in_bytes = offset_first_element_in_bytes; + img.stride_x = stride_x; + img.stride_y = stride_y; + img.current_offset = img.offset_first_element_in_bytes + gl_GlobalInvocationID.x * step_x + gl_GlobalInvocationID.y * step_y + gl_GlobalInvocationID.z * step_z; + + return img; +} + +/** Wrap 3D tensor information into an tensor structure, and make the offset to be this workitem's position. 
+ * + * @param[in] offset_first_element_in_bytes The offset of the first element in the source image + * @param[in] stride_x Stride of the image in X dimension (in bytes) + * @param[in] step_x stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] stride_y Stride of the image in Y dimension (in bytes) + * @param[in] step_y stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] stride_z Stride of the image in Z dimension (in bytes) + * @param[in] step_z stride_z * number of elements along Z processed per workitem(in bytes) + * + * @return A 3D tensor object + */ +Tensor3D update_tensor3D_workitem_offset(uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y, uint stride_z, uint step_z) +{ + Tensor3D tensor; + tensor.offset_first_element_in_bytes = offset_first_element_in_bytes; + tensor.stride_x = stride_x; + tensor.stride_y = stride_y; + tensor.stride_z = stride_z; + tensor.current_offset = (tensor.offset_first_element_in_bytes + gl_GlobalInvocationID.x * step_x + gl_GlobalInvocationID.y * step_y + gl_GlobalInvocationID.z * step_z) >> 2; + + return tensor; +} + +/** Wrap 3D tensor information into an tensor structure, and make the offset to be this workitem's position. 
+ * + * @param[in] offset_first_element_in_bytes The offset of the first element in the source image + * @param[in] stride_x Stride of the image in X dimension (in bytes) + * @param[in] step_x stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] stride_y Stride of the image in Y dimension (in bytes) + * @param[in] step_y stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] stride_z Stride of the image in Z dimension (in bytes) + * @param[in] step_z stride_z * number of elements along Z processed per workitem(in bytes) + * + * @return A 3D tensor object + */ +Tensor3D update_tensor3D_workitem_offset_fp16(uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y, uint stride_z, uint step_z) +{ + Tensor3D tensor; + tensor.offset_first_element_in_bytes = offset_first_element_in_bytes; + tensor.stride_x = stride_x; + tensor.stride_y = stride_y; + tensor.stride_z = stride_z; + tensor.current_offset = tensor.offset_first_element_in_bytes + gl_GlobalInvocationID.x * step_x + gl_GlobalInvocationID.y * step_y + gl_GlobalInvocationID.z * step_z; + + return tensor; +} + +/** Get the pointer position of a Vector + * + * @param[in] vec Pointer to the starting position of the buffer + * @param[in] x Relative X position + */ +uint vector_offset(Vector vec, int x) +{ + return CONVERT(CONVERT(vec.current_offset << 2, int) + x * CONVERT(vec.stride_x, int), uint) >> 2; +} + +/** Get the pointer position of a Vector + * + * @param[in] vec Pointer to the starting position of the buffer + * @param[in] x Relative X position + */ +uint vector_offset_fp16(Vector vec, int x) +{ + return CONVERT(CONVERT(vec.current_offset, int) + x * CONVERT(vec.stride_x, int), uint); +} + +/** Get the pointer position of a Image + * + * @param[in] img Pointer to the starting position of the buffer + * @param[in] x Relative X position + * @param[in] y Relative Y position + */ +uint offset(Image img, int x, int 
y) +{ + return CONVERT(CONVERT(img.current_offset << 2, int) + x * CONVERT(img.stride_x, int) + y * CONVERT(img.stride_y, int), uint) >> 2; +} + +/** Get the pointer position of a Image + * + * @param[in] img Pointer to the starting position of the buffer + * @param[in] x Relative X position + * @param[in] y Relative Y position + */ +uint offset_fp16(Image img, int x, int y) +{ + return CONVERT(CONVERT(img.current_offset, int) + x * CONVERT(img.stride_x, int) + y * CONVERT(img.stride_y, int), uint); +} + +/** Get the pointer position of a Tensor3D + * + * @param[in] tensor Pointer to the starting postion of the buffer + * @param[in] x Relative X position + * @param[in] y Relative Y position + * @param[in] z Relative Z position + */ +uint tensor3D_offset(Tensor3D tensor, int x, int y, int z) +{ + return CONVERT(CONVERT(tensor.current_offset << 2, int) + x * CONVERT(tensor.stride_x, int) + y * CONVERT(tensor.stride_y, int) + z * CONVERT(tensor.stride_z, int), uint) >> 2; +} + +/** Get the pointer position of a Tensor3D + * + * @param[in] tensor Pointer to the starting postion of the buffer + * @param[in] x Relative X position + * @param[in] y Relative Y position + * @param[in] z Relative Z position + */ +uint tensor3D_offset_fp16(Tensor3D tensor, int x, int y, int z) +{ + return CONVERT(CONVERT(tensor.current_offset, int) + x * CONVERT(tensor.stride_x, int) + y * CONVERT(tensor.stride_y, int) + z * CONVERT(tensor.stride_z, int), uint); +} + +///////////////////////////////////////////////////////////// +// new one + +#define GC_CONVERT_TO_VECTOR_STRUCT(name) \ + gc_update_vector_workitem_offset(name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x) + +#define GC_CONVERT_TO_VECTOR_STRUCT_NO_STEP(name) \ + gc_update_vector_workitem_offset(name##_offset_first_element_in_bytes, name##_stride_x, uint(0)) + +#define GC_CONVERT_TO_IMAGE_STRUCT(name) \ + gc_update_image_workitem_offset(name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, 
name##_stride_y, name##_step_y) + +#define GC_CONVERT_TO_IMAGE_STRUCT_NO_STEP(name) \ + gc_update_image_workitem_offset(name##_offset_first_element_in_bytes, name##_stride_x, uint(0), name##_stride_y, uint(0)) + +#define GC_CONVERT_TO_TENSOR3D_STRUCT(name) \ + gc_update_tensor3D_workitem_offset(name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y, \ + name##_stride_z, name##_step_z) + +#define GC_CONVERT_TO_TENSOR3D_STRUCT_NO_STEP(name) \ + gc_update_tensor3D_workitem_offset(name##_offset_first_element_in_bytes, name##_stride_x, uint(0), name##_stride_y, uint(0), name##_stride_z, uint(0)) + +#define GC_CONVERT_TENSOR3D_TO_IMAGE_STRUCT(name) \ + gc_update_image_from_tensor3D_workitem_offset(name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y, name##_stride_z, name##_step_z) + +#define GC_CONVERT_TENSOR3D_TO_IMAGE_STRUCT_NO_STEP(name) \ + gc_update_image_from_tensor3D_workitem_offset(name##_offset_first_element_in_bytes, name##_stride_x, uint(0), name##_stride_y, uint(0), name##_stride_z, name##_step_z) + +Vector gc_update_vector_workitem_offset(uint offset_first_element_in_bytes, uint stride_x, uint step_x) +{ + Vector vector; + vector.offset_first_element_in_bytes = offset_first_element_in_bytes; + vector.stride_x = stride_x; + vector.current_offset = vector.offset_first_element_in_bytes + gl_GlobalInvocationID.x * step_x; + + return vector; +} + +Image gc_update_image_workitem_offset(uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y) +{ + Image img; + img.offset_first_element_in_bytes = offset_first_element_in_bytes; + img.stride_x = stride_x; + img.stride_y = stride_y; + img.current_offset = img.offset_first_element_in_bytes + gl_GlobalInvocationID.x * step_x + gl_GlobalInvocationID.y * step_y; + + return img; +} + +Tensor3D gc_update_tensor3D_workitem_offset(uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint 
stride_y, uint step_y, uint stride_z, uint step_z) +{ + Tensor3D tensor; + tensor.offset_first_element_in_bytes = offset_first_element_in_bytes; + tensor.stride_x = stride_x; + tensor.stride_y = stride_y; + tensor.stride_z = stride_z; + tensor.current_offset = tensor.offset_first_element_in_bytes + gl_GlobalInvocationID.x * step_x + gl_GlobalInvocationID.y * step_y + gl_GlobalInvocationID.z * step_z; + + return tensor; +} + +Image gc_update_image_from_tensor3D_workitem_offset(uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y, uint stride_z, uint step_z) +{ + Image img; + img.offset_first_element_in_bytes = offset_first_element_in_bytes; + img.stride_x = stride_x; + img.stride_y = stride_y; + img.current_offset = img.offset_first_element_in_bytes + gl_GlobalInvocationID.x * step_x + gl_GlobalInvocationID.y * step_y + gl_GlobalInvocationID.z * step_z; + + return img; +} + +#define GC_CURRENT_OFFSET(name) \ + name.current_offset + +uint gc_vector_offset(Vector vec, int x) +{ + return CONVERT(CONVERT(vec.current_offset, int) + x * CONVERT(vec.stride_x, int), uint); +} + +uint gc_image_offset(Image img, int x, int y) +{ + return CONVERT(CONVERT(img.current_offset, int) + x * CONVERT(img.stride_x, int) + y * CONVERT(img.stride_y, int), uint); +} + +uint gc_tensor3D_offset(Tensor3D tensor, int x, int y, int z) +{ + return CONVERT(CONVERT(tensor.current_offset, int) + x * CONVERT(tensor.stride_x, int) + y * CONVERT(tensor.stride_y, int) + z * CONVERT(tensor.stride_z, int), uint); +} + +// load/store number of element depends on buffer type +#define GC_LOAD1(r, name, offset) \ + r = name##_ptr[offset] + +#define GC_LOAD2(r, name, offset) \ + GC_LOAD1(r[0], name, offset); \ + GC_LOAD1(r[1], name, (offset) + uint(1)) + +#define GC_LOAD3(r, name, offset) \ + GC_LOAD1(r[0], name, offset); \ + GC_LOAD1(r[1], name, (offset) + uint(1)); \ + GC_LOAD1(r[2], name, (offset) + uint(2)) + +#define GC_STORE1(value, name, offset) \ + 
name##_ptr[offset] = value + +#define GC_STORE2(value, name, offset) \ + GC_STORE1(value[0], name, offset); \ + GC_STORE1(value[1], name, (offset) + uint(1)) + +#define GC_STORE3(value, name, offset) \ + GC_STORE1(value[0], name, offset); \ + GC_STORE1(value[1], name, (offset) + uint(1)); \ + GC_STORE1(value[2], name, (offset) + uint(2)) + +// has to manually expand them since not supported by compiler +#define GC_LOAD1_1D_OFFSET(r, name, x) \ + GC_LOAD1(r, name, gc_vector_offset(name, int(x)) >> name##_buffer_data_type_size) + +#define GC_LOAD1_2D_OFFSET(r, name, x, y) \ + GC_LOAD1(r, name, gc_image_offset(name, int(x), int(y)) >> name##_buffer_data_type_size) + +#define GC_LOAD1_3D_OFFSET(r, name, x, y, z) \ + GC_LOAD1(r, name, gc_tensor3D_offset(name, int(x), int(y), int(z)) >> name##_buffer_data_type_size) + +#define GC_STORE1_1D_OFFSET(value, name, x) \ + GC_STORE1(value, name, gc_vector_offset(name, int(x)) >> name##_buffer_data_type_size) + +#define GC_STORE1_2D_OFFSET(value, name, x, y) \ + GC_STORE1(value, name, gc_image_offset(name, int(x), int(y)) >> name##_buffer_data_type_size) + +#define GC_STORE1_3D_OFFSET(value, name, x, y, z) \ + GC_STORE1(value, name, gc_tensor3D_offset(name, int(x), int(y), int(z)) >> name##_buffer_data_type_size) + +#define GC_LOAD2_1D_OFFSET(r, name, x) \ + GC_LOAD2(r, name, gc_vector_offset(name, int(x)) >> name##_buffer_data_type_size) + +#define GC_LOAD2_2D_OFFSET(r, name, x, y) \ + GC_LOAD2(r, name, gc_image_offset(name, int(x), int(y)) >> name##_buffer_data_type_size) + +#define GC_LOAD2_3D_OFFSET(r, name, x, y, z) \ + GC_LOAD2(r, name, gc_tensor3D_offset(name, int(x), int(y), int(z)) >> name##_buffer_data_type_size) + +#define GC_STORE2_1D_OFFSET(value, name, x) \ + GC_STORE2(value, name, gc_vector_offset(name, int(x)) >> name##_buffer_data_type_size) + +#define GC_STORE2_2D_OFFSET(value, name, x, y) \ + GC_STORE2(value, name, gc_image_offset(name, int(x), int(y)) >> name##_buffer_data_type_size) + +#define 
GC_STORE2_3D_OFFSET(value, name, x, y, z) \ + GC_STORE2(value, name, gc_tensor3D_offset(name, int(x), int(y), int(z)) >> name##_buffer_data_type_size) + +#define GC_LOAD3_1D_OFFSET(r, name, x) \ + GC_LOAD3(r, name, gc_vector_offset(name, int(x)) >> name##_buffer_data_type_size) + +#define GC_LOAD3_2D_OFFSET(r, name, x, y) \ + GC_LOAD3(r, name, gc_image_offset(name, int(x), int(y)) >> name##_buffer_data_type_size) + +#define GC_LOAD3_3D_OFFSET(r, name, x, y, z) \ + GC_LOAD3(r, name, gc_tensor3D_offset(name, int(x), int(y), int(z)) >> name##_buffer_data_type_size) + +///////////////////////////////////////////////////////////// + +#endif // _HELPER_H diff --git a/src/core/GLES_COMPUTE/cs_shaders/normalization_layer.cs b/src/core/GLES_COMPUTE/cs_shaders/normalization_layer.cs new file mode 100755 index 0000000000..5699340c14 --- /dev/null +++ b/src/core/GLES_COMPUTE/cs_shaders/normalization_layer.cs @@ -0,0 +1,157 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +layout(local_size_x = LOCAL_SIZE_X, local_size_y = LOCAL_SIZE_Y, local_size_z = LOCAL_SIZE_Z) in; + +#include "helpers.h" + +layout(std140) uniform shader_params +{ + TENSOR3D_PARAM_DECLARATION(src1); + TENSOR3D_PARAM_DECLARATION(src2); + TENSOR3D_PARAM_DECLARATION(dst); +}; + +BUFFER_DECLARATION(src1, 1, float, readonly); +BUFFER_DECLARATION(src2, 2, float, readonly); +BUFFER_DECLARATION(dst, 3, float, writeonly); + +#ifdef CROSS_MAP +/** Apply cross map normalization. + * + * @note Alpha parameter / norm_size should be given as a preprocessor argument using "#define COEFF x" + * @note BETA parameter in the normalization equation should be given as a preprocessor argument using "#define BETA x" + * @note KAPPA parameter in the normalization equation should be given as a preprocessor argument using "#define KAPPA x" + * @note Number of elements on the right or left side to normalize across should be given as a preprocessor argument using "#define RADIUS x" + * + * @param[in] src1_ptr Pointer to the first source tensor. 
Supported data types: F32 + * @param[in] src1_stride_x Stride of the first source tensor in X dimension (in bytes) + * @param[in] src1_step_x src1_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src1_stride_y Stride of the first source tensor in Y dimension (in bytes) + * @param[in] src1_step_y src1_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src1_stride_z Stride of the first source tensor in Z dimension (in bytes) + * @param[in] src1_step_z src1_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] src1_offset_first_element_in_bytes The offset of the first element in the first source tensor + * @param[in] src2_ptr Pointer to the second source tensor. Supported data types: Same as @p src1_ptr + * @param[in] src2_stride_x Stride of the second source tensor in X dimension (in bytes) + * @param[in] src2_step_x src2_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src2_stride_y Stride of the second source tensor in Y dimension (in bytes) + * @param[in] src2_step_y src2_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src2_stride_z Stride of the second source tensor in Z dimension (in bytes) + * @param[in] src2_step_z src2_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] src2_offset_first_element_in_bytes The offset of the second element in the second source tensor + * @param[out] dst_ptr Pointer to the destination tensor. 
Supported data types: Same as @p src1_ptr + * @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes) + * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes) + * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in bytes) + * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor + */ +void main(void) +{ + Tensor3D src1 = CONVERT_TO_TENSOR3D_STRUCT(src1); + Tensor3D src2 = CONVERT_TO_TENSOR3D_STRUCT(src2); + Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT(dst); + + float acc = 0.0; + + int num_of_slices = int(gl_NumWorkGroups.z * gl_WorkGroupSize.z); + int current_slice = int(gl_GlobalInvocationID.z); + + int left_slice = max(current_slice - int(RADIUS), int(0)); + int right_slice = min(current_slice + int(RADIUS), int(num_of_slices - 1)); + + for(int i = left_slice; i <= right_slice; i++) + { + acc += src2_ptr[tensor3D_offset(src2, 0, 0, i - current_slice)]; + } + + float normalized = pow(float(KAPPA) + float(COEFF) * acc, float(BETA)); + + float normalized_pixel = (src1_ptr[src1.current_offset]) / normalized; + + dst_ptr[dst.current_offset] = normalized_pixel; +} + +#elif defined(IN_MAP_1D) +/** Apply in map normalization. 
+ * + * @note Alpha parameter / norm_size should be given as a preprocessor argument using "#define COEFF x" + * @note BETA parameter in the normalization equation should be given as a preprocessor argument using "#define BETA x" + * @note KAPPA parameter in the normalization equation should be given as a preprocessor argument using "#define KAPPA x" + * @note Number of elements on the right or left side to normalize across should be given as a preprocessor argument using "#define RADIUS x" + * + * @param[in] src1_ptr Pointer to the first source tensor. Supported data types: F32 + * @param[in] src1_stride_x Stride of the first source tensor in X dimension (in bytes) + * @param[in] src1_step_x src1_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src1_stride_y Stride of the first source tensor in Y dimension (in bytes) + * @param[in] src1_step_y src1_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src1_stride_z Stride of the first source tensor in Z dimension (in bytes) + * @param[in] src1_step_z src1_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] src1_offset_first_element_in_bytes The offset of the first element in the first source tensor + * @param[in] src2_ptr Pointer to the second source tensor. 
Supported data types: Same as @p src1_ptr + * @param[in] src2_stride_x Stride of the second source tensor in X dimension (in bytes) + * @param[in] src2_step_x src2_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src2_stride_y Stride of the second source tensor in Y dimension (in bytes) + * @param[in] src2_step_y src2_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src2_stride_z Stride of the second source tensor in Z dimension (in bytes) + * @param[in] src2_step_z src2_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] src2_offset_first_element_in_bytes The offset of the first element in the second source tensor + * @param[out] dst_ptr Pointer to the destination tensor. Supported data types: Same as @p src1_ptr + * @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes) + * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes) + * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in bytes) + * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor + */ +void main(void) +{ + Tensor3D src1 = CONVERT_TO_TENSOR3D_STRUCT(src1); + Tensor3D src2 = CONVERT_TO_TENSOR3D_STRUCT(src2); + Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT(dst); + + float acc = 0.0; + + int num_of_items_x = int(gl_NumWorkGroups.x * gl_WorkGroupSize.x); + int current_pos = int(gl_GlobalInvocationID.x); + + int left_pos = max(current_pos - int(RADIUS), int(0)); + int right_pos = min(current_pos + int(RADIUS), int(num_of_items_x - 1)); + + for(int i = left_pos; i <= 
right_pos; i++) + { + acc += src2_ptr[tensor3D_offset(src2, i - current_pos, 0, 0)]; + } + + float normalized = pow(float(KAPPA) + float(COEFF) * acc, float(BETA)); + + float normalized_pixel = (src1_ptr[src1.current_offset]) / normalized; + + dst_ptr[dst.current_offset] = normalized_pixel; +} +#endif /*CROSS_MAP*/ diff --git a/src/core/GLES_COMPUTE/cs_shaders/pixelwise_mul_float.cs b/src/core/GLES_COMPUTE/cs_shaders/pixelwise_mul_float.cs new file mode 100644 index 0000000000..031687af0c --- /dev/null +++ b/src/core/GLES_COMPUTE/cs_shaders/pixelwise_mul_float.cs @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +layout(local_size_x = LOCAL_SIZE_X, local_size_y = LOCAL_SIZE_Y, local_size_z = LOCAL_SIZE_Z) in; +#include "helpers.h" + +layout(std140) uniform shader_params +{ + TENSOR3D_PARAM_DECLARATION(src1); + TENSOR3D_PARAM_DECLARATION(src2); + TENSOR3D_PARAM_DECLARATION(dst); +}; + +BUFFER_DECLARATION(src1, 1, float, readonly); +BUFFER_DECLARATION(src2, 2, float, readonly); +BUFFER_DECLARATION(dst, 3, float, writeonly); + +/** Performs a pixelwise multiplication with float scale of either integer or float inputs. + * + * @param[in] src1_ptr Pointer to the source image. Supported data types: F32 + * @param[in] src1_stride_x Stride of the source image in X dimension (in bytes) + * @param[in] src1_step_x src1_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src1_stride_y Stride of the source image in Y dimension (in bytes) + * @param[in] src1_step_y src1_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src1_stride_z Stride of the source image in Y dimension (in bytes) + * @param[in] src1_step_z src1_stride_z * number of elements along Y processed per workitem(in bytes) + * @param[in] src1_offset_first_element_in_bytes The offset of the first element in the source image + * @param[in] src2_ptr Pointer to the source image. 
Supported data types: Same as @p src1_ptr + * @param[in] src2_stride_x Stride of the source image in X dimension (in bytes) + * @param[in] src2_step_x src2_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src2_stride_y Stride of the source image in Y dimension (in bytes) + * @param[in] src2_step_y src2_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src2_stride_z Stride of the source image in Y dimension (in bytes) + * @param[in] src2_step_z src2_stride_z * number of elements along Y processed per workitem(in bytes) + * @param[in] src2_offset_first_element_in_bytes The offset of the first element in the source image + * @param[out] dst_ptr Pointer to the destination image. Supported data types: Same as @p src1_ptr + * @param[in] dst_stride_x Stride of the destination image in X dimension (in bytes) + * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] dst_stride_y Stride of the destination image in Y dimension (in bytes) + * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] dst_stride_z Stride of the destination image in Y dimension (in bytes) + * @param[in] dst_step_z dst_stride_z * number of elements along Y processed per workitem(in bytes) + * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination image + * @param[in] scale Float scaling factor. 
Supported data types: F32 + */ +void main() +{ + // Get pixels pointer + Tensor3D src1 = CONVERT_TO_TENSOR3D_STRUCT(src1); + Tensor3D src2 = CONVERT_TO_TENSOR3D_STRUCT(src2); + Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT(dst); + + dst_ptr[dst.current_offset] = (src1_ptr[src1.current_offset] * src2_ptr[src2.current_offset] * float(SCALE)); +} diff --git a/src/core/GLES_COMPUTE/cs_shaders/pooling_layer.cs b/src/core/GLES_COMPUTE/cs_shaders/pooling_layer.cs new file mode 100644 index 0000000000..1e0fee4688 --- /dev/null +++ b/src/core/GLES_COMPUTE/cs_shaders/pooling_layer.cs @@ -0,0 +1,1444 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +layout(local_size_x = LOCAL_SIZE_X, local_size_y = LOCAL_SIZE_Y, local_size_z = LOCAL_SIZE_Z) in; +#include "helpers.h" + +#if defined(DATA_TYPE_FP32) + +float calculate_max(const int, Tensor3D, const int, const int, const int, const int, const int, const int); +float calculate_avg(const int, Tensor3D, const int, const int, const int, const int, const int, const int); + +BUFFER_DECLARATION(src, 1, float, readonly); +BUFFER_DECLARATION(dst, 2, float, writeonly); + +layout(std140) uniform shader_params +{ + TENSOR3D_PARAM_DECLARATION(src); + TENSOR3D_PARAM_DECLARATION(dst); +}; + +#define LOAD8(r, name, offset) \ + r.x = LOAD4(name, offset); \ + r.y = LOAD4(name, offset + uint(1)) + +#define LOAD16(r, name, offset) \ + r.x = LOAD4(name, offset); \ + r.y = LOAD4(name, offset + uint(1)); \ + r.z = LOAD4(name, offset + uint(2)); \ + r.w = LOAD4(name, offset + uint(3)) + +#define STORE16(name, offset, r) \ + STORE4(name, offset, r.x); \ + STORE4(name, offset + uint(1), r.y); \ + STORE4(name, offset + uint(2), r.z); \ + STORE4(name, offset + uint(3), r.w) + +#if defined(POOL_AVG) || defined(POOL_L2) +#define POOL_OP(res, a, b) ((res) = (a) + (b)) +#define POOL_OP_float(res, a, b) (res = a + b) +#define POOL_OP_vec2(res, a, b) ((res) = (a) + (b)) +#else /* defined(POOL_AVG) || defined(POOL_L2) */ +#define POOL_OP(res, a, b) \ + (res) = (a); \ + if(isnan(a.x) || (a.x < b.x)) \ + { \ + res.x = b.x; \ + } \ + if(isnan(a.y) || (a.y < b.y)) \ + { \ + res.y = b.y; \ + } \ + if(isnan(a.z) || (a.z < b.z)) \ + { \ + res.z = b.z; \ + } \ + if(isnan(a.w) || (a.w < b.w)) \ + { \ + res.w = b.w; \ + } +#define POOL_OP_float(res, a, b) \ + (res) = (a); \ + if(isnan(a) || (a < b)) \ + { \ + res = b; \ + } +#define POOL_OP_vec2(res, a, b) \ + (res) = (a); \ + if(isnan(a.x) || (a.x < b.x)) \ + { \ + res.x = b.x; \ + } \ + if(isnan(a.y) || (a.y < b.y)) \ + { \ + res.y = b.y; \ + } +#endif /* defined(POOL_AVG) || defined(POOL_L2) */ + +#if defined(POOL_L2) +#define POW2_OP(x, 
vec_size) ((x) * (x)) +#else /* defined(POOL_L2) */ +#define POW2_OP(x, vec_size) (x) +#endif /* defined(POOL_L2) */ + +#define DIV_OP(x, y) (x * (1.f / y)) +#define SQRT_OP(x) sqrt((x)) + +#if defined(POOL_SIZE) +// Set the initial value for the pooling operation accordingly with the data type +#if defined(POOL_AVG) || defined(POOL_L2) +#define INITIAL_VALUE 0.0f +#else /* defined(POOL_AVG) || defined(POOL_L2) */ +#define INITIAL_VALUE -3.402823466385289e+38 +#endif // POOL_AVG +#endif //POOL_SIZE + +#define POOLING3x3_STRIDE1(res, input, output) \ + vec4 data00; \ + vec2 data01; \ + vec4 data10; \ + vec2 data11; \ + vec4 data20; \ + vec2 data21; \ + LOAD16(data00, input, tensor3D_offset(input, 0, 0, 0)); \ + LOAD8(data01, input, tensor3D_offset(input, 0, 0, 0) + uint(4)); \ + LOAD16(data10, input, tensor3D_offset(input, 0, 1, 0)); \ + LOAD8(data11, input, tensor3D_offset(input, 0, 1, 0) + uint(4)); \ + LOAD16(data20, input, tensor3D_offset(input, 0, 2, 0)); \ + LOAD8(data21, input, tensor3D_offset(input, 0, 2, 0) + uint(4)); \ + data00 = POW2_OP(data00, 4); \ + data01 = POW2_OP(data01, 2); \ + data10 = POW2_OP(data10, 4); \ + data11 = POW2_OP(data11, 2); \ + data20 = POW2_OP(data20, 4); \ + data21 = POW2_OP(data21, 2); \ + \ + vec4 values000; \ + vec4 values001; \ + vec4 values010; \ + vec4 values100; \ + vec4 values101; \ + vec4 values11; \ + vec4 values200; \ + vec4 values201; \ + vec4 values21; \ + values000.xyzw = data00.xyzy; \ + values001.xyzw = data00.zwzw; \ + values010.x = data01.x; \ + values010.y = data00.w; \ + values010.zw = data01.xy; \ + values100.xyzw = data10.xyzy; \ + values101.xyzw = data10.zwzw; \ + values11.x = data11.x; \ + values11.y = data10.w; \ + values11.zw = data11.xy; \ + values200.xyzw = data20.xyzy; \ + values201.xyzw = data20.zwzw; \ + values21.x = data21.x; \ + values21.y = data20.w; \ + values21.zw = data21.xy; \ + POOL_OP(values000.xyzw, values000.xyzw, values100.xyzw); \ + POOL_OP(values001.xyzw, values001.xyzw, 
values101.xyzw); \ + POOL_OP(values010.xyzw, values010.xyzw, values11.xyzw); \ + POOL_OP(values000.xyzw, values000.xyzw, values200.xyzw); \ + POOL_OP(values001.xyzw, values001.xyzw, values201.xyzw); \ + POOL_OP(values010.xyzw, values010.xyzw, values21.xyzw); \ + POOL_OP(res.xyzw, vec4(values000.xw, values001.z, values010.y), vec4(values000.y, values001.xw, values010.z)); \ + POOL_OP(res.xyzw, res.xyzw, vec4(values000.z, values001.y, values010.xw)) + +#define POOLING3x3_STRIDE2(res, input, output) \ + vec4 data000; \ + vec4 data001; \ + float data010; \ + vec4 data100; \ + vec4 data101; \ + float data11; \ + vec4 data200; \ + vec4 data201; \ + float data21; \ + LOAD16(data000, input, tensor3D_offset(input, 0, 0, 0)); \ + LOAD16(data001, input, tensor3D_offset(input, 0, 0, 0) + uint(4)); \ + data010 = LOAD4(input, tensor3D_offset(input, 0, 0, 0) + uint(8)); \ + LOAD16(data100, input, tensor3D_offset(input, 0, 1, 0)); \ + LOAD16(data101, input, tensor3D_offset(input, 0, 1, 0) + uint(4)); \ + data11 = LOAD4(input, tensor3D_offset(input, 0, 1, 0) + uint(8)); \ + LOAD16(data200, input, tensor3D_offset(input, 0, 2, 0)); \ + LOAD16(data201, input, tensor3D_offset(input, 0, 2, 0) + uint(4)); \ + data21 = LOAD4(input, tensor3D_offset(input, 0, 2, 0) + uint(8)); \ + data000 = POW2_OP(data000, 4); \ + data001 = POW2_OP(data001, 4); \ + data010 = POW2_OP(data010, 1); \ + data100 = POW2_OP(data100, 4); \ + data101 = POW2_OP(data101, 4); \ + data11 = POW2_OP(data11, 1); \ + data200 = POW2_OP(data200, 4); \ + data201 = POW2_OP(data201, 4); \ + data21 = POW2_OP(data21, 1); \ + \ + vec4 values000; \ + vec4 values001; \ + vec4 values010; \ + vec4 values100; \ + vec4 values101; \ + vec4 values11; \ + vec4 values200; \ + vec4 values201; \ + vec4 values21; \ + values000.xyzw = data000.xyzz; \ + values001.xyzw = vec4(data000.w, data001.xxy); \ + values010.xyzw = vec4(data001.zzw, data010); \ + values100.xyzw = data100.xyzz; \ + values101.xyzw = vec4(data100.w, data101.xxy); \ + 
values11.xyzw = vec4(data101.zzw, data11); \ + values200.xyzw = data200.xyzz; \ + values201.xyzw = vec4(data200.w, data201.xxy); \ + values21.xyzw = vec4(data201.zzw, data21); \ + POOL_OP(values000.xyzw, values000.xyzw, values100.xyzw); \ + POOL_OP(values001.xyzw, values001.xyzw, values101.xyzw); \ + POOL_OP(values010.xyzw, values010.xyzw, values11.xyzw); \ + POOL_OP(values000.xyzw, values000.xyzw, values200.xyzw); \ + POOL_OP(values001.xyzw, values001.xyzw, values201.xyzw); \ + POOL_OP(values010.xyzw, values010.xyzw, values21.xyzw); \ + POOL_OP(res.xyzw, vec4(values000.xw, values001.z, values010.y), vec4(values000.y, values001.xw, values010.z)); \ + POOL_OP(res.xyzw, res.xyzw, vec4(values000.z, values001.y, values010.xw)) + +#define POOLING3x3_STRIDE3(res, input, output) \ + vec4 data000; \ + vec4 data001; \ + vec4 data010; \ + vec4 data100; \ + vec4 data101; \ + vec4 data11; \ + vec4 data200; \ + vec4 data201; \ + vec4 data21; \ + LOAD16(data000, input, tensor3D_offset(input, 0, 0, 0)); \ + LOAD16(data001, input, tensor3D_offset(input, 0, 0, 0) + uint(4)); \ + LOAD16(data010, input, tensor3D_offset(input, 0, 0, 0) + uint(8)); \ + LOAD16(data100, input, tensor3D_offset(input, 0, 1, 0)); \ + LOAD16(data101, input, tensor3D_offset(input, 0, 1, 0) + uint(4)); \ + LOAD16(data11, input, tensor3D_offset(input, 0, 1, 0) + uint(8)); \ + LOAD16(data200, input, tensor3D_offset(input, 0, 2, 0)); \ + LOAD16(data201, input, tensor3D_offset(input, 0, 2, 0) + uint(4)); \ + LOAD16(data21, input, tensor3D_offset(input, 0, 2, 0) + uint(8)); \ + data000 = POW2_OP(data000, 4); \ + data001 = POW2_OP(data001, 4); \ + data010 = POW2_OP(data010, 4); \ + data100 = POW2_OP(data100, 4); \ + data101 = POW2_OP(data101, 4); \ + data11 = POW2_OP(data11, 4); \ + data200 = POW2_OP(data200, 4); \ + data201 = POW2_OP(data201, 4); \ + data21 = POW2_OP(data21, 4); \ + \ + POOL_OP(data000.xyzw, data000.xyzw, data100.xyzw); \ + POOL_OP(data001.xyzw, data001.xyzw, data101.xyzw); \ + 
POOL_OP(data010.xyzw, data010.xyzw, data11.xyzw); \ + POOL_OP(data000.xyzw, data000.xyzw, data200.xyzw); \ + POOL_OP(data001.xyzw, data001.xyzw, data201.xyzw); \ + POOL_OP(data010.xyzw, data010.xyzw, data21.xyzw); \ + POOL_OP(res.xyzw, vec4(data000.xw, data001.z, data010.y), vec4(data000.y, data001.xw, data010.z)); \ + POOL_OP(res.xyzw, res.xyzw, vec4(data000.z, data001.y, data010.xw)) + +float calculate_max(const int pool_size, Tensor3D src, const int upper_bound_w, const int upper_bound_h, const int pad_x, const int pad_y, const int stride_x, const int stride_y) +{ + int start_x = int(gl_GlobalInvocationID.x) * stride_x - pad_x; + int start_y = int(gl_GlobalInvocationID.y) * stride_y - pad_y; + int end_x = int(min(start_x + pool_size, upper_bound_w)); + int end_y = int(min(start_y + pool_size, upper_bound_h)); + + float data_max; + data_max = LOAD4(src, tensor3D_offset(src, 0, 0, 0)); + + for(int i = 0; (start_x + i) < end_x; ++i) + { + for(int j = 0; (start_y + j) < end_y; ++j) + { + float data = LOAD4(src, tensor3D_offset(src, i, j, 0)); + POOL_OP_float(data_max, data_max, data); + } + } + + return data_max; +} + +float calculate_avg(const int pool_size, Tensor3D src, const int upper_bound_w, const int upper_bound_h, const int pad_x, const int pad_y, const int stride_x, const int stride_y) +{ + int start_x = int(gl_GlobalInvocationID.x) * stride_x - pad_x; + int start_y = int(gl_GlobalInvocationID.y) * stride_y - pad_y; + int end_x = int(min(start_x + pool_size, upper_bound_w)); + int end_y = int(min(start_y + pool_size, upper_bound_h)); + + float data_total = 0.0f; + for(int i = 0; (start_x + i) < end_x; i++) + { + for(int j = 0; (start_y + j) < end_y; ++j) + { + float data = LOAD4(src, tensor3D_offset(src, i, j, 0)); + if(isnan(data)) + { + data = 0.0f; + } +#if defined(POOL_L2) + // Raise to power of 2 for L2 Pooling + data = POW2_OP(data, 1); +#endif /* defined(POOL_L2) */ + data_total = data_total + data; + } + } + + return data_total / float((end_y - 
start_y) * (end_x - start_x)); +} + +#ifdef POOLING_LAYER_2 +/** Performs a pooling function of pool size equal to 2. + * + * @note Supported data types are F32; + * @note In case of average pooling the following information must be passed at compile time: + * POOL_AVG must be provided otherwise max pooling will be performed. + * MAX_WIDTH and MAX_HEIGHT which are the maximum accessible indeces in x and y dimensions (width + pad) + * STRIDE_X and STRIDE_Y which are the steps of the window along the x and y directions + * PAD_X and PAD_Y which are the pooling paddings in x and y dimension + * + * @param[in] src_ptr Pointer to the source image. Supported data types: F32 + * @param[in] src_stride_x Stride of the source image in X dimension (in bytes) + * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src_stride_y Stride of the source image in Y dimension (in bytes) + * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] src_step_z src_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source image + * @param[out] dst_ptr Pointer to the destination image. 
Supported data types: same as @p src_ptr + * @param[in] dst_stride_x Stride of the destination image in X dimension (in bytes) + * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] dst_stride_y Stride of the destination image in Y dimension (in bytes) + * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] dst_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination image + */ +void main(void) +{ + // Get pixels pointer + Tensor3D src = CONVERT_TO_TENSOR3D_STRUCT(src); + Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT(dst); + + //Load and calculate data + float res; +#if defined(POOL_AVG) || defined(POOL_L2) + res = calculate_avg(2, src, MAX_WIDTH, MAX_HEIGHT, PAD_X, PAD_Y, STRIDE_X, STRIDE_Y); +#else /*POOL_AVG*/ + res = calculate_max(2, src, MAX_WIDTH, MAX_HEIGHT, PAD_X, PAD_Y, STRIDE_X, STRIDE_Y); +#endif /*POOL_AVG*/ + +#if defined(POOL_L2) + // Take square root of the result in L2 pooling + res = SQRT_OP(res); +#endif /* defined(POOL_L2) */ + + // Store result + STORE4(dst, CURRENT_OFFSET(dst), res); +} + +#elif defined(POOLING_LAYER_3) +/** Performs a pooling function of pool size equal to 3. + * + * @note Supported data types are F32; + * @note In case of average pooling the following information must be passed at compile time: + * POOL_AVG must be provided otherwise max pooling will be performed. + * MAX_WIDTH and MAX_HEIGHT which are the maximum accessible indeces in x and y dimensions (width + pad) + * STRIDE_X and STRIDE_Y which are the steps of the window along the x and y directions + * PAD_X and PAD_Y which are the pooling paddings in x and y dimension + * + * @param[in] src_ptr Pointer to the source image. 
Supported data types: F32 + * @param[in] src_stride_x Stride of the source image in X dimension (in bytes) + * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src_stride_y Stride of the source image in Y dimension (in bytes) + * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] src_step_z src_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source image + * @param[out] dst_ptr Pointer to the destination image. Supported data types: same as @p src_ptr + * @param[in] dst_stride_x Stride of the destination image in X dimension (in bytes) + * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] dst_stride_y Stride of the destination image in Y dimension (in bytes) + * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] dst_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination image + */ +void main(void) +{ + // Get pixels pointer + Tensor3D src = CONVERT_TO_TENSOR3D_STRUCT(src); + Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT(dst); + + //Load and calculate data + float res; +#if defined(POOL_AVG) || defined(POOL_L2) + res = calculate_avg(3, src, MAX_WIDTH, MAX_HEIGHT, PAD_X, PAD_Y, STRIDE_X, STRIDE_Y); +#else /*POOL_AVG*/ + res = calculate_max(3, src, MAX_WIDTH, MAX_HEIGHT, PAD_X, PAD_Y, STRIDE_X, STRIDE_Y); +#endif /*POOL_AVG*/ + +#if defined(POOL_L2) + // Take square root of the result in L2 pooling + res = SQRT_OP(res); +#endif /* 
defined(POOL_L2) */ + + // Store result + STORE4(dst, CURRENT_OFFSET(dst), res); +} + +#elif defined(POOLING_LAYER_3_OPTIMIZED) +/** Performs an optimized pooling function of pool size equal to 3 when the stride_x is less equal than 3 + * + * @note Supported data types are F32; + * @note In case of average pooling the following information must be passed at compile time: + * POOL_AVG must be provided otherwise max pooling will be performed. + * MAX_WIDTH and MAX_HEIGHT which are the maximum accessible indeces in x and y dimensions (width + pad) + * STRIDE_X and STRIDE_Y which are the steps of the window along the x and y directions + * PAD_X and PAD_Y which are the pooling paddings in x and y dimension + * + * @param[in] src_ptr Pointer to the source image. Supported data types: F32 + * @param[in] src_stride_x Stride of the source image in X dimension (in bytes) + * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src_stride_y Stride of the source image in Y dimension (in bytes) + * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] src_step_z src_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source image + * @param[out] dst_ptr Pointer to the destination image. 
Supported data types: same as @p src_ptr + * @param[in] dst_stride_x Stride of the destination image in X dimension (in bytes) + * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] dst_stride_y Stride of the destination image in Y dimension (in bytes) + * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] dst_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination image + */ +void main(void) +{ + // Get pixels pointer + Tensor3D src = CONVERT_TO_TENSOR3D_STRUCT(src); + Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT(dst); + + vec4 res; + // Perform pooling 3x3 for 4 output elements +#if STRIDE_X == 1 + POOLING3x3_STRIDE1(res, src, dst); +#elif STRIDE_X == 2 + POOLING3x3_STRIDE2(res, src, dst); +#elif STRIDE_X == 3 + POOLING3x3_STRIDE3(res, src, dst); +#endif /*STRIDE_X == 1*/ + + // Divide by pool region in case of average pooling +#if defined(POOL_AVG) || defined(POOL_L2) + ivec4 start_x = ((ivec4(int(gl_GlobalInvocationID.x) * 4) + ivec4(0, 1, 2, 3)) * (ivec4(STRIDE_X))) - (ivec4(PAD_X)); + int start_y = int(gl_GlobalInvocationID.y) * STRIDE_Y - PAD_Y; + ivec4 end_x = min((start_x + (ivec4(3))), (ivec4(MAX_WIDTH))); + int end_y = min((start_y + 3), MAX_HEIGHT); + res *= (vec4((1.f)) / vec4((ivec4(end_y - start_y)) * (end_x - start_x))); +#endif /*POOL_AVG*/ + +#if defined(POOL_L2) + // Take square root of the result in L2 pooling + res = SQRT_OP(res); +#endif /* defined(POOL_L2) */ + + STORE16(dst, CURRENT_OFFSET(dst), res); +} + +#elif defined(POOLING_LAYER_7) +/** Performs a pooling function of pool size equal to 7. 
+ * + * @note Supported data types are F32; + * @note In case of average pooling the following information must be passed at compile time: + * POOL_AVG must be provided otherwise max pooling will be performed. + * MAX_WIDTH and MAX_HEIGHT which are the maximum accessible indeces in x and y dimensions (width + pad) + * STRIDE_X and STRIDE_Y which are the steps of the window along the x and y directions + * PAD_X and PAD_Y which are the pooling paddings in x and y dimension + * + * @param[in] src_ptr Pointer to the source image. Supported data types: F32 + * @param[in] src_stride_x Stride of the source image in X dimension (in bytes) + * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src_stride_y Stride of the source image in Y dimension (in bytes) + * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] src_step_z src_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source image + * @param[out] dst_ptr Pointer to the destination image. 
Supported data types: same as @p src_ptr + * @param[in] dst_stride_x Stride of the destination image in X dimension (in bytes) + * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] dst_stride_y Stride of the destination image in Y dimension (in bytes) + * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] dst_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination image + */ +void main(void) +{ + // Get pixels pointer + Tensor3D src = CONVERT_TO_TENSOR3D_STRUCT(src); + Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT(dst); + + //Load and calculate data + float res; +#if defined(POOL_AVG) || defined(POOL_L2) + res = calculate_avg(7, src, MAX_WIDTH, MAX_HEIGHT, PAD_X, PAD_Y, STRIDE_X, STRIDE_Y); +#else /*POOL_AVG*/ + res = calculate_max(7, src, MAX_WIDTH, MAX_HEIGHT, PAD_X, PAD_Y, STRIDE_X, STRIDE_Y); +#endif /*POOL_AVG*/ + +#if defined(POOL_L2) + // Take square root of the result in L2 pooling + res = SQRT_OP(res); +#endif /* defined(POOL_L2) */ + + // Store result + STORE4(dst, CURRENT_OFFSET(dst), res); +} + +#elif defined(POOLING_LAYER_N) +/** Performs a pooling function of pool size equal to N + * + * @note Supported data types are F32; + * @note Pool size must be passed using POOL_SIZE e.g. POOL_SIZE=13; + * @note In case of average pooling the following information must be passed at compile time: + * POOL_AVG must be provided otherwise max pooling will be performed. 
+ * MAX_WIDTH and MAX_HEIGHT which are the maximum accessible indeces in x and y dimensions (width + pad) + * STRIDE_X and STRIDE_Y which are the steps of the window along the x and y directions + * PAD_X and PAD_Y which are the pooling paddings in x and y dimension + * + * @param[in] src_ptr Pointer to the source image. Supported data types: F32 + * @param[in] src_stride_x Stride of the source image in X dimension (in bytes) + * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src_stride_y Stride of the source image in Y dimension (in bytes) + * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] src_step_z src_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source image + * @param[out] dst_ptr Pointer to the destination image. 
Supported data types: same as @p src_ptr + * @param[in] dst_stride_x Stride of the destination image in X dimension (in bytes) + * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] dst_stride_y Stride of the destination image in Y dimension (in bytes) + * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] dst_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination image + */ +void main(void) +{ + // Get pixels pointer + Tensor3D src = CONVERT_TO_TENSOR3D_STRUCT(src); + Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT(dst); + + vec4 vdata0; + vdata0 = vec4(INITIAL_VALUE); + vec4 vdata1; + vdata1 = vec4(INITIAL_VALUE); + float sdata; + sdata = float(INITIAL_VALUE); + + for(int y = 0; y < int(POOL_SIZE); y++) + { + int x = 0; + for(; x <= (int(POOL_SIZE) - 8); x += 8) + { + vec4 data2; + vec4 data3; + LOAD16(data2, src, tensor3D_offset(src, x, y, 0)); + LOAD16(data3, src, tensor3D_offset(src, x, y, 0) + uint(4)); + +#if defined(POOL_L2) + // Raise to power of 2 for L2 Pooling + data2 *= data2; + data3 *= data3; +#endif /* defined(POOL_L2) */ + + POOL_OP(vdata0, vdata0, data2); + POOL_OP(vdata1, vdata1, data3); + } + + // Leftover + for(; x < int(POOL_SIZE); ++x) + { + float data4 = LOAD4(src, tensor3D_offset(src, x, y, 0)); +#if defined(POOL_L2) + // Raise to power of 2 for L2 Pooling + data4 *= data4; +#endif /* defined(POOL_L2) */ + POOL_OP_float(sdata, sdata, data4); + } + } + + //Reduce result + vec4 reduce4; + POOL_OP(reduce4, vdata0.xyzw, vdata1.xyzw); + vec2 reduce2; + POOL_OP_vec2(reduce2, reduce4.xy, reduce4.zw); + float res; + POOL_OP_float(res, reduce2.x, reduce2.y); + POOL_OP_float(res, res, sdata); + +#if defined(POOL_AVG) || 
defined(POOL_L2) + { + // Divide by pool region in case of average pooling + int start_x = int(gl_GlobalInvocationID.x) * STRIDE_X - PAD_X; + int start_y = int(gl_GlobalInvocationID.y) * STRIDE_Y - PAD_Y; + int end_x = int(min(STRIDE_X + POOL_SIZE, MAX_WIDTH)); + int end_y = int(min(STRIDE_Y + POOL_SIZE, MAX_HEIGHT)); + float res1 = float((end_y - start_y) * (end_x - start_x)); + res = DIV_OP(res, res1); + } +#endif /* defined(POOL_AVG) || defined(POOL_L2) */ + +#if defined(POOL_L2) + // Take square root of the result in L2 pooling + res = SQRT_OP(res); +#endif /* defined(POOL_L2) */ + + // Store result + STORE4(dst, CURRENT_OFFSET(dst), res); +} +#endif /* POOLING_LAYER_2 */ + +#elif defined(DATA_TYPE_FP16) + +precision mediump float; + +vec2 load_and_unpack(Tensor3D, uint); +vec2 calculate_max(const int, Tensor3D, const int, const int, const int, const int, const int, const int); +vec2 calculate_avg(const int, Tensor3D, const int, const int, const int, const int, const int, const int); + +BUFFER_DECLARATION(src, 1, uint, readonly); +BUFFER_DECLARATION(dst, 2, uint, writeonly); + +layout(std140) uniform shader_params +{ + TENSOR3D_PARAM_DECLARATION(src); + TENSOR3D_PARAM_DECLARATION(dst); +}; + +#define LOAD2_fp16(r, name, offset) \ + r.xy = load_and_unpack(name, offset) + +#define LOAD4_fp16(r, name, offset) \ + r.xy = load_and_unpack(name, offset); \ + r.zw = load_and_unpack(name, offset + uint(1)) + +#define STORE4_fp16(name, offset, r) \ + uint datastore1; \ + uint datastore2; \ + datastore1 = uint(packHalf2x16(r.xy)); \ + datastore2 = uint(packHalf2x16(r.zw)); \ + STORE1(name, offset << uint(1), datastore1); \ + STORE1(name, (offset << uint(1)) + uint(1), datastore2) + +#if defined(POOL_AVG) || defined(POOL_L2) +#define POOL_OP(res, a, b) ((res) = (a) + (b)) +#define POOL_OP_float(res, a, b) (res = a + b) +#define POOL_OP_vec2(res, a, b) ((res) = (a) + (b)) +#else /* defined(POOL_AVG) || defined(POOL_L2) */ +#define POOL_OP(res, a, b) \ + (res) = (a); \ + 
if(isnan(a.x) || (a.x < b.x)) \ + { \ + res.x = b.x; \ + } \ + if(isnan(a.y) || (a.y < b.y)) \ + { \ + res.y = b.y; \ + } \ + if(isnan(a.z) || (a.z < b.z)) \ + { \ + res.z = b.z; \ + } \ + if(isnan(a.w) || (a.w < b.w)) \ + { \ + res.w = b.w; \ + } +#define POOL_OP_float(res, a, b) \ + (res) = (a); \ + if(isnan(a) || (a < b)) \ + { \ + res = b; \ + } +#define POOL_OP_vec2(res, a, b) \ + (res) = (a); \ + if(isnan(a.x) || (a.x < b.x)) \ + { \ + res.x = b.x; \ + } \ + if(isnan(a.y) || (a.y < b.y)) \ + { \ + res.y = b.y; \ + } +#endif /* defined(POOL_AVG) || defined(POOL_L2) */ + +#if defined(POOL_L2) +#define POW2_OP(x, vec_size) ((x) * (x)) +#else /* defined(POOL_L2) */ +#define POW2_OP(x, vec_size) (x) +#endif /* defined(POOL_L2) */ + +#define DIV_OP(x, y) (x * (1.f / y)) +#define SQRT_OP(x) sqrt((x)) + +#if defined(POOL_SIZE) +// Set the initial value for the pooling operation accordingly with the data type +#if defined(POOL_AVG) || defined(POOL_L2) +#define INITIAL_VALUE 0.0f +#else /* defined(POOL_AVG) || defined(POOL_L2) */ +#define INITIAL_VALUE -65504.0f +#endif //POOL_AVG +#endif //POOL_SIZE + +#define POOLING3x3_STRIDE1_fp16(res, input, output) \ + vec4 data00; \ + vec2 data01; \ + vec4 data10; \ + vec2 data11; \ + vec4 data20; \ + vec2 data21; \ + LOAD4_fp16(data00, input, (tensor3D_offset_fp16(input, 0, 0, 0) >> uint(2))); \ + LOAD2_fp16(data01, input, (tensor3D_offset_fp16(input, 0, 0, 0) >> uint(2)) + uint(2)); \ + LOAD4_fp16(data10, input, (tensor3D_offset_fp16(input, 0, 1, 0) >> uint(2))); \ + LOAD2_fp16(data11, input, (tensor3D_offset_fp16(input, 0, 1, 0) >> uint(2)) + uint(2)); \ + LOAD4_fp16(data20, input, (tensor3D_offset_fp16(input, 0, 2, 0) >> uint(2))); \ + LOAD2_fp16(data21, input, (tensor3D_offset_fp16(input, 0, 2, 0) >> uint(2)) + uint(2)); \ + data00 = POW2_OP(data00, 4); \ + data01 = POW2_OP(data01, 2); \ + data10 = POW2_OP(data10, 4); \ + data11 = POW2_OP(data11, 2); \ + data20 = POW2_OP(data20, 4); \ + data21 = POW2_OP(data21, 2); \ + \ + 
vec4 values000; \ + vec4 values001; \ + vec4 values010; \ + vec4 values100; \ + vec4 values101; \ + vec4 values11; \ + vec4 values200; \ + vec4 values201; \ + vec4 values21; \ + values000.xyzw = data00.xyzy; \ + values001.xyzw = data00.zwzw; \ + values010.x = data01.x; \ + values010.y = data00.w; \ + values010.zw = data01.xy; \ + values100.xyzw = data10.xyzy; \ + values101.xyzw = data10.zwzw; \ + values11.x = data11.x; \ + values11.y = data10.w; \ + values11.zw = data11.xy; \ + values200.xyzw = data20.xyzy; \ + values201.xyzw = data20.zwzw; \ + values21.x = data21.x; \ + values21.y = data20.w; \ + values21.zw = data21.xy; \ + POOL_OP(values000.xyzw, values000.xyzw, values100.xyzw); \ + POOL_OP(values001.xyzw, values001.xyzw, values101.xyzw); \ + POOL_OP(values010.xyzw, values010.xyzw, values11.xyzw); \ + POOL_OP(values000.xyzw, values000.xyzw, values200.xyzw); \ + POOL_OP(values001.xyzw, values001.xyzw, values201.xyzw); \ + POOL_OP(values010.xyzw, values010.xyzw, values21.xyzw); \ + POOL_OP(res.xyzw, vec4(values000.xw, values001.z, values010.y), vec4(values000.y, values001.xw, values010.z)); \ + POOL_OP(res.xyzw, res.xyzw, vec4(values000.z, values001.y, values010.xw)) + +#define POOLING3x3_STRIDE2_fp16(res, input, output) \ + vec4 data000; \ + vec4 data001; \ + float data010; \ + vec4 data100; \ + vec4 data101; \ + float data11; \ + vec4 data200; \ + vec4 data201; \ + float data21; \ + vec2 datamiddle0; \ + vec2 datamiddle1; \ + vec2 datamiddle2; \ + LOAD4_fp16(data000, input, (tensor3D_offset_fp16(input, 0, 0, 0) >> uint(2))); \ + LOAD4_fp16(data001, input, (tensor3D_offset_fp16(input, 0, 0, 0) >> uint(2)) + uint(2)); \ + datamiddle0 = load_and_unpack(input, (tensor3D_offset_fp16(input, 0, 0, 0) >> uint(2)) + uint(4)); \ + data010 = datamiddle0.x; \ + LOAD4_fp16(data100, input, (tensor3D_offset_fp16(input, 0, 1, 0) >> uint(2))); \ + LOAD4_fp16(data101, input, (tensor3D_offset_fp16(input, 0, 1, 0) >> uint(2)) + uint(2)); \ + datamiddle1 = load_and_unpack(input, 
(tensor3D_offset_fp16(input, 0, 1, 0) >> uint(2)) + uint(4)); \ + data11 = datamiddle1.x; \ + LOAD4_fp16(data200, input, (tensor3D_offset_fp16(input, 0, 2, 0) >> uint(2))); \ + LOAD4_fp16(data201, input, (tensor3D_offset_fp16(input, 0, 2, 0) >> uint(2)) + uint(2)); \ + datamiddle2 = load_and_unpack(input, (tensor3D_offset_fp16(input, 0, 2, 0) >> uint(2)) + uint(4)); \ + data21 = datamiddle2.x; \ + data000 = POW2_OP(data000, 4); \ + data001 = POW2_OP(data001, 4); \ + data010 = POW2_OP(data010, 1); \ + data100 = POW2_OP(data100, 4); \ + data101 = POW2_OP(data101, 4); \ + data11 = POW2_OP(data11, 1); \ + data200 = POW2_OP(data200, 4); \ + data201 = POW2_OP(data201, 4); \ + data21 = POW2_OP(data21, 1); \ + \ + vec4 values000; \ + vec4 values001; \ + vec4 values010; \ + vec4 values100; \ + vec4 values101; \ + vec4 values11; \ + vec4 values200; \ + vec4 values201; \ + vec4 values21; \ + values000.xyzw = data000.xyzz; \ + values001.xyzw = vec4(data000.w, data001.xxy); \ + values010.xyzw = vec4(data001.zzw, data010); \ + values100.xyzw = data100.xyzz; \ + values101.xyzw = vec4(data100.w, data101.xxy); \ + values11.xyzw = vec4(data101.zzw, data11); \ + values200.xyzw = data200.xyzz; \ + values201.xyzw = vec4(data200.w, data201.xxy); \ + values21.xyzw = vec4(data201.zzw, data21); \ + POOL_OP(values000.xyzw, values000.xyzw, values100.xyzw); \ + POOL_OP(values001.xyzw, values001.xyzw, values101.xyzw); \ + POOL_OP(values010.xyzw, values010.xyzw, values11.xyzw); \ + POOL_OP(values000.xyzw, values000.xyzw, values200.xyzw); \ + POOL_OP(values001.xyzw, values001.xyzw, values201.xyzw); \ + POOL_OP(values010.xyzw, values010.xyzw, values21.xyzw); \ + POOL_OP(res.xyzw, vec4(values000.xw, values001.z, values010.y), vec4(values000.y, values001.xw, values010.z)); \ + POOL_OP(res.xyzw, res.xyzw, vec4(values000.z, values001.y, values010.xw)) + +#define POOLING3x3_STRIDE3_fp16(res, input, output) \ + vec4 data000; \ + vec4 data001; \ + vec4 data010; \ + vec4 data100; \ + vec4 data101; \ + 
vec4 data11; \
+    vec4 data200; \
+    vec4 data201; \
+    vec4 data21; \
+    LOAD4_fp16(data000, input, (tensor3D_offset_fp16(input, 0, 0, 0) >> uint(2))); \
+    LOAD4_fp16(data001, input, (tensor3D_offset_fp16(input, 0, 0, 0) >> uint(2)) + uint(2)); \
+    LOAD4_fp16(data010, input, (tensor3D_offset_fp16(input, 0, 0, 0) >> uint(2)) + uint(4)); \
+    LOAD4_fp16(data100, input, (tensor3D_offset_fp16(input, 0, 1, 0) >> uint(2))); \
+    LOAD4_fp16(data101, input, (tensor3D_offset_fp16(input, 0, 1, 0) >> uint(2)) + uint(2)); \
+    LOAD4_fp16(data11, input, (tensor3D_offset_fp16(input, 0, 1, 0) >> uint(2)) + uint(4)); \
+    LOAD4_fp16(data200, input, (tensor3D_offset_fp16(input, 0, 2, 0) >> uint(2))); \
+    LOAD4_fp16(data201, input, (tensor3D_offset_fp16(input, 0, 2, 0) >> uint(2)) + uint(2)); \
+    LOAD4_fp16(data21, input, (tensor3D_offset_fp16(input, 0, 2, 0) >> uint(2)) + uint(4)); \
+    data000 = POW2_OP(data000, 4); \
+    data001 = POW2_OP(data001, 4); \
+    data010 = POW2_OP(data010, 4); \
+    data100 = POW2_OP(data100, 4); \
+    data101 = POW2_OP(data101, 4); \
+    data11 = POW2_OP(data11, 4); \
+    data200 = POW2_OP(data200, 4); \
+    data201 = POW2_OP(data201, 4); \
+    data21 = POW2_OP(data21, 4); \
+    \
+    POOL_OP(data000.xyzw, data000.xyzw, data100.xyzw); \
+    POOL_OP(data001.xyzw, data001.xyzw, data101.xyzw); \
+    POOL_OP(data010.xyzw, data010.xyzw, data11.xyzw); \
+    POOL_OP(data000.xyzw, data000.xyzw, data200.xyzw); \
+    POOL_OP(data001.xyzw, data001.xyzw, data201.xyzw); \
+    POOL_OP(data010.xyzw, data010.xyzw, data21.xyzw); \
+    POOL_OP(res.xyzw, vec4(data000.xw, data001.z, data010.y), vec4(data000.y, data001.xw, data010.z)); \
+    POOL_OP(res.xyzw, res.xyzw, vec4(data000.z, data001.y, data010.xw))
+
+/** Load one packed word from @p src at @p offset and unpack it into two fp16 values.
+ *
+ * @param[in] src    Tensor holding half floats packed two-per-uint
+ * @param[in] offset Offset (in uints) of the packed word to load
+ *
+ * @return The two unpacked values as a vec2
+ */
+vec2 load_and_unpack(Tensor3D src, uint offset)
+{
+    uint packed_s;
+    vec2 s;
+    LOAD1(packed_s, src, offset);
+
+    s = vec2(unpackHalf2x16(packed_s));
+    return s;
+}
+
+vec2 calculate_max(const int pool_size, Tensor3D src, const int upper_bound_w, const int upper_bound_h, const int pad_x, const int pad_y,
const int stride_x, const int stride_y) +{ + int start_x1 = int(gl_GlobalInvocationID.x) * stride_x - pad_x; + int start_y1 = int(gl_GlobalInvocationID.y) * stride_y - pad_y; + int end_x1 = int(min(start_x1 + pool_size, upper_bound_w)); + int end_y1 = int(min(start_y1 + pool_size, upper_bound_h)); + + int start_x2 = start_x1 + stride_x; + int start_y2 = start_y1; + int end_x2 = int(min(start_x2 + pool_size, upper_bound_w)); + int end_y2 = int(min(start_y2 + pool_size, upper_bound_h)); + + //Initialize maximum + vec2 data_max = vec2(0); + + //Load and Set initial maximum1 + vec2 data_init1 = load_and_unpack(src, tensor3D_offset_fp16(src, 0, 0, 0) >> uint(2)); + data_max.x = data_init1.x; + + //Load and Set initial maximum2 + if(end_x1 < upper_bound_w) + { + if((stride_x % 2) == 0) + { + vec2 data_init2 = load_and_unpack(src, tensor3D_offset_fp16(src, stride_x, 0, 0) >> uint(2)); + data_max.y = data_init2.x; + } + else + { + vec2 data_init2 = load_and_unpack(src, tensor3D_offset_fp16(src, stride_x - 1, 0, 0) >> uint(2)); + data_max.y = data_init2.y; + } + } + + for(int i = 0; (start_y1 + i) < end_y1; i++) + for(int j = 0; (start_x1 + j) < end_x1; j = j + 2) + { + //Calculate maximum1 + if((start_x1 + j + 1) < end_x1) + { + vec2 data1 = load_and_unpack(src, tensor3D_offset_fp16(src, j, i, 0) >> uint(2)); + float data_mr1; + POOL_OP_float(data_mr1, data1.x, data1.y); + POOL_OP_float(data_max.x, data_max.x, data_mr1); + } + else + { + vec2 data1 = load_and_unpack(src, tensor3D_offset_fp16(src, j, i, 0) >> uint(2)); + POOL_OP_float(data_max.x, data_max.x, data1.x); + } + + //Calculate maximum2 + if((start_x2 + j) < end_x2 && end_x1 < upper_bound_w) + { + if((stride_x % 2) == 0) + { + vec2 data2 = load_and_unpack(src, (tensor3D_offset_fp16(src, (j + stride_x), i, 0) >> uint(2))); + + if((start_x2 + j + 1) < end_x2) + { + float data_mr2; + POOL_OP_float(data_mr2, data2.x, data2.y); + POOL_OP_float(data_max.y, data_max.y, data_mr2); + } + else + { + 
POOL_OP_float(data_max.y, data_max.y, data2.x); + } + } + else + { + vec2 data2 = load_and_unpack(src, (tensor3D_offset_fp16(src, (j + stride_x - 1), i, 0) >> uint(2))); + vec2 data3 = load_and_unpack(src, (tensor3D_offset_fp16(src, (j + stride_x + 1), i, 0) >> uint(2))); + if((start_x2 + j + 1) < end_x2) + { + float data_mr2; + POOL_OP_float(data_mr2, data3.x, data2.y); + POOL_OP_float(data_max.y, data_max.y, data_mr2); + } + else + { + POOL_OP_float(data_max.y, data_max.y, data2.y); + } + } + } + } + return data_max; +} + +vec2 calculate_avg(const int pool_size, Tensor3D src, const int upper_bound_w, const int upper_bound_h, const int pad_x, const int pad_y, const int stride_x, const int stride_y) +{ + int start_x1 = int(gl_GlobalInvocationID.x) * stride_x - pad_x; + int start_y1 = int(gl_GlobalInvocationID.y) * stride_y - pad_y; + int end_x1 = int(min(start_x1 + pool_size, upper_bound_w)); + int end_y1 = int(min(start_y1 + pool_size, upper_bound_h)); + + int start_x2 = start_x1 + stride_x; + int start_y2 = start_y1; + int end_x2 = int(min(start_x2 + pool_size, upper_bound_w)); + int end_y2 = int(min(start_y2 + pool_size, upper_bound_h)); + + //Initialize sum + float data_total1 = float(0); + float data_total2 = float(0); + for(int i = 0; (start_y1 + i) < end_y1; i++) + for(int j = 0; (start_x1 + j) < end_x1; j = j + 2) + { + vec2 data1 = load_and_unpack(src, tensor3D_offset_fp16(src, j, i, 0) >> uint(2)); +#if defined(POOL_L2) + // Raise to power of 2 for L2 Pooling + data1 = POW2_OP(data1, 2); +#endif /* defined(POOL_L2) */ + //Calculate sum1 + if((start_x1 + j + 1) < end_x1) + { + data_total1 = data_total1 + data1.x + data1.y; + } + else + { + data_total1 = data_total1 + data1.x; + } + + //Calculate sum2 + if((start_x2 + j) < end_x2 && end_x1 < upper_bound_w) + { + if((stride_x % 2) == 0) + { + vec2 data2 = load_and_unpack(src, (tensor3D_offset_fp16(src, (j + stride_x + 1), i, 0) >> uint(2))); +#if defined(POOL_L2) + // Raise to power of 2 for L2 Pooling + 
data2 = POW2_OP(data2, 2); +#endif /* defined(POOL_L2) */ + if((start_x2 + j + 1) < end_x2) + { + data_total2 = data_total2 + data2.x + data2.y; + } + else + { + data_total2 = data_total2 + data2.x; + } + } + else + { + vec2 data2 = load_and_unpack(src, (tensor3D_offset_fp16(src, (j + stride_x - 1), i, 0) >> uint(2))); + vec2 data3 = load_and_unpack(src, (tensor3D_offset_fp16(src, (j + stride_x + 1), i, 0) >> uint(2))); +#if defined(POOL_L2) + // Raise to power of 2 for L2 Pooling + data2 = POW2_OP(data2, 2); + data3 = POW2_OP(data3, 2); +#endif /* defined(POOL_L2) */ + if((start_x2 + j + 1) < end_x2) + { + data_total2 = data_total2 + data3.x + data2.y; + } + else + { + data_total2 = data_total2 + data2.y; + } + } + } + } + //Calculate average + vec2 data_avg; + data_avg.x = data_total1 / float((end_y1 - start_y1) * (end_x1 - start_x1)); + data_avg.y = data_total2 / float((end_y2 - start_y2) * (end_x2 - start_x2)); + + return data_avg; +} + +#ifdef POOLING_LAYER_2 +/** Performs a pooling function of pool size equal to 2. + * + * @note Supported data types are F16; + * @note In case of average pooling the following information must be passed at compile time: + * POOL_AVG must be provided otherwise max pooling will be performed. + * MAX_WIDTH and MAX_HEIGHT which are the maximum accessible indeces in x and y dimensions (width + pad) + * STRIDE_X and STRIDE_Y which are the steps of the window along the x and y directions + * PAD_X and PAD_Y which are the pooling paddings in x and y dimension + * + * @param[in] src_ptr Pointer to the source image. 
Supported data types: F16 + * @param[in] src_stride_x Stride of the source image in X dimension (in bytes) + * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src_stride_y Stride of the source image in Y dimension (in bytes) + * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] src_step_z src_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source image + * @param[out] dst_ptr Pointer to the destination image. Supported data types: same as @p src_ptr + * @param[in] dst_stride_x Stride of the destination image in X dimension (in bytes) + * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] dst_stride_y Stride of the destination image in Y dimension (in bytes) + * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] dst_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination image + */ +void main(void) +{ + // Get pixels pointer + Tensor3D src = CONVERT_TO_TENSOR3D_STRUCT_FP16(src); + Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT_FP16(dst); + + //Load and calculate data + vec2 data; + uint res; +#if defined(POOL_AVG) || defined(POOL_L2) + data = calculate_avg(2, src, MAX_WIDTH, MAX_HEIGHT, PAD_X, PAD_Y, STRIDE_X, STRIDE_Y); +#else /*POOL_AVG*/ + data = calculate_max(2, src, MAX_WIDTH, MAX_HEIGHT, PAD_X, PAD_Y, STRIDE_X, STRIDE_Y); +#endif /*POOL_AVG*/ + +#if defined(POOL_L2) + // Take square root of the result in L2 pooling + data = 
SQRT_OP(data); +#endif /* defined(POOL_L2) */ + + res = uint(packHalf2x16(data)); + + // Store result + STORE1(dst, CURRENT_OFFSET(dst) >> uint(2), res); +} + +#elif defined(POOLING_LAYER_3) +/** Performs a pooling function of pool size equal to 3. + * + * @note Supported data types are F16; + * @note In case of average pooling the following information must be passed at compile time: + * POOL_AVG must be provided otherwise max pooling will be performed. + * MAX_WIDTH and MAX_HEIGHT which are the maximum accessible indeces in x and y dimensions (width + pad) + * STRIDE_X and STRIDE_Y which are the steps of the window along the x and y directions + * PAD_X and PAD_Y which are the pooling paddings in x and y dimension + * + * @param[in] src_ptr Pointer to the source image. Supported data types: F16 + * @param[in] src_stride_x Stride of the source image in X dimension (in bytes) + * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src_stride_y Stride of the source image in Y dimension (in bytes) + * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] src_step_z src_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source image + * @param[out] dst_ptr Pointer to the destination image. 
Supported data types: same as @p src_ptr + * @param[in] dst_stride_x Stride of the destination image in X dimension (in bytes) + * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] dst_stride_y Stride of the destination image in Y dimension (in bytes) + * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] dst_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination image + */ +void main(void) +{ + // Get pixels pointer + Tensor3D src = CONVERT_TO_TENSOR3D_STRUCT_FP16(src); + Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT_FP16(dst); + + //Load and calculate data + vec2 data; + uint res; +#if defined(POOL_AVG) || defined(POOL_L2) + data = calculate_avg(3, src, MAX_WIDTH, MAX_HEIGHT, PAD_X, PAD_Y, STRIDE_X, STRIDE_Y); +#else /*POOL_AVG*/ + data = calculate_max(3, src, MAX_WIDTH, MAX_HEIGHT, PAD_X, PAD_Y, STRIDE_X, STRIDE_Y); +#endif /*POOL_AVG*/ + +#if defined(POOL_L2) + // Take square root of the result in L2 pooling + data = SQRT_OP(data); +#endif /* defined(POOL_L2) */ + + res = uint(packHalf2x16(data)); + + // Store result + STORE1(dst, CURRENT_OFFSET(dst) >> uint(2), res); +} + +#elif defined(POOLING_LAYER_3_OPTIMIZED) +/** Performs an optimized pooling function of pool size equal to 3 when the stride_x is less equal than 3 + * + * @note Supported data types are F16; + * @note In case of average pooling the following information must be passed at compile time: + * POOL_AVG must be provided otherwise max pooling will be performed. 
+ * MAX_WIDTH and MAX_HEIGHT which are the maximum accessible indeces in x and y dimensions (width + pad) + * STRIDE_X and STRIDE_Y which are the steps of the window along the x and y directions + * PAD_X and PAD_Y which are the pooling paddings in x and y dimension + * + * @param[in] src_ptr Pointer to the source image. Supported data types: F16 + * @param[in] src_stride_x Stride of the source image in X dimension (in bytes) + * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src_stride_y Stride of the source image in Y dimension (in bytes) + * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] src_step_z src_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source image + * @param[out] dst_ptr Pointer to the destination image. 
Supported data types: same as @p src_ptr + * @param[in] dst_stride_x Stride of the destination image in X dimension (in bytes) + * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] dst_stride_y Stride of the destination image in Y dimension (in bytes) + * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] dst_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination image + */ +void main(void) +{ + // Get pixels pointer + Tensor3D src = CONVERT_TO_TENSOR3D_STRUCT_FP16(src); + Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT_FP16(dst); + + vec4 res; + // Perform pooling 3x3 for 4 output elements +#if STRIDE_X == 1 + POOLING3x3_STRIDE1_fp16(res, src, dst); +#elif STRIDE_X == 2 + POOLING3x3_STRIDE2_fp16(res, src, dst); +#elif STRIDE_X == 3 + POOLING3x3_STRIDE3_fp16(res, src, dst); +#endif /*STRIDE_X == 1*/ + + // Divide by pool region in case of average pooling +#if defined(POOL_AVG) || defined(POOL_L2) + ivec4 start_x = ((ivec4(int(gl_GlobalInvocationID.x) * 4) + ivec4(0, 1, 2, 3)) * (ivec4(STRIDE_X))) - (ivec4(PAD_X)); + int start_y = int(gl_GlobalInvocationID.y) * STRIDE_Y - PAD_Y; + ivec4 end_x = min((start_x + (ivec4(3))), (ivec4(MAX_WIDTH))); + int end_y = min((start_y + 3), MAX_HEIGHT); + res *= (vec4((1.f)) / vec4((ivec4(end_y - start_y)) * (end_x - start_x))); +#endif /*POOL_AVG*/ + +#if defined(POOL_L2) + // Take square root of the result in L2 pooling + res = SQRT_OP(res); +#endif /* defined(POOL_L2) */ + + STORE4_fp16(dst, CURRENT_OFFSET(dst) >> uint(3), res); +} + +#elif defined(POOLING_LAYER_7) +/** Performs a pooling function of pool size equal to 7. 
+ * + * @note Supported data types are F16; + * @note In case of average pooling the following information must be passed at compile time: + * POOL_AVG must be provided otherwise max pooling will be performed. + * MAX_WIDTH and MAX_HEIGHT which are the maximum accessible indices in x and y dimensions (width + pad) + * STRIDE_X and STRIDE_Y which are the steps of the window along the x and y directions + * PAD_X and PAD_Y which are the pooling paddings in x and y dimension + * + * @param[in] src_ptr Pointer to the source image. Supported data types: F16 + * @param[in] src_stride_x Stride of the source image in X dimension (in bytes) + * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src_stride_y Stride of the source image in Y dimension (in bytes) + * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] src_step_z src_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source image + * @param[out] dst_ptr Pointer to the destination image. 
Supported data types: same as @p src_ptr + * @param[in] dst_stride_x Stride of the destination image in X dimension (in bytes) + * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] dst_stride_y Stride of the destination image in Y dimension (in bytes) + * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] dst_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination image + */ +void main(void) +{ + // Get pixels pointer + Tensor3D src = CONVERT_TO_TENSOR3D_STRUCT_FP16(src); + Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT_FP16(dst); + + //Load and calculate data + vec2 data; + uint res; +#if defined(POOL_AVG) || defined(POOL_L2) + data = calculate_avg(7, src, MAX_WIDTH, MAX_HEIGHT, PAD_X, PAD_Y, STRIDE_X, STRIDE_Y); +#else /*POOL_AVG*/ + data = calculate_max(7, src, MAX_WIDTH, MAX_HEIGHT, PAD_X, PAD_Y, STRIDE_X, STRIDE_Y); +#endif /*POOL_AVG*/ + +#if defined(POOL_L2) + // Take square root of the result in L2 pooling + data = SQRT_OP(data); +#endif /* defined(POOL_L2) */ + + res = uint(packHalf2x16(data)); + + // Store result + STORE1(dst, CURRENT_OFFSET(dst) >> uint(2), res); +} + +#elif defined(POOLING_LAYER_N) +/** Performs a pooling function of pool size equal to N + * + * @note Supported data types are F16; + * @note Pool size must be passed using POOL_SIZE e.g. POOL_SIZE=13; + * @note In case of average pooling the following information must be passed at compile time: + * POOL_AVG must be provided otherwise max pooling will be performed. 
+ * MAX_WIDTH and MAX_HEIGHT which are the maximum accessible indeces in x and y dimensions (width + pad) + * STRIDE_X and STRIDE_Y which are the steps of the window along the x and y directions + * PAD_X and PAD_Y which are the pooling paddings in x and y dimension + * + * @param[in] src_ptr Pointer to the source image. Supported data types: F16 + * @param[in] src_stride_x Stride of the source image in X dimension (in bytes) + * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src_stride_y Stride of the source image in Y dimension (in bytes) + * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] src_step_z src_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source image + * @param[out] dst_ptr Pointer to the destination image. 
Supported data types: same as @p src_ptr + * @param[in] dst_stride_x Stride of the destination image in X dimension (in bytes) + * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] dst_stride_y Stride of the destination image in Y dimension (in bytes) + * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] dst_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination image + */ +void main(void) +{ + // Get pixels pointer + Tensor3D src = CONVERT_TO_TENSOR3D_STRUCT_FP16(src); + Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT_FP16(dst); + + vec4 vdata00; + vdata00 = vec4(INITIAL_VALUE); + vec4 vdata01; + vdata01 = vec4(INITIAL_VALUE); + vec4 vdata10; + vdata10 = vec4(INITIAL_VALUE); + vec4 vdata11; + vdata11 = vec4(INITIAL_VALUE); + vec2 sdata; + sdata = vec2(INITIAL_VALUE); + + for(int y = 0; y < int(POOL_SIZE); y++) + { + int x = 0; + for(; x <= (int(POOL_SIZE) - 8); x += 8) + { + vec4 data2; + vec4 data3; + LOAD4_fp16(data2, src, (tensor3D_offset_fp16(src, x, y, 0) >> uint(2))); + LOAD4_fp16(data3, src, (tensor3D_offset_fp16(src, x, y, 0) >> uint(2)) + uint(2)); + +#if defined(POOL_L2) + // Raise to power of 2 for L2 Pooling + data2 *= data2; + data3 *= data3; +#endif /* defined(POOL_L2) */ + + POOL_OP(vdata00, vdata00, data2); + POOL_OP(vdata10, vdata10, data3); + } + + // Leftover + for(; x < int(POOL_SIZE); x = x + 2) + { + vec2 data4middle; + data4middle = load_and_unpack(src, (tensor3D_offset_fp16(src, x, y, 0) >> uint(2))); +#if defined(POOL_L2) + // Raise to power of 2 for L2 Pooling + data4middle *= data4middle; +#endif /* defined(POOL_L2) */ + if((x + 1) >= int(POOL_SIZE)) + { + POOL_OP_float(sdata.x, sdata.x, data4middle.x); + } + else + 
{ + float data4; + POOL_OP_float(data4, data4middle.x, data4middle.y); + POOL_OP_float(sdata.x, sdata.x, data4); + } + } + } + + for(int y = STRIDE_X; y < int(POOL_SIZE + STRIDE_X); y++) + { + int x1 = STRIDE_X; + for(; x1 <= (int(POOL_SIZE + STRIDE_X) - 8); x1 += 8) + { + vec4 data2; + vec4 data3; + LOAD4_fp16(data2, src, (tensor3D_offset_fp16(src, x1, y, 0) >> uint(2))); + LOAD4_fp16(data3, src, (tensor3D_offset_fp16(src, x1, y, 0) >> uint(2)) + uint(2)); + +#if defined(POOL_L2) + // Raise to power of 2 for L2 Pooling + data2 *= data2; + data3 *= data3; +#endif /* defined(POOL_L2) */ + + POOL_OP(vdata01, vdata01, data2); + POOL_OP(vdata11, vdata11, data3); + } + + // Leftover + for(; x1 < int(POOL_SIZE + STRIDE_X); x1 = x1 + 2) + { + vec2 data4middle; + data4middle = load_and_unpack(src, (tensor3D_offset_fp16(src, x1, y, 0) >> uint(2))); +#if defined(POOL_L2) + // Raise to power of 2 for L2 Pooling + data4middle *= data4middle; +#endif /* defined(POOL_L2) */ + if((x1 + 1) >= int(POOL_SIZE + STRIDE_X)) + { + POOL_OP_float(sdata.y, sdata.y, data4middle.x); + } + else + { + float data4; + POOL_OP_float(data4, data4middle.x, data4middle.y); + POOL_OP_float(sdata.y, sdata.y, data4); + } + } + } + + //Reduce result + vec4 reduce40; + POOL_OP(reduce40, vdata00.xyzw, vdata10.xyzw); + vec2 reduce20; + POOL_OP_vec2(reduce20, reduce40.xy, reduce40.zw); + vec4 reduce41; + POOL_OP(reduce41, vdata01.xyzw, vdata11.xyzw); + vec2 reduce21; + POOL_OP_vec2(reduce21, reduce41.xy, reduce41.zw); + vec2 data; + POOL_OP_float(data.x, reduce20.x, reduce20.y); + POOL_OP_float(data.x, data.x, sdata.x); + POOL_OP_float(data.y, reduce21.x, reduce21.y); + POOL_OP_float(data.y, data.y, sdata.y); + +#if defined(POOL_AVG) || defined(POOL_L2) + { + // Divide by pool region in case of average pooling + int start_x1 = int(gl_GlobalInvocationID.x) * STRIDE_X - PAD_X; + int start_y1 = int(gl_GlobalInvocationID.y) * STRIDE_Y - PAD_Y; + int end_x1 = int(min(start_x1 + POOL_SIZE, MAX_WIDTH)); + int 
end_y1 = int(min(start_y1 + POOL_SIZE, MAX_HEIGHT)); + int start_x2 = start_x1 + STRIDE_X; + int start_y2 = start_y1; + int end_x2 = int(min(start_x2 + POOL_SIZE, MAX_WIDTH)); + int end_y2 = int(min(start_y2 + POOL_SIZE, MAX_HEIGHT)); + vec2 res1; + res1.x = float((end_y1 - start_y1) * (end_x1 - start_x1)); + res1.y = float((end_y2 - start_y2) * (end_x2 - start_x2)); + data.x = DIV_OP(data.x, res1.x); + data.y = DIV_OP(data.y, res1.y); + } +#endif /* defined(POOL_AVG) || defined(POOL_L2) */ + +#if defined(POOL_L2) + // Take square root of the result in L2 pooling + data = SQRT_OP(data); +#endif /* defined(POOL_L2) */ + uint res; + res = uint(packHalf2x16(data)); + + // Store result + STORE1(dst, CURRENT_OFFSET(dst) >> uint(2), res); +} +#endif /*POOLING_LAYER_2*/ +#endif /*DATA_TYPE_FP32 */ diff --git a/src/core/GLES_COMPUTE/cs_shaders/softmax_layer.cs b/src/core/GLES_COMPUTE/cs_shaders/softmax_layer.cs new file mode 100644 index 0000000000..0bbabeaafc --- /dev/null +++ b/src/core/GLES_COMPUTE/cs_shaders/softmax_layer.cs @@ -0,0 +1,541 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +layout(local_size_x = LOCAL_SIZE_X, local_size_y = LOCAL_SIZE_Y, local_size_z = LOCAL_SIZE_Z) in; + +#include "helpers.h" + +#define MAX_OP(x, y) max((x), (y)) +#define ADD_OP(x, y) ((x) + (y)) +#define SUB_OP(x, y) ((x) - (y)) +#define DIV_OP(x, y) ((x) / (y)) +#define EXP_OP(x) exp((x)) + +#if defined(DATA_TYPE_FP32) +const float MINVAL = -1.0 / 0.0; +vec4 type_min = CONVERT(MINVAL, vec4); + +#define LOAD16(name, offset) \ + vec4(LOAD4(name, offset), \ + LOAD4(name, offset + uint(1)), \ + LOAD4(name, offset + uint(2)), \ + LOAD4(name, offset + uint(3))) + +#define STORE16(name, offset, value) \ + STORE4(name, offset, value.x); \ + STORE4(name, offset + uint(1), value.y); \ + STORE4(name, offset + uint(2), value.z); \ + STORE4(name, offset + uint(3), value.w) + +#ifdef SOFTMAX_LAYER_MAX +BUFFER_DECLARATION(src, 1, float, readonly); +BUFFER_DECLARATION(dst, 2, float, writeonly); +#elif defined(SOFTMAX_LAYER_SHIFT_EXP_SUM) +BUFFER_DECLARATION(src, 1, float, readonly); +BUFFER_DECLARATION(max, 2, float, readonly); +BUFFER_DECLARATION(dst, 3, float, writeonly); +BUFFER_DECLARATION(sum, 4, float, writeonly); +#elif defined(SOFTMAX_LAYER_NORM) +BUFFER_DECLARATION(src, 1, float, readonly); +BUFFER_DECLARATION(sum, 2, float, readonly); +BUFFER_DECLARATION(dst, 3, float, writeonly); +#endif // SOFTMAX_LAYER_MAX + +layout(std140) uniform shader_params +{ +#ifdef SOFTMAX_LAYER_MAX + TENSOR3D_PARAM_DECLARATION(src); + TENSOR3D_PARAM_DECLARATION(dst); + uint width; +#elif defined(SOFTMAX_LAYER_SHIFT_EXP_SUM) + TENSOR3D_PARAM_DECLARATION(src); + TENSOR3D_PARAM_DECLARATION(max); + TENSOR3D_PARAM_DECLARATION(dst); + TENSOR3D_PARAM_DECLARATION(sum); + uint width; +#elif defined(SOFTMAX_LAYER_NORM) + 
TENSOR3D_PARAM_DECLARATION(src); + TENSOR3D_PARAM_DECLARATION(sum); + TENSOR3D_PARAM_DECLARATION(dst); +#endif // SOFTMAX_LAYER_MAX +}; + +#ifdef SOFTMAX_LAYER_MAX +/** Identifies the maximum value across the 1st dimension. + * + * @note Datatype must be given as a preprocessor argument using "#define DATA_TYPE_FP32" + * + * @param[in] src_ptr Pointer to the source tensor slice. Supported data types: F32 + * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes) + * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes) + * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] src_step_z src_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor + * @param[out] dst_ptr Pointer to the destination tensor slice. 
Supported data types: same as @p src_ptr + * @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes) + * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes) + * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in bytes) + * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor + * @param[in] width Input image width + */ +void main(void) +{ + Image src = CONVERT_TENSOR3D_TO_IMAGE_STRUCT(src); + Image dst = CONVERT_TENSOR3D_TO_IMAGE_STRUCT(dst); + + // Initialize local maximum + vec4 max_val = CONVERT(type_min, vec4); + + // Calculate max of row + uint width2 = width >> 2; + for(int i = 0; i < int(width2); i++) + { + vec4 data = LOAD16(src, offset(src, i << 2, 0)); + max_val = MAX_OP(data, max_val); + } + +#ifdef NON_MULTIPLE_OF_4 + // Handle non multiple of 4 + for(int i = int(width2 << 2); i < int(width); i++) + { + float data = LOAD4(src, offset(src, i, 0)); + max_val.x = MAX_OP(data, max_val.x); + } +#endif /* NON_MULTIPLE_OF_4 */ + + // Perform max reduction + max_val.xy = MAX_OP(max_val.xy, max_val.zw); + max_val.x = MAX_OP(max_val.x, max_val.y); + + // Store result + STORE4(dst, CURRENT_OFFSET(dst), max_val.x); +} +#elif defined(SOFTMAX_LAYER_SHIFT_EXP_SUM) // SOFTMAX_LAYER_MAX +/** Shifts the values of the input tensor by the max calculated in softmax_layer_max kernel, + * then gets the exponent of each element as sums all elements across each row. + * + * @note Datatype must be given as a preprocessor argument using "#define DATA_TYPE_FP32" + * + * @note In case the input is not multiple of 4 NON_MULTIPLE_OF_4 must be passed. 
+ * + * @param[in] src_ptr Pointer to the source tensor slice. Supported data types: F32 + * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes) + * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes) + * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] src_step_z src_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor + * @param[in] max_ptr Pointer to the max values tensor slice. Supported data types: same as @p src_ptr + * @param[in] max_stride_x Stride of the max values tensor in X dimension (in bytes) + * @param[in] max_step_x max_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] max_stride_y Stride of the max values tensor in Y dimension (in bytes) + * @param[in] max_step_y max_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] max_stride_z Stride of the max values tensor in Z dimension (in bytes) + * @param[in] max_step_z max_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] max_offset_first_element_in_bytes The offset of the first element in the max values tensor + * @param[out] dst_ptr Pointer to the destination tensor slice. 
Supported data types: same as @p src_ptr + * @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes) + * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes) + * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in bytes) + * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor + * @param[out] sum_ptr Pointer to the sum values tensor slice. Supported data types: same as @p src_ptr + * @param[in] sum_stride_x Stride of the sum values tensor in X dimension (in bytes) + * @param[in] sum_step_x sum_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] sum_stride_y Stride of the sum values tensor in Y dimension (in bytes) + * @param[in] sum_step_y sum_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] sum_stride_z Stride of the sum values tensor in Z dimension (in bytes) + * @param[in] sum_step_z sum_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] sum_offset_first_element_in_bytes The offset of the first element in the sum values tensor + * @param[in] width Input image width + */ +void main(void) +{ + Image src = CONVERT_TENSOR3D_TO_IMAGE_STRUCT(src); + Image dst = CONVERT_TENSOR3D_TO_IMAGE_STRUCT(dst); + Image max = CONVERT_TENSOR3D_TO_IMAGE_STRUCT(max); + Image sum = CONVERT_TENSOR3D_TO_IMAGE_STRUCT(sum); + + // Load max value of 1D logits vector (row) + vec4 max_val = CONVERT(LOAD4(max, CURRENT_OFFSET(max)), vec4); + + // Set sum vector + vec4 sum1D = CONVERT(0, vec4); + + // Shift values, exp and sum + uint width2 = width >> 2; + for(int 
i = 0; i < int(width2); i++) + { + vec4 data = LOAD16(src, offset(src, i << 2, 0)); + data = SUB_OP(data, max_val); + data = EXP_OP(data); + STORE16(dst, offset(dst, i << 2, 0), data); + sum1D = ADD_OP(sum1D, data); + } + +#ifdef NON_MULTIPLE_OF_4 + // Handle non multiple of 4 + for(int i = int(width2 << 2); i < int(width); i++) + { + float data; + data = LOAD4(src, offset(src, i, 0)); + data = SUB_OP(data, max_val.x); + data = EXP_OP(data); + STORE4(dst, offset(dst, i, 0), data); + sum1D.x = ADD_OP(sum1D.x, data); + } +#endif /* NON_MULTIPLE_OF_4 */ + + // Perform min/max reduction + sum1D.xy = ADD_OP(sum1D.xy, sum1D.zw); + sum1D.x = ADD_OP(sum1D.x, sum1D.y); + + // Calculate and store result + STORE4(sum, CURRENT_OFFSET(sum), sum1D.x); +} +#elif defined(SOFTMAX_LAYER_NORM) // SOFTMAX_LAYER_MAX +/** Divides all the values of the input tensor by the sum calculated from softmax_layer_shift_exp_sum kernel. + * + * @note Datatype must be given as a preprocessor argument using "#define DATA_TYPE_FP32" + * + * @param[in] src_ptr Pointer to the source tensor slice. Supported data types: F32 + * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes) + * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes) + * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] src_step_z src_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor + * @param[in] sum_ptr Pointer to the sum values tensor slice. 
Supported data types: same as @p src_ptr + * @param[in] sum_stride_x Stride of the sum values tensor in X dimension (in bytes) + * @param[in] sum_step_x sum_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] sum_stride_y Stride of the sum values tensor in Y dimension (in bytes) + * @param[in] sum_step_y sum_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] sum_stride_z Stride of the sum values tensor in Z dimension (in bytes) + * @param[in] sum_step_z sum_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] sum_offset_first_element_in_bytes The offset of the first element in the sum values tensor + * @param[out] dst_ptr Pointer to the destination tensor slice. Supported data types: same as @p src_ptr + * @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes) + * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes) + * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in bytes) + * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor + */ +void main(void) +{ + Image src = CONVERT_TENSOR3D_TO_IMAGE_STRUCT(src); + Image dst = CONVERT_TENSOR3D_TO_IMAGE_STRUCT(dst); + Image sum = CONVERT_TENSOR3D_TO_IMAGE_STRUCT_NO_STEP(sum); + + // Load max value of 1D logits vector (row) + vec4 sum_val = CONVERT(LOAD4(sum, offset(sum, 0, int(gl_GlobalInvocationID.y))), vec4); + vec4 data = LOAD16(src, CURRENT_OFFSET(src)); + STORE16(dst, CURRENT_OFFSET(dst), DIV_OP(data, sum_val)); +} +#endif // SOFTMAX_LAYER_MAX + +#elif defined(DATA_TYPE_FP16) +precision 
mediump float; + +const float MINVAL1 = -1.0 / 0.0; +vec4 type_min1 = CONVERT(MINVAL1, vec4); + +#define GC_LOAD4_IMAGE(r, name, x, y) \ + load_and_unpack(r.xy, name, x, y); \ + load_and_unpack(r.zw, name, (x + 2), y) + +#define GC_STORE4_IMAGE(r, name, x, y) \ + GC_STORE1_2D_OFFSET(uint(packHalf2x16(r.xy)), name, x, y); \ + GC_STORE1_2D_OFFSET(uint(packHalf2x16(r.zw)), name, (x + 2), y) + +#ifdef SOFTMAX_LAYER_MAX +BUFFER_DECLARATION(src, 1, uint, readonly); +BUFFER_DECLARATION(dst, 2, uint, writeonly); +#elif defined(SOFTMAX_LAYER_SHIFT_EXP_SUM) +BUFFER_DECLARATION(src, 1, uint, readonly); +BUFFER_DECLARATION(max, 2, uint, readonly); +BUFFER_DECLARATION(dst, 3, uint, writeonly); +BUFFER_DECLARATION(sum, 4, uint, writeonly); +#elif defined(SOFTMAX_LAYER_NORM) +BUFFER_DECLARATION(src, 1, uint, readonly); +BUFFER_DECLARATION(sum, 2, uint, readonly); +BUFFER_DECLARATION(dst, 3, uint, writeonly); +#endif // SOFTMAX_LAYER_MAX + +layout(std140) uniform shader_params +{ +#ifdef SOFTMAX_LAYER_MAX + TENSOR3D_PARAM_DECLARATION(src); + TENSOR3D_PARAM_DECLARATION(dst); + uint width; +#elif defined(SOFTMAX_LAYER_SHIFT_EXP_SUM) + TENSOR3D_PARAM_DECLARATION(src); + TENSOR3D_PARAM_DECLARATION(max); + TENSOR3D_PARAM_DECLARATION(dst); + TENSOR3D_PARAM_DECLARATION(sum); + uint width; +#elif defined(SOFTMAX_LAYER_NORM) + TENSOR3D_PARAM_DECLARATION(src); + TENSOR3D_PARAM_DECLARATION(sum); + TENSOR3D_PARAM_DECLARATION(dst); +#endif // SOFTMAX_LAYER_MAX +}; + +#define load_and_unpack(rs, names, xs, ys) \ + do \ + { \ + uint packed_s; \ + GC_LOAD1_2D_OFFSET(packed_s, names, xs, ys); \ + rs = vec2(unpackHalf2x16(packed_s)); \ + } while(false) + +#ifdef SOFTMAX_LAYER_MAX +/** Identifies the maximum value across the 1st dimension. + * + * @note Datatype must be given as a preprocessor argument using "#define DATA_TYPE_FP16" + * + * @param[in] src_ptr Pointer to the source tensor slice. 
Supported data types: F16 + * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes) + * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes) + * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] src_step_z src_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor + * @param[out] dst_ptr Pointer to the destination tensor slice. Supported data types: same as @p src_ptr + * @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes) + * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes) + * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in bytes) + * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor + * @param[in] width Input image width + */ +void main(void) +{ + Image src = GC_CONVERT_TENSOR3D_TO_IMAGE_STRUCT(src); + Image dst = GC_CONVERT_TENSOR3D_TO_IMAGE_STRUCT(dst); + + // Initialize local maximum + vec4 max_val1 = CONVERT(type_min1, vec4); + + // Calculate max of row + uint width2 = width >> 2; + for(int i = 0; i < int(width2); i++) + { + vec4 data1; + GC_LOAD4_IMAGE(data1, src, (i << 2), 0); + max_val1 = MAX_OP(data1, max_val1); + } + +#ifdef NON_MULTIPLE_OF_4 + // Handle non multiple of 4 + for(int i = int(width2 << 2); i < 
int(width); i = i + 2) + { + vec2 data; + load_and_unpack(data, src, i, 0); + max_val1.x = MAX_OP(data.x, max_val1.x); + if((i + 1) < int(width)) + { + max_val1.x = MAX_OP(data.y, max_val1.x); + } + } +#endif /* NON_MULTIPLE_OF_4 */ + + // Perform max reduction + max_val1.xy = MAX_OP(max_val1.xy, max_val1.zw); + max_val1.x = MAX_OP(max_val1.x, max_val1.y); + vec2 res1 = vec2(max_val1.x, 0.f); + uint res; + res = uint(packHalf2x16(res1)); + + // Store result + GC_STORE1_2D_OFFSET(res, dst, 0, 0); +} +#elif defined(SOFTMAX_LAYER_SHIFT_EXP_SUM) // SOFTMAX_LAYER_MAX +/** Shifts the values of the input tensor by the max calculated in softmax_layer_max kernel, + * then gets the exponent of each element as sums all elements across each row. + * + * @note Datatype must be given as a preprocessor argument using "#define DATA_TYPE_FP16" + * + * @note In case the input is not multiple of 4 NON_MULTIPLE_OF_4 must be passed. + * + * @param[in] src_ptr Pointer to the source tensor slice. Supported data types: F16 + * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes) + * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes) + * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] src_step_z src_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor + * @param[in] max_ptr Pointer to the max values tensor slice. 
Supported data types: same as @p src_ptr + * @param[in] max_stride_x Stride of the max values tensor in X dimension (in bytes) + * @param[in] max_step_x max_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] max_stride_y Stride of the max values tensor in Y dimension (in bytes) + * @param[in] max_step_y max_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] max_stride_z Stride of the max values tensor in Z dimension (in bytes) + * @param[in] max_step_z max_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] max_offset_first_element_in_bytes The offset of the first element in the max values tensor + * @param[out] dst_ptr Pointer to the destination tensor slice. Supported data types: same as @p src_ptr + * @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes) + * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes) + * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in bytes) + * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor + * @param[out] sum_ptr Pointer to the sum values tensor slice. 
Supported data types: same as @p src_ptr + * @param[in] sum_stride_x Stride of the sum values tensor in X dimension (in bytes) + * @param[in] sum_step_x sum_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] sum_stride_y Stride of the sum values tensor in Y dimension (in bytes) + * @param[in] sum_step_y sum_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] sum_stride_z Stride of the sum values tensor in Z dimension (in bytes) + * @param[in] sum_step_z sum_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] sum_offset_first_element_in_bytes The offset of the first element in the sum values tensor + * @param[in] width Input image width + */ +void main(void) +{ + Image src = GC_CONVERT_TENSOR3D_TO_IMAGE_STRUCT(src); + Image dst = GC_CONVERT_TENSOR3D_TO_IMAGE_STRUCT(dst); + Image max = GC_CONVERT_TENSOR3D_TO_IMAGE_STRUCT(max); + Image sum = GC_CONVERT_TENSOR3D_TO_IMAGE_STRUCT(sum); + + // Load max value of 1D logits vector (row) + vec2 datamaxinit; + load_and_unpack(datamaxinit, max, 0, 0); + vec4 max_val = CONVERT(datamaxinit.x, vec4); + + // Set sum vector + vec4 sum1D1 = CONVERT(0.f, vec4); + + // Shift values, exp and sum + uint width2 = width >> 2; + for(int i = 0; i < int(width2); i++) + { + vec4 data; + GC_LOAD4_IMAGE(data, src, (i << 2), 0); + data = SUB_OP(data, max_val); + data = EXP_OP(data); + GC_STORE4_IMAGE(data, dst, (i << 2), 0); + sum1D1 = ADD_OP(sum1D1, data); + } + +#ifdef NON_MULTIPLE_OF_4 + // Handle non multiple of 4 + for(int i = int(width2 << 2); i < int(width); i = i + 2) + { + vec2 datamiddle; + float data1; + load_and_unpack(datamiddle, src, i, 0); + data1 = SUB_OP(datamiddle.x, max_val.x); + data1 = EXP_OP(data1); + vec2 datares1; + if((i + 1) < int(width)) + { + float data2; + data2 = SUB_OP(datamiddle.y, max_val.x); + data2 = EXP_OP(data2); + datares1 = vec2(data1, data2); + data1 = ADD_OP(data2, data1); + } + else + { + datares1 = 
vec2(data1, 0.f); + } + uint datares; + datares = uint(packHalf2x16(datares1)); + GC_STORE1_2D_OFFSET(datares, dst, i, 0); + sum1D1.x = ADD_OP(sum1D1.x, data1); + } +#endif /* NON_MULTIPLE_OF_4 */ + + // Perform min/max reduction + sum1D1.xy = ADD_OP(sum1D1.xy, sum1D1.zw); + sum1D1.x = ADD_OP(sum1D1.x, sum1D1.y); + vec2 res1 = vec2(sum1D1.x, 0.f); + uint res; + res = uint(packHalf2x16(res1)); + // Calculate and store result + GC_STORE1_2D_OFFSET(res, sum, 0, 0); +} +#elif defined(SOFTMAX_LAYER_NORM) // SOFTMAX_LAYER_MAX +/** Divides all the values of the input tensor by the sum calculated from softmax_layer_shift_exp_sum kernel. + * + * @note Datatype must be given as a preprocessor argument using "#define DATA_TYPE_FP16" + * + * @param[in] src_ptr Pointer to the source tensor slice. Supported data types: F16 + * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes) + * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes) + * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] src_step_z src_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor + * @param[in] sum_ptr Pointer to the sum values tensor slice. 
Supported data types: same as @p src_ptr + * @param[in] sum_stride_x Stride of the sum values tensor in X dimension (in bytes) + * @param[in] sum_step_x sum_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] sum_stride_y Stride of the sum values tensor in Y dimension (in bytes) + * @param[in] sum_step_y sum_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] sum_stride_z Stride of the sum values tensor in Z dimension (in bytes) + * @param[in] sum_step_z sum_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] sum_offset_first_element_in_bytes The offset of the first element in the sum values tensor + * @param[out] dst_ptr Pointer to the destination tensor slice. Supported data types: same as @p src_ptr + * @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes) + * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes) + * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in bytes) + * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor + */ +void main(void) +{ + Image src = GC_CONVERT_TENSOR3D_TO_IMAGE_STRUCT(src); + Image dst = GC_CONVERT_TENSOR3D_TO_IMAGE_STRUCT(dst); + Image sum = GC_CONVERT_TENSOR3D_TO_IMAGE_STRUCT_NO_STEP(sum); + + // Load max value of 1D logits vector (row) + vec2 sum1; + load_and_unpack(sum1, sum, 0, int(gl_GlobalInvocationID.y)); + vec4 sum_val1 = CONVERT(sum1.x, vec4); + + vec4 data1; + GC_LOAD4_IMAGE(data1, src, 0, 0); + vec4 res = DIV_OP(data1, sum_val1); + GC_STORE4_IMAGE(res, dst, 0, 0); +} +#endif // 
SOFTMAX_LAYER_MAX +#endif // DATA_TYPE_FP32 \ No newline at end of file diff --git a/src/core/GLES_COMPUTE/cs_shaders/transpose.cs b/src/core/GLES_COMPUTE/cs_shaders/transpose.cs new file mode 100755 index 0000000000..6d020fe70d --- /dev/null +++ b/src/core/GLES_COMPUTE/cs_shaders/transpose.cs @@ -0,0 +1,187 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +layout(local_size_x = LOCAL_SIZE_X, local_size_y = LOCAL_SIZE_Y, local_size_z = LOCAL_SIZE_Z) in; +#include "helpers.h" + +#ifdef DATA_TYPE_FP32 +precision highp float; + +BUFFER_DECLARATION(src, 1, float, readonly); +BUFFER_DECLARATION(dst, 2, float, writeonly); + +layout(std140) uniform shader_params +{ + IMAGE_PARAM_DECLARATION(src); + IMAGE_PARAM_DECLARATION(dst); +}; + +#define LOAD16(r, name, offset) \ + r.x = LOAD4(name, offset); \ + r.y = LOAD4(name, offset + uint(1)); \ + r.z = LOAD4(name, offset + uint(2)); \ + r.w = LOAD4(name, offset + uint(3)) + +#define STORE16(name, offset, r) \ + STORE4(name, offset, r.x); \ + STORE4(name, offset + uint(1), r.y); \ + STORE4(name, offset + uint(2), r.z); \ + STORE4(name, offset + uint(3), r.w) + +/** This OpenGL ES kernel computes the matrix transposition of input matrix + * + * @param[in] src_ptr Pointer to the source matrix. Supported data types: F32 + * @param[in] src_stride_x Stride of the source matrix in X dimension (in bytes) + * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src_stride_y Stride of the source matrix in Y dimension (in bytes) + * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source matrix + * @param[out] dst_ptr Pointer to the destination matrix Supported data type: same as src_ptr + * @param[in] dst_stride_x Stride of the destination matrix in X dimension (in bytes) + * @param[in] dst_step_x dst_gx_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] dst_stride_y Stride of the destination matrix in Y dimension (in bytes) + * @param[in] dst_step_y dst_gx_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination matrix + */ +void main(void) +{ + 
// Compute source address + Image src = CONVERT_TO_IMAGE_STRUCT(src); + Image dst = CONVERT_TO_IMAGE_STRUCT(dst); + + // Load the NxN block at (x, y) + vec4 u0; + vec4 u1; + vec4 u2; + vec4 u3; + LOAD16(u0, src, offset(src, 0, 0)); + LOAD16(u1, src, offset(src, 0, 1)); + LOAD16(u2, src, offset(src, 0, 2)); + LOAD16(u3, src, offset(src, 0, 3)); + + // Transpose the block + vec4 tmp; + tmp.xyz = u0.yzw; + u0.y = u1.x; + u0.z = u2.x; + u0.w = u3.x; + u1.x = tmp.x; + u2.x = tmp.y; + u3.x = tmp.z; + tmp.xy = u1.zw; + u1.z = u2.y; + u1.w = u3.y; + u2.y = tmp.x; + u3.y = tmp.y; + tmp.x = u2.w; + u2.w = u3.z; + u3.z = tmp.x; + + // Store the block at (y, x) + uint dst_offset_in_bytes = uint(16) * uint(gl_GlobalInvocationID.y) + uint(4) * uint(gl_GlobalInvocationID.x) * (dst.stride_y) + (dst.offset_first_element_in_bytes); + + STORE16(dst, uint((dst_offset_in_bytes + uint(0) * dst.stride_y) >> 2), u0); + STORE16(dst, uint((dst_offset_in_bytes + uint(1) * dst.stride_y) >> 2), u1); + STORE16(dst, uint((dst_offset_in_bytes + uint(2) * dst.stride_y) >> 2), u2); + STORE16(dst, uint((dst_offset_in_bytes + uint(3) * dst.stride_y) >> 2), u3); +} + +#elif defined(DATA_TYPE_FP16) +precision mediump float; + +BUFFER_DECLARATION(src, 1, uvec2, readonly); +BUFFER_DECLARATION(dst, 2, uvec2, writeonly); + +layout(std140) uniform shader_params +{ + IMAGE_PARAM_DECLARATION(src); + IMAGE_PARAM_DECLARATION(dst); +}; + +/** This OpenGL ES kernel computes the matrix transposition of input matrix + * + * @param[in] src_ptr Pointer to the source matrix. 
Supported data types: F16 + * @param[in] src_stride_x Stride of the source matrix in X dimension (in bytes) + * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src_stride_y Stride of the source matrix in Y dimension (in bytes) + * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source matrix + * @param[out] dst_ptr Pointer to the destination matrix Supported data type: same as src_ptr + * @param[in] dst_stride_x Stride of the destination matrix in X dimension (in bytes) + * @param[in] dst_step_x dst_gx_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] dst_stride_y Stride of the destination matrix in Y dimension (in bytes) + * @param[in] dst_step_y dst_gx_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination matrix + */ +void main(void) +{ + // Compute source address + Image src = GC_CONVERT_TO_IMAGE_STRUCT(src); + Image dst = GC_CONVERT_TO_IMAGE_STRUCT(dst); + + // Load the NxN block at (x, y) + vec4 u0; + vec4 u1; + vec4 u2; + vec4 u3; + uvec2 packed_s[4]; + GC_LOAD1_2D_OFFSET(packed_s[0], src, 0, 0); + GC_LOAD1_2D_OFFSET(packed_s[1], src, 0, 1); + GC_LOAD1_2D_OFFSET(packed_s[2], src, 0, 2); + GC_LOAD1_2D_OFFSET(packed_s[3], src, 0, 3); + u0 = vec4(unpackHalf2x16(packed_s[0].x), unpackHalf2x16(packed_s[0].y)); + u1 = vec4(unpackHalf2x16(packed_s[1].x), unpackHalf2x16(packed_s[1].y)); + u2 = vec4(unpackHalf2x16(packed_s[2].x), unpackHalf2x16(packed_s[2].y)); + u3 = vec4(unpackHalf2x16(packed_s[3].x), unpackHalf2x16(packed_s[3].y)); + + // Transpose the block + vec4 tmp; + tmp.xyz = u0.yzw; + u0.y = u1.x; + u0.z = u2.x; + u0.w = u3.x; + u1.x = tmp.x; + u2.x = tmp.y; + u3.x = tmp.z; + tmp.xy = u1.zw; + u1.z = u2.y; + 
u1.w = u3.y; + u2.y = tmp.x; + u3.y = tmp.y; + tmp.x = u2.w; + u2.w = u3.z; + u3.z = tmp.x; + + // Store the block at (y, x) + uint dst_offset_in_bytes = uint(8) * uint(gl_GlobalInvocationID.y) + uint(gl_GlobalInvocationID.x) * (dst_step_y) + (dst.offset_first_element_in_bytes); + + packed_s[0] = uvec2(packHalf2x16(u0.xy), packHalf2x16(u0.zw)); + packed_s[1] = uvec2(packHalf2x16(u1.xy), packHalf2x16(u1.zw)); + packed_s[2] = uvec2(packHalf2x16(u2.xy), packHalf2x16(u2.zw)); + packed_s[3] = uvec2(packHalf2x16(u3.xy), packHalf2x16(u3.zw)); + GC_STORE1(packed_s[0], dst, uint((dst_offset_in_bytes + uint(0) * dst_stride_y) >> 3)); + GC_STORE1(packed_s[1], dst, uint((dst_offset_in_bytes + uint(1) * dst_stride_y) >> 3)); + GC_STORE1(packed_s[2], dst, uint((dst_offset_in_bytes + uint(2) * dst_stride_y) >> 3)); + GC_STORE1(packed_s[3], dst, uint((dst_offset_in_bytes + uint(3) * dst_stride_y) >> 3)); +} +#endif /*ARM_COMPUTE_ENABLE_FP16*/ diff --git a/src/core/GLES_COMPUTE/egl_entries.in b/src/core/GLES_COMPUTE/egl_entries.in new file mode 100644 index 0000000000..64ccda63c9 --- /dev/null +++ b/src/core/GLES_COMPUTE/egl_entries.in @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +EGL_ENTRY(eglGetProcAddress) +EGL_ENTRY(eglBindAPI) +EGL_ENTRY(eglChooseConfig) +EGL_ENTRY(eglCreateContext) +EGL_ENTRY(eglDestroyContext) +EGL_ENTRY(eglGetDisplay) +EGL_ENTRY(eglInitialize) +EGL_ENTRY(eglMakeCurrent) +EGL_ENTRY(eglTerminate) +EGL_ENTRY(eglGetError) +EGL_ENTRY(eglQueryString) diff --git a/src/core/GLES_COMPUTE/gl_entries.in b/src/core/GLES_COMPUTE/gl_entries.in new file mode 100644 index 0000000000..15ce8ee819 --- /dev/null +++ b/src/core/GLES_COMPUTE/gl_entries.in @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +GL_ENTRY(glAttachShader) +GL_ENTRY(glCompileShader) +GL_ENTRY(glCreateProgram) +GL_ENTRY(glCreateShader) +GL_ENTRY(glDeleteProgram) +GL_ENTRY(glDeleteShader) +GL_ENTRY(glDetachShader) +GL_ENTRY(glGetProgramInfoLog) +GL_ENTRY(glGetProgramiv) +GL_ENTRY(glGetShaderInfoLog) +GL_ENTRY(glGetShaderiv) +GL_ENTRY(glLinkProgram) +GL_ENTRY(glShaderSource) +GL_ENTRY(glUseProgram) +GL_ENTRY(glBindBuffer) +GL_ENTRY(glBindBufferBase) +GL_ENTRY(glBufferData) +GL_ENTRY(glDeleteBuffers) +GL_ENTRY(glDispatchCompute) +GL_ENTRY(glFlush) +GL_ENTRY(glGenBuffers) +GL_ENTRY(glGetProgramResourceIndex) +GL_ENTRY(glGetUniformLocation) +GL_ENTRY(glMapBufferRange) +GL_ENTRY(glMemoryBarrier) +GL_ENTRY(glUniform1ui) +GL_ENTRY(glUnmapBuffer) +GL_ENTRY(glGetError) +GL_ENTRY(glGetActiveUniformBlockiv) +GL_ENTRY(glUniformBlockBinding) +GL_ENTRY(glGetUniformBlockIndex) +GL_ENTRY(glGenTextures) +GL_ENTRY(glDeleteTextures) +GL_ENTRY(glBindTexture) +GL_ENTRY(glTexImage2D) +GL_ENTRY(glGenFramebuffers) +GL_ENTRY(glDeleteFramebuffers) +GL_ENTRY(glBindFramebuffer) +GL_ENTRY(glFramebufferTexture2D) diff --git a/src/core/GLES_COMPUTE/kernels/GCAbsoluteDifferenceKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCAbsoluteDifferenceKernel.cpp new file mode 100644 index 0000000000..d76ae8ff1c --- /dev/null +++ b/src/core/GLES_COMPUTE/kernels/GCAbsoluteDifferenceKernel.cpp @@ -0,0 +1,112 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/core/GLES_COMPUTE/kernels/GCAbsoluteDifferenceKernel.h" + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h" +#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h" +#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h" +#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/core/Window.h" +#include "support/ToolchainSupport.h" + +#include +#include + +using namespace arm_compute; + +GCAbsoluteDifferenceKernel::GCAbsoluteDifferenceKernel() + : _input1(nullptr), _input2(nullptr), _output(nullptr) +{ +} + +void GCAbsoluteDifferenceKernel::configure(const IGCTensor *input1, const IGCTensor *input2, IGCTensor *output) +{ + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::U8); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input2, 1, DataType::U8); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8); + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input1, input2, output); + + _input1 = input1; + _input2 = input2; + _output = output; + + constexpr unsigned int num_elems_processed_per_iteration = 4; + + // Set kernel build options + std::set build_opts; + build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(1)); + build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1)); + build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1)); + + // Create kernel + _kernel = static_cast(GCKernelLibrary::get().create_kernel("absdiff", build_opts)); + + // Configure kernel window + Window win = calculate_max_window(*input1->info(), Steps(num_elems_processed_per_iteration)); + + AccessWindowRectangle input1_access(input1->info(), 0, 0, 4, 1); + AccessWindowRectangle input2_access(input2->info(), 0, 0, 4, 1); + AccessWindowRectangle output_access(output->info(), 0, 0, 4, 1); + + 
update_window_and_padding(win, input1_access, input2_access, output_access); + + ValidRegion valid_region = intersect_valid_regions(input1->info()->valid_region(), + input2->info()->valid_region()); + + output_access.set_valid_region(win, valid_region); + + _kernel.clear_params(); + + // set shader params binding point + _kernel.set_shader_params_binding_point(0); + + IGCKernel::configure(win); +} + +void GCAbsoluteDifferenceKernel::run(const Window &window) +{ + ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); + ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IGCKernel::window(), window); + + _kernel.use(); + + Window slice = window.first_slice_window_2D(); + do + { + unsigned int idx = 0; + unsigned int binding = 1; // SSBO binding starts from 1. + add_2D_tensor_argument(idx, _input1, binding++, slice); + add_2D_tensor_argument(idx, _input2, binding++, slice); + add_2D_tensor_argument(idx, _output, binding++, slice); + + _kernel.update_shader_params(); + + enqueue(*this, slice); + } + while(window.slide_window_slice_2D(slice)); +} diff --git a/src/core/GLES_COMPUTE/kernels/GCActivationLayerKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCActivationLayerKernel.cpp new file mode 100644 index 0000000000..42433cf076 --- /dev/null +++ b/src/core/GLES_COMPUTE/kernels/GCActivationLayerKernel.cpp @@ -0,0 +1,128 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/core/GLES_COMPUTE/kernels/GCActivationLayerKernel.h" + +#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h" +#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h" +#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/IAccessWindow.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Utils.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/core/Window.h" +#include "support/ToolchainSupport.h" + +#include +#include + +using namespace arm_compute; + +GCActivationLayerKernel::GCActivationLayerKernel() + : _input(nullptr), _output(nullptr) +{ +} + +void GCActivationLayerKernel::configure(IGCTensor *input, IGCTensor *output, ActivationLayerInfo act_info) +{ + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); + + // Make sure _kernel is initialized before calling the parent's configure + _input = input; + _output = input; + + if(output != nullptr) + { + // Output auto inizialitation if not yet initialized + auto_init_if_empty(*output->info(), input->info()->tensor_shape(), 1, input->info()->data_type(), input->info()->fixed_point_position()); + + ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, output); + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); + ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(input, output); + + _output = output; + } + + unsigned int num_elems_processed_per_iteration = 4 / 
input->info()->element_size(); + + // Set build options + std::set build_opts; + std::string dt_name = (input->info()->data_type() == DataType::F32) ? "DATA_TYPE_FP32" : "DATA_TYPE_FP16"; + build_opts.emplace(("#define " + string_from_activation_func(act_info.activation()))); + build_opts.emplace(("#define " + dt_name)); + build_opts.emplace(("#define A_VAL " + float_to_string_with_full_precision(act_info.a()))); + build_opts.emplace(("#define B_VAL " + float_to_string_with_full_precision(act_info.b()))); + build_opts.emplace(("#define LOCAL_SIZE_X " + support::cpp11::to_string(1))); + build_opts.emplace(("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1))); + build_opts.emplace(("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1))); + + // Create kernel + _kernel = static_cast(GCKernelLibrary::get().create_kernel("activation_layer", build_opts)); + + // Configure kernel window + Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration)); + + if(output != nullptr) + { + AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration); + + update_window_and_padding(win, + AccessWindowHorizontal(input->info(), 0, num_elems_processed_per_iteration), + output_access); + + output_access.set_valid_region(win, input->info()->valid_region()); + } + else + { + update_window_and_padding(win, + AccessWindowHorizontal(input->info(), 0, num_elems_processed_per_iteration)); + } + + _kernel.clear_params(); + + _kernel.set_shader_params_binding_point(0); + + IGCKernel::configure(win); +} + +void GCActivationLayerKernel::run(const Window &window) +{ + ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); + ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IGCKernel::window(), window); + + _kernel.use(); + + Window slice = window.first_slice_window_3D(); + + do + { + unsigned int idx = 0; + unsigned int binding = 1; + add_3D_tensor_argument(idx, _input, binding++, slice); + add_3D_tensor_argument(idx, _output, binding++, slice); + 
_kernel.update_shader_params(); + enqueue(*this, slice); + } + while(window.slide_window_slice_3D(slice)); +} diff --git a/src/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.cpp new file mode 100644 index 0000000000..9c24d2ef42 --- /dev/null +++ b/src/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.cpp @@ -0,0 +1,129 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.h" + +#include "arm_compute/core/AccessWindowStatic.h" +#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h" +#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h" +#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Utils.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/core/Window.h" + +#include "support/ToolchainSupport.h" + +using namespace arm_compute; + +GCBatchNormalizationLayerKernel::GCBatchNormalizationLayerKernel() + : _input(nullptr), _output(nullptr), _mean(nullptr), _var(nullptr), _beta(nullptr), _gamma(nullptr), _epsilon(0.0f) +{ +} + +void GCBatchNormalizationLayerKernel::configure(const IGCTensor *input, IGCTensor *output, const IGCTensor *mean, const IGCTensor *var, const IGCTensor *beta, const IGCTensor *gamma, + float epsilon) +{ + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); + ARM_COMPUTE_ERROR_ON_NULLPTR(output); + + // Output tensor auto initialization if not yet initialized + auto_init_if_empty(*output->info(), input->info()->tensor_shape(), 1, input->info()->data_type(), input->info()->fixed_point_position()); + + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output, mean, var, beta, gamma); + ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(input, output, mean, var, beta, gamma); + ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, output); + ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(mean, var, beta, gamma); + ARM_COMPUTE_ERROR_ON(input->info()->dimension(2) != mean->info()->dimension(0)); + + _input = input; + _output = output; + _mean = mean; + _var = var; + _beta = beta; + _gamma = gamma; + _epsilon = epsilon; + + const unsigned int num_elems_processed_per_iteration = 4 / input->info()->element_size(); + + // Set build options + std::set build_opts; + std::string dt_name = (input->info()->data_type() == DataType::F32) ? 
"DATA_TYPE_FP32" : "DATA_TYPE_FP16"; + build_opts.emplace(("#define " + dt_name)); + build_opts.emplace(("#define ESPILON " + float_to_string_with_full_precision(_epsilon))); + build_opts.emplace(("#define LOCAL_SIZE_X " + support::cpp11::to_string(1))); + build_opts.emplace(("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1))); + build_opts.emplace(("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1))); + + // Create kernel + _kernel = static_cast(GCKernelLibrary::get().create_kernel("batchnormalization_layer", build_opts)); + + // Configure kernel window + Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration)); + + AccessWindowHorizontal input_access(input->info(), 0, num_elems_processed_per_iteration); + AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration); + AccessWindowStatic mean_access(mean->info(), 0, 0, mean->info()->dimension(0) + 1, mean->info()->dimension(1)); + AccessWindowStatic var_access(var->info(), 0, 0, var->info()->dimension(0) + 1, var->info()->dimension(1)); + AccessWindowStatic beta_access(beta->info(), 0, 0, beta->info()->dimension(0) + 1, beta->info()->dimension(1)); + AccessWindowStatic gamma_access(gamma->info(), 0, 0, gamma->info()->dimension(0) + 1, gamma->info()->dimension(1)); + + update_window_and_padding(win, input_access, output_access, mean_access, var_access, beta_access, gamma_access); + output_access.set_valid_region(win, input->info()->valid_region()); + + _kernel.clear_params(); + + _kernel.set_shader_params_binding_point(0); + + IGCKernel::configure(win); +} + +void GCBatchNormalizationLayerKernel::run(const Window &window) +{ + ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); + ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); + + _kernel.use(); + + Window slice = window.first_slice_window_3D(); + + Window vector_slice = window.first_slice_window_1D(); + vector_slice.set(Window::DimX, Window::Dimension(0, 0, 0)); + + unsigned 
int idx = 2 * num_arguments_per_3D_tensor(); + add_1D_tensor_argument(idx, _mean, 3, vector_slice); + add_1D_tensor_argument(idx, _var, 4, vector_slice); + add_1D_tensor_argument(idx, _beta, 5, vector_slice); + add_1D_tensor_argument(idx, _gamma, 6, vector_slice); + + do + { + idx = 0; + add_3D_tensor_argument(idx, _input, 1, slice); + add_3D_tensor_argument(idx, _output, 2, slice); + + _kernel.update_shader_params(); + enqueue(*this, slice); + } + while(window.slide_window_slice_3D(slice)); +} diff --git a/src/core/GLES_COMPUTE/kernels/GCCol2ImKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCCol2ImKernel.cpp new file mode 100644 index 0000000000..10716232c9 --- /dev/null +++ b/src/core/GLES_COMPUTE/kernels/GCCol2ImKernel.cpp @@ -0,0 +1,101 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "arm_compute/core/GLES_COMPUTE/kernels/GCCol2ImKernel.h" + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h" +#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h" +#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h" +#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/core/Validate.h" + +using namespace arm_compute; + +GCCol2ImKernel::GCCol2ImKernel() + : _input(nullptr), _output(nullptr), _convolved_dims() +{ +} + +void GCCol2ImKernel::configure(const IGCTensor *input, IGCTensor *output, + std::pair convolved_dims) +{ + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32); + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); + + _kernel.clear_params(); + + _input = input; + _output = output; + _convolved_dims = convolved_dims; + + // Create kernel + std::set build_opts; + constexpr unsigned int num_elems_processed_per_iteration = 8; + build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(num_elems_processed_per_iteration)); + build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(num_elems_processed_per_iteration)); + build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1)); + build_opts.insert("#define COL2IM"); + _kernel = static_cast(GCKernelLibrary::get().create_kernel("col2im", build_opts)); + + // Set static kernel arguments + unsigned int idx = num_arguments_per_2D_tensor() + num_arguments_per_3D_tensor(); + _kernel.set_params(idx++, _convolved_dims.first); + + // Configure window + Window win = calculate_max_window(*input->info(), Steps()); + + // The GCCol2ImKernel doesn't need padding so update_window_and_padding() can be skipped + output->info()->set_valid_region(ValidRegion(Coordinates(), output->info()->tensor_shape())); + + // set shader params binding point + _kernel.set_shader_params_binding_point(0); + + 
IGCKernel::configure(win); +} + +void GCCol2ImKernel::run(const Window &window) +{ + ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); + ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(IGCKernel::window(), window); + + Window slice_in = window.first_slice_window_2D(); + Window slice_out = window.first_slice_window_3D(); + + _kernel.use(); + + do + { + // Set inputs + unsigned int idx = 0; + unsigned int binding = 1; + add_2D_tensor_argument(idx, _input, binding++, slice_in); + add_3D_tensor_argument(idx, _output, binding++, slice_out); + _kernel.update_shader_params(); + enqueue(*this, slice_in); + } + while(window.slide_window_slice_2D(slice_in) && window.slide_window_slice_3D(slice_out)); +} diff --git a/src/core/GLES_COMPUTE/kernels/GCDepthConcatenateKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCDepthConcatenateKernel.cpp new file mode 100644 index 0000000000..7f9f438a46 --- /dev/null +++ b/src/core/GLES_COMPUTE/kernels/GCDepthConcatenateKernel.cpp @@ -0,0 +1,145 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/core/GLES_COMPUTE/kernels/GCDepthConcatenateKernel.h" + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h" +#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h" +#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h" +#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Utils.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/core/Window.h" + +#include "support/ToolchainSupport.h" + +using namespace arm_compute; + +GCDepthConcatenateKernel::GCDepthConcatenateKernel() + : _input(nullptr), _output(nullptr), _top_bottom(0), _left_right(0) +{ +} + +BorderSize GCDepthConcatenateKernel::border_size() const +{ + return BorderSize(_top_bottom, _left_right); +} + +void GCDepthConcatenateKernel::configure(const IGCTensor *input, unsigned int depth_offset, IGCTensor *output) +{ + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); + ARM_COMPUTE_ERROR_ON(input->info()->dimension(2) + depth_offset > output->info()->dimension(2)); + ARM_COMPUTE_ERROR_ON(input->info()->dimension(0) > output->info()->dimension(0)); + ARM_COMPUTE_ERROR_ON(input->info()->dimension(1) > output->info()->dimension(1)); + ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(3, input, output); + + // The gaps between the two lowest dimensions of input and output need to be divisible by 2 + // Otherwise it is not clear how the padding should be added onto the input tensor + ARM_COMPUTE_ERROR_ON((output->info()->dimension(0) - input->info()->dimension(0)) % 2); + 
ARM_COMPUTE_ERROR_ON((output->info()->dimension(1) - input->info()->dimension(1)) % 2); + + _input = input; + _output = output; + + // Add build options + std::set build_opts; + std::string dt_name = (input->info()->data_type() == DataType::F32) ? "DATA_TYPE_FP32" : "DATA_TYPE_FP16"; + build_opts.emplace(("#define " + dt_name)); + build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(1)); + build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1)); + build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1)); + + // Configure kernel window + _left_right = (output->info()->dimension(0) - input->info()->dimension(0)) / 2; + _top_bottom = (output->info()->dimension(1) - input->info()->dimension(1)) / 2; + + const int offset_to_first_elements_in_bytes = depth_offset * output->info()->strides_in_bytes()[2]; + + build_opts.emplace("#define OFFSETS_X " + support::cpp11::to_string(_left_right)); + build_opts.emplace("#define OFFSETS_Y " + support::cpp11::to_string(_top_bottom)); + build_opts.emplace("#define OFFSETS_Z " + support::cpp11::to_string(offset_to_first_elements_in_bytes)); + + // Create kernel + _kernel = static_cast(GCKernelLibrary::get().create_kernel("concatenate_depth", build_opts)); + + unsigned int num_elems_processed_per_iteration = 1; + unsigned int num_elems_read_per_iteration = 1; + if(input->info()->data_type() == DataType::F32) + { + num_elems_processed_per_iteration = 1; + num_elems_read_per_iteration = 1; + } + else if(input->info()->data_type() == DataType::F16) + { + num_elems_processed_per_iteration = 4; + num_elems_read_per_iteration = 4; + } + const unsigned int num_rows_read_per_iteration = 1; + + // The window needs to be based on input as we copy all the depths of input + Window win = calculate_max_window(*output->info(), Steps(num_elems_processed_per_iteration)); + win.set(Window::DimZ, Window::Dimension(0, input->info()->tensor_shape().z(), 1)); + + AccessWindowRectangle 
input_access(input->info(), -_left_right, -_top_bottom, num_elems_read_per_iteration, num_rows_read_per_iteration); + AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration); + update_window_and_padding(win, input_access, output_access); + output_access.set_valid_region(win, ValidRegion(Coordinates(0, 0), output->info()->tensor_shape())); + + _kernel.clear_params(); + _kernel.set_shader_params_binding_point(0); + IGCKernel::configure(win); +} + +void GCDepthConcatenateKernel::run(const Window &window) +{ + ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); + ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IGCKernel::window(), window); + + _kernel.use(); + + Window slice = window.first_slice_window_3D(); + + do + { + if(_input->info()->data_type() == DataType::F32) + { + unsigned int idx = 0; + add_3D_tensor_argument(idx, _input, 1, slice); + add_3D_tensor_argument(idx, _output, 2, slice); + } + else if(_input->info()->data_type() == DataType::F16) + { + unsigned int idx = 0; + add_3D_tensor_argument(idx, _input, BufferParam(1, 3), slice); + add_3D_tensor_argument(idx, _output, BufferParam(2, 3), slice); + } + + _kernel.update_shader_params(); + + enqueue(*this, slice); + } + while(window.slide_window_slice_3D(slice)); +} diff --git a/src/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.cpp new file mode 100644 index 0000000000..1fa2a71fff --- /dev/null +++ b/src/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.cpp @@ -0,0 +1,394 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.h" + +#include "arm_compute/core/AccessWindowStatic.h" +#include "arm_compute/core/Error.h" +#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h" +#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h" +#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/IAccessWindow.h" +#include "arm_compute/core/ITensor.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/core/Validate.h" +#include "support/ToolchainSupport.h" + +using namespace arm_compute; + +template +GCDirectConvolutionLayerKernel::GCDirectConvolutionLayerKernel() + : _input(nullptr), _bias(nullptr), _weights(nullptr), _output(nullptr), _border_size(0), _conv_stride_x(0), _conv_stride_y(0), _conv_pad_x(0), _conv_pad_y(0), _lws(gles::NDRange(1U, 1U, 1U)) +{ +} + +template +BorderSize GCDirectConvolutionLayerKernel::border_size() const +{ + return _border_size; +} + +template +void GCDirectConvolutionLayerKernel::configure(const IGCTensor *input, const IGCTensor *weights, const IGCTensor *bias, IGCTensor *output, const PadStrideInfo &conv_info) +{ + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights, output); + ARM_COMPUTE_ERROR_ON(weights->info()->dimension(2) != input->info()->dimension(2)); + ARM_COMPUTE_ERROR_ON(weights->info()->dimension(0) != weights->info()->dimension(1)); + ARM_COMPUTE_ERROR_ON(weights->info()->num_dimensions() > 4); + ARM_COMPUTE_ERROR_ON_MSG((kernel_size == 3 && std::get<0>(conv_info.stride()) > 2), "Strides larger than 2 not supported in 3x3 direct convolution!"); + ARM_COMPUTE_ERROR_ON(kernel_size != weights->info()->dimension(0)); + + if(bias != nullptr) + { + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(weights, bias); + // FIXME: Bug in framework, workaround it in tests currently. 
+ //ARM_COMPUTE_ERROR_ON(bias->info()->dimension(0) != weights->info()->dimension(3)); + ARM_COMPUTE_ERROR_ON(bias->info()->num_dimensions() > 1); + } + + _conv_stride_x = std::get<0>(conv_info.stride()); + _conv_stride_y = std::get<1>(conv_info.stride()); + _conv_pad_x = std::get<0>(conv_info.pad()); + _conv_pad_y = std::get<1>(conv_info.pad()); + + _input = input; + _weights = weights; + _output = output; + _bias = bias; + _border_size = BorderSize(_conv_pad_y, _conv_pad_x); + + std::set options; + + options.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(_lws[0])); + options.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(_lws[1])); + options.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(_lws[2])); + options.emplace("#define STRIDE_X " + support::cpp11::to_string(_conv_stride_x)); + + std::string dt_name = (input->info()->data_type() == DataType::F32) ? "DATA_TYPE_FP32" : "DATA_TYPE_FP16"; + options.emplace(("#define " + dt_name)); + + unsigned int num_elems_read_per_iteration_x = kernel_size * _conv_stride_x; + unsigned int num_elems_read_per_iteration_y = 1; + unsigned int num_elems_written_per_iteration_x = 1; + unsigned int num_elems_written_per_iteration_y = 1; + unsigned int num_elems_written_per_iteration_z = 1; + + if(kernel_size == 3) + { + if((_conv_stride_x == 1) && (_conv_stride_y == 1)) + { + switch(input->info()->data_type()) + { + // TODO(APPBROWSER-299): Choose the most optimal path and remove others. 
+#define PROCESS_X_4ELEMENTS_Y_3ELEMENTS_FP16 + + case DataType::F16: +#if defined(PROCESS_X_8ELEMENTS_Y_3ELEMENTS_FP16) + options.emplace("#define PROCESS_X_8ELEMENTS_Y_3ELEMENTS_FP16"); + num_elems_read_per_iteration_x = 16; + num_elems_read_per_iteration_y = 5; + num_elems_written_per_iteration_x = 8; + num_elems_written_per_iteration_y = 3; +#elif defined(PROCESS_X_4ELEMENTS_Y_3ELEMENTS_FP16) + options.emplace("#define PROCESS_X_4ELEMENTS_Y_3ELEMENTS_FP16"); + num_elems_read_per_iteration_x = 8; + num_elems_read_per_iteration_y = 5; + num_elems_written_per_iteration_x = 4; + num_elems_written_per_iteration_y = 3; +#elif defined(PROCESS_X_4ELEMENTS_Y_4ELEMENTS_FP16) + options.emplace("#define PROCESS_X_4ELEMENTS_Y_4ELEMENTS_FP16"); + num_elems_read_per_iteration_x = 8; + num_elems_read_per_iteration_y = 6; + num_elems_written_per_iteration_x = 4; + num_elems_written_per_iteration_y = 4; +#elif defined(PROCESS_X_4ELEMENTS_Y_3ELEMENTS_Z_2ELEMENTS_FP16) + options.emplace("#define PROCESS_X_4ELEMENTS_Y_3ELEMENTS_Z_2ELEMENTS_FP16"); + num_elems_read_per_iteration_x = 8; + num_elems_read_per_iteration_y = 5; + num_elems_written_per_iteration_x = 4; + num_elems_written_per_iteration_y = 3; + num_elems_written_per_iteration_z = 2; +#endif /* PROCESS_X_8ELEMENTS_Y_3ELEMENTS_FP16 */ + break; + + case DataType::F32: + options.emplace("#define PROCESS_X_4ELEMENTS_Y_3ELEMENTS"); + num_elems_read_per_iteration_x = 8; + num_elems_read_per_iteration_y = 5; + num_elems_written_per_iteration_x = 4; + num_elems_written_per_iteration_y = 3; + break; + + default: + ARM_COMPUTE_ERROR("Current data type is not supported"); + break; + } + } + // FIXME: Just keep one in release + else + { + switch(input->info()->data_type()) + { + case DataType::F16: + options.emplace("#define PROCESS_X_4ELEMENTS_FP16"); + num_elems_read_per_iteration_x = 8; + num_elems_written_per_iteration_x = 4; + break; + + case DataType::F32: + // TODO(APPBROWSER-299): Choose the most optimal path and remove 
others. +#define PROCESS_4_ELEMENT + +#if defined(PROCESS_1_ELEMENT) + options.emplace("#define PROCESS_1_ELEMENT"); + num_elems_read_per_iteration_x = 3; + num_elems_written_per_iteration_x = 1; +#elif defined(PROCESS_4_ELEMENT) + options.emplace("#define PROCESS_4_ELEMENT"); + num_elems_read_per_iteration_x = 8; + num_elems_written_per_iteration_x = 4; +#elif defined(PROCESS_8_ELEMENT) + options.emplace("#define PROCESS_8_ELEMENT"); + num_elems_read_per_iteration_x = 12; + num_elems_written_per_iteration_x = 8; +#else /* PROCESS_1_ELEMENT */ +#error Have to declare how many elements to process in one thread. +#endif /* PROCESS_1_ELEMENT */ + break; + + default: + ARM_COMPUTE_ERROR("Current data type is not supported"); + break; + } + } + } + else if(kernel_size == 1) + { + switch(input->info()->data_type()) + { + case DataType::F16: + num_elems_read_per_iteration_x = 8; + num_elems_written_per_iteration_x = 8; + break; + + case DataType::F32: + num_elems_read_per_iteration_x = 1; + num_elems_written_per_iteration_x = 1; + break; + + default: + break; + } + } + else if(kernel_size == 5) + { + switch(input->info()->data_type()) + { + case DataType::F16: + num_elems_read_per_iteration_x = 8; + num_elems_written_per_iteration_x = 4; + + default: + break; + } + } + else + { + } + + if(_bias != nullptr) + { + options.emplace("#define BIAS"); + } + + std::stringstream kernel_name; + kernel_name << "direct_convolution" << kernel_size << "x" << kernel_size; + + _kernel = static_cast(GCKernelLibrary::get().create_kernel(kernel_name.str(), options)); + + _kernel.clear_params(); + + unsigned int idx = (_bias == nullptr) ? 
3 * num_arguments_per_3D_tensor() : (num_arguments_per_1D_tensor() + 3 * num_arguments_per_3D_tensor()); + + // Calculate output right and bottom border + const int output_width = output->info()->dimension(0); + const int output_height = output->info()->dimension(1); + const int output_padding_right = ceil_to_multiple(output_width, num_elems_written_per_iteration_x * _lws[0]) - output_width; + const int output_padding_bottom = ceil_to_multiple(output_height, num_elems_written_per_iteration_y * _lws[1]) - output_height; + + // Calculate input right and bottom border + const int input_width = input->info()->dimension(0); + const int input_height = input->info()->dimension(1); + const int upper_bound_w = ceil_to_multiple(((output_width + output_padding_right) * _conv_stride_x + (kernel_size - 1)), num_elems_read_per_iteration_x * _lws[0]) - _conv_pad_x - input_width; + const int upper_bound_h = ceil_to_multiple(((output_height + output_padding_bottom) * _conv_stride_y + (kernel_size - 1)), num_elems_read_per_iteration_y * _lws[1]) - _conv_pad_y - input_height; + const int padding_right = std::max(upper_bound_w, _conv_pad_x); + const int padding_bottom = std::max(upper_bound_h, _conv_pad_y); + + BorderSize border = BorderSize(0, output_padding_right, output_padding_bottom, 0); + + Window win = calculate_max_enlarged_window(*output->info(), Steps(num_elems_written_per_iteration_x, num_elems_written_per_iteration_y, num_elems_written_per_iteration_z), border); + + AccessWindowStatic input_access(input->info(), -_conv_pad_x, -_conv_pad_y, input_width + padding_right, input_height + padding_bottom); + AccessWindowStatic weights_access = AccessWindowStatic(nullptr, 0, 0, 0, 0); + AccessWindowStatic bias_access = AccessWindowStatic(nullptr, 0, 0, 0, 1); + + switch(weights->info()->data_type()) + { + case DataType::F16: + weights_access = AccessWindowStatic(weights->info(), 0, 0, kernel_size + 1, kernel_size); + if(_bias != nullptr) + { + bias_access = 
AccessWindowStatic(_bias->info(), 0, 0, _bias->info()->dimension(0) + 1, 1); + } + break; + + case DataType::F32: + weights_access = AccessWindowStatic(weights->info(), 0, 0, kernel_size, kernel_size); + if(_bias != nullptr) + { + bias_access = AccessWindowStatic(_bias->info(), 0, 0, _bias->info()->dimension(0), 1); + } + break; + + default: + ARM_COMPUTE_ERROR("Current data type is not supported"); + break; + } + + AccessWindowStatic output_access(output->info(), 0, 0, output_width + output_padding_right, output_height + output_padding_bottom); + + if(_bias != nullptr) + { + update_window_and_padding(win, input_access, weights_access, bias_access, output_access); + } + else + { + update_window_and_padding(win, input_access, weights_access, output_access); + } + + output_access.set_valid_region(win, ValidRegion(Coordinates(), output->info()->tensor_shape())); + + _kernel.set_params(idx++, _weights->info()->strides_in_bytes()[3]); // weights_stride_w + _kernel.set_params(idx++, _weights->info()->dimension(2)); // weights_depth + + // set shader params binding point + _kernel.set_shader_params_binding_point(0); + + IGCKernel::configure(win); +} + +template +void GCDirectConvolutionLayerKernel::run(const Window &window) +{ + ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); + ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); + + _kernel.use(); + + // Get initial windows + Window slice = window.first_slice_window_3D(); + Window win_in = window; + + win_in.adjust(Window::DimX, -_conv_pad_x, true); + win_in.adjust(Window::DimY, -_conv_pad_y, true); + win_in.set_dimension_step(Window::DimX, window.x().step() * _conv_stride_x); + win_in.set_dimension_step(Window::DimY, window.y().step() * _conv_stride_y); + + Window slice_in = win_in.first_slice_window_3D(); + + unsigned int idx1 = 2 * num_arguments_per_3D_tensor(); + add_3D_tensor_argument(idx1, _weights, BufferParam(3, 2), slice); + + if(_bias != nullptr) + { + Window slice_bias; + 
slice_bias.use_tensor_dimensions(_bias->info()->tensor_shape()); + add_1D_tensor_argument(idx1, _bias, BufferParam(4, 2), slice_bias); + } + + do + { + unsigned int idx = 0; + + switch(_input->info()->data_type()) + { + case DataType::F16: + switch(kernel_size) + { + case 1: + add_3D_tensor_argument(idx, _input, BufferParam(1, 4), slice_in); + add_3D_tensor_argument(idx, _output, BufferParam(2, 4), slice); + break; + + case 3: + add_3D_tensor_argument(idx, _input, BufferParam(1, 3), slice_in); + add_3D_tensor_argument(idx, _output, BufferParam(2, 3), slice); + break; + + case 5: + add_3D_tensor_argument(idx, _input, BufferParam(1, 3), slice_in); + add_3D_tensor_argument(idx, _output, BufferParam(2, 3), slice); + break; + + default: + ARM_COMPUTE_ERROR("Current kernel size %d is not supported", kernel_size); + break; + } + break; + + case DataType::F32: + switch(kernel_size) + { + case 1: + case 5: + add_3D_tensor_argument(idx, _input, BufferParam(1, 2), slice_in); + add_3D_tensor_argument(idx, _output, BufferParam(2, 2), slice); + break; + + case 3: + add_3D_tensor_argument(idx, _input, BufferParam(1, 4), slice_in); + add_3D_tensor_argument(idx, _output, BufferParam(2, 4), slice); + break; + + default: + ARM_COMPUTE_ERROR("Current kernel size %d is not supported", kernel_size); + break; + } + break; + + default: + ARM_COMPUTE_ERROR("Current data type is not supported"); + break; + } + + _kernel.update_shader_params(); + enqueue(*this, slice, _lws); + } + while(window.slide_window_slice_3D(slice) && win_in.slide_window_slice_3D(slice_in)); +} + +template class arm_compute::GCDirectConvolutionLayerKernel<1>; +template class arm_compute::GCDirectConvolutionLayerKernel<3>; +template class arm_compute::GCDirectConvolutionLayerKernel<5>; diff --git a/src/core/GLES_COMPUTE/kernels/GCDropoutKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCDropoutKernel.cpp new file mode 100644 index 0000000000..6244fbef80 --- /dev/null +++ b/src/core/GLES_COMPUTE/kernels/GCDropoutKernel.cpp @@ 
-0,0 +1,110 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "arm_compute/core/GLES_COMPUTE/kernels/GCDropoutKernel.h" + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h" +#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h" +#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h" +#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/core/Validate.h" +#include "support/ToolchainSupport.h" + +#include +#include +#include + +using namespace arm_compute; + +GCDropoutKernel::GCDropoutKernel() + : _input(nullptr), _mask(nullptr), _output(nullptr), _num_elems_processed_per_iteration(0) +{ +} + +void GCDropoutKernel::configure(const IGCTensor *input, IGCTensor *mask, IGCTensor *output, float ratio, bool forward) +{ + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, mask, output); + + _input = input; + _mask = mask; + _output = output; + _kernel.clear_params(); + + std::set build_opts; + std::string dt_name = (input->info()->data_type() == DataType::F32) ? "DATA_TYPE_FP32" : "DATA_TYPE_FP16"; + std::string fporbp = forward ? "FORWARD" : "BACKWARD"; + std::random_device rd; + std::mt19937 mt(rd()); + std::uniform_real_distribution dist(0.f, 1.f); + + build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(1)); + build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1)); + build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1)); + build_opts.emplace("#define RATIO " + support::cpp11::to_string(ratio)); + build_opts.emplace("#define SCALE " + support::cpp11::to_string(1. / (1. 
- ratio))); + build_opts.emplace("#define SEED " + support::cpp11::to_string(dist(mt))); + build_opts.emplace("#define " + dt_name); + build_opts.emplace("#define " + fporbp); + + _num_elems_processed_per_iteration = 4 / input->info()->element_size(); + + // Create kernel + _kernel = static_cast(GCKernelLibrary::get().create_kernel("dropout", build_opts)); + + // Configure kernel window + Window win = calculate_max_window(*input->info(), Steps(_num_elems_processed_per_iteration)); + + output->info()->set_valid_region(ValidRegion(Coordinates(), output->info()->tensor_shape())); + + // set shader params binding point + _kernel.set_shader_params_binding_point(0); + IGCKernel::configure(win); +} + +void GCDropoutKernel::run(const Window &window) +{ + ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); + ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(IGCKernel::window(), window); + + _kernel.use(); + + Window slice = window.first_slice_window_3D(); + + do + { + unsigned int idx = 0; + + add_3D_tensor_argument(idx, _input, BufferParam(1, 2), slice); + add_3D_tensor_argument(idx, _mask, BufferParam(2, 2), slice); + add_3D_tensor_argument(idx, _output, BufferParam(3, 2), slice); + + _kernel.update_shader_params(); + enqueue(*this, slice); + } + while(window.slide_window_slice_3D(slice)); +} diff --git a/src/core/GLES_COMPUTE/kernels/GCFillBorderKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCFillBorderKernel.cpp new file mode 100644 index 0000000000..36742ef81e --- /dev/null +++ b/src/core/GLES_COMPUTE/kernels/GCFillBorderKernel.cpp @@ -0,0 +1,169 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/core/GLES_COMPUTE/kernels/GCFillBorderKernel.h" + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h" +#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h" +#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h" +#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Utils.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/core/Window.h" + +#include +#include +#include + +using namespace arm_compute; + +GCFillBorderKernel::GCFillBorderKernel() + : IGCKernel(), _tensor(nullptr) +{ +} + +bool GCFillBorderKernel::is_parallelisable() const +{ + return false; +} + +template +void GCFillBorderKernel::set_constant_border(unsigned int idx, const PixelValue &constant_border_value) +{ + T value; + constant_border_value.get(value); + _kernel.set_params(idx, static_cast(value)); +} + +void GCFillBorderKernel::configure(const IGCTensor *tensor, BorderSize border_size, BorderMode border_mode, const PixelValue &constant_border_value) +{ + ARM_COMPUTE_ERROR_ON(tensor == nullptr); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(tensor, 1, DataType::F32, DataType::F16); + ARM_COMPUTE_ERROR_ON(tensor->info()->num_channels() != 1); + + border_size.limit(tensor->info()->padding()); + + // If there is no border: early exit + if(border_size.empty() || border_mode == BorderMode::UNDEFINED) + { + return; + } + + // Select appropriate kernel + std::string kernel_name = "fill_image_borders_" + lower_string(string_from_border_mode(border_mode)); + + // Define build options + std::set build_opts; + build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(1)); + build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1)); + build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1)); + build_opts.emplace("#define BORDER_SIZE_TOP " + support::cpp11::to_string(border_size.top)); + 
build_opts.emplace("#define BORDER_SIZE_BOTTOM " + support::cpp11::to_string(border_size.bottom)); + build_opts.emplace("#define BORDER_SIZE_LEFT " + support::cpp11::to_string(border_size.left)); + build_opts.emplace("#define BORDER_SIZE_RIGHT " + support::cpp11::to_string(border_size.right)); + + if(border_mode == BorderMode::REPLICATE) + { + build_opts.emplace("#define FILL_IMAGE_BORDERS_REPLICATE\n"); + } + else + { + build_opts.emplace("#define FILL_IMAGE_BORDERS_CONSTANT\n"); + } + + switch(tensor->info()->data_type()) + { + case DataType::F16: + build_opts.emplace("#define DATA_TYPE_FP16"); + break; + + case DataType::F32: + build_opts.emplace("#define DATA_TYPE_FP32"); + break; + + default: + ARM_COMPUTE_ERROR("Current data type is not supported"); + break; + } + + // Create kernel + _kernel = static_cast(GCKernelLibrary::get().create_kernel(kernel_name, build_opts)); + _tensor = tensor; + + _kernel.clear_params(); + + // Create static kernel arguments + const unsigned int valid_width = tensor->info()->valid_region().shape[0]; + const unsigned int valid_height = tensor->info()->valid_region().shape[1]; + const unsigned int total_valid_width = border_size.left + valid_width + border_size.right; + + // Set static kernel arguments + unsigned int idx = num_arguments_per_3D_tensor(); //Skip the tensor parameters + _kernel.set_params(idx++, valid_width); + _kernel.set_params(idx++, valid_height); + _kernel.set_params(idx++, tensor->info()->valid_region().anchor[0]); + _kernel.set_params(idx++, tensor->info()->valid_region().anchor[1]); + + if(BorderMode::CONSTANT == border_mode) + { + set_constant_border(idx++, constant_border_value); + } + + // Configure kernel window + Window win; + win.set(Window::DimX, Window::Dimension(0, total_valid_width + valid_height)); + win.set(Window::DimY, Window::Dimension(0, 1, 1)); + win.use_tensor_dimensions(tensor->info()->tensor_shape(), Window::DimZ); + + _kernel.set_shader_params_binding_point(0); + + 
IGCKernel::configure(win); +} + +void GCFillBorderKernel::run(const Window &window) +{ + // Border mode undefined or border width == 0 + if(_kernel.get_program() == 0) + { + return; + } + + ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); + ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(IGCKernel::window(), window); + + _kernel.use(); + Window slice = window.first_slice_window_3D(); + + do + { + unsigned int idx = 0; + add_3D_tensor_argument(idx, _tensor, 1, slice); + + _kernel.update_shader_params(); + + enqueue(*this, slice); + } + while(window.slide_window_slice_3D(slice)); +} diff --git a/src/core/GLES_COMPUTE/kernels/GCGEMMInterleave4x4Kernel.cpp b/src/core/GLES_COMPUTE/kernels/GCGEMMInterleave4x4Kernel.cpp new file mode 100644 index 0000000000..5e3788af99 --- /dev/null +++ b/src/core/GLES_COMPUTE/kernels/GCGEMMInterleave4x4Kernel.cpp @@ -0,0 +1,129 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/core/GLES_COMPUTE/kernels/GCGEMMInterleave4x4Kernel.h" + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h" +#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h" +#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h" +#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/core/Utils.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/core/Window.h" + +using namespace arm_compute; + +GCGEMMInterleave4x4Kernel::GCGEMMInterleave4x4Kernel() + : _input(nullptr), _output(nullptr) +{ +} + +void GCGEMMInterleave4x4Kernel::configure(const IGCTensor *input, IGCTensor *output) +{ + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32); + ARM_COMPUTE_ERROR_ON_NULLPTR(output); + + TensorShape output_shape = input->info()->tensor_shape(); + output_shape.set(0, input->info()->dimension(0) * 4); + output_shape.set(1, std::ceil(input->info()->dimension(1) / 4.0f)); + + // Output auto initialization if not yet initialized + auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type(), input->info()->fixed_point_position()); + + ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(output->info()->tensor_shape(), output_shape); + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); + + _input = input; + _output = output; + + std::set build_opts; + std::string dt_name = (input->info()->data_type() == DataType::F32) ?
"DATA_TYPE_FP32" : "DATA_TYPE_FP16"; + build_opts.emplace(("#define " + dt_name)); + build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(1)); + build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1)); + build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1)); + + // Create kernel + build_opts.emplace("#define GEMM_INTERLEAVE4x4"); + _kernel = static_cast(GCKernelLibrary::get().create_kernel("gemm_interleave4x4", build_opts)); + + // Configure kernel window + const unsigned int num_elems_processed_per_iteration_x = max_gc_vector_width / data_size_from_type(input->info()->data_type()); + constexpr unsigned int num_elems_processed_per_iteration_y = 4; + const unsigned int num_elems_written_per_iteration = num_elems_processed_per_iteration_x * num_elems_processed_per_iteration_y; + + Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y)); + + AccessWindowRectangle input_access(input->info(), 0, 0, num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y); + AccessWindowRectangle output_access(output->info(), 0, 0, num_elems_written_per_iteration, 1, 4.f, 0.25f); + + update_window_and_padding(win, input_access, output_access); + + output_access.set_valid_region(win, input->info()->valid_region()); + + _kernel.clear_params(); + + // set shader params binding point + _kernel.set_shader_params_binding_point(0); + + IGCKernel::configure(win); +} + +void GCGEMMInterleave4x4Kernel::run(const Window &window) +{ + ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); + ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IGCKernel::window(), window); + + _kernel.use(); + + /* + * This kernel puts the values in a 4x4 block of Matrix A on the same row (Interleaved values) + * |a00 a01 a02 a03| + * |a10 a11 a12 a13| + * |a20 a21 a22 a23| = | a00 a10 a20 a30 || a01 a11 a21 a31 || a02 a12 a22 a32 || a03 a13 a23 a33 | + * |a30 a31 a32 a33| + * + * After 
this operation, the output matrix will have the following shape: [ height * 4, width / 4 ] + */ + Window in_slice = window.first_slice_window_2D(); + Window out_slice = window.first_slice_window_2D(); + + // Change x and y steps for the slide of output tensor + out_slice.scale(Window::DimX, 4.f); + out_slice.scale(Window::DimY, 0.25f); + + do + { + unsigned int idx = 0; + add_2D_tensor_argument(idx, _input, 1, in_slice); + add_2D_tensor_argument(idx, _output, 2, out_slice); + + _kernel.update_shader_params(); + + enqueue(*this, in_slice); + } + while(window.slide_window_slice_2D(in_slice) && window.slide_window_slice_2D(out_slice)); +} diff --git a/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixAccumulateBiasesKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixAccumulateBiasesKernel.cpp new file mode 100644 index 0000000000..434070a46c --- /dev/null +++ b/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixAccumulateBiasesKernel.cpp @@ -0,0 +1,123 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixAccumulateBiasesKernel.h" + +#include "arm_compute/core/AccessWindowStatic.h" +#include "arm_compute/core/Error.h" +#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h" +#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h" +#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h" +#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/core/Utils.h" +#include "arm_compute/core/Validate.h" + +using namespace arm_compute; + +GCGEMMMatrixAccumulateBiasesKernel::GCGEMMMatrixAccumulateBiasesKernel() + : _accum(nullptr), _biases(nullptr) +{ +} + +void GCGEMMMatrixAccumulateBiasesKernel::configure(IGCTensor *accum, const IGCTensor *biases) +{ + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(accum, 1, DataType::F16, DataType::F32); + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(biases, accum); + ARM_COMPUTE_ERROR_ON(biases->info()->num_dimensions() != 1); + + _biases = biases; + _accum = accum; + + std::set build_opts; + build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(1)); + build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1)); + build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1)); + + // Create kernel + build_opts.emplace("#define GEMM_ACCUMULATE_BIASES"); + std::string dt_name = (accum->info()->data_type() == DataType::F32) ? 
"DATA_TYPE_FP32" : "DATA_TYPE_FP16"; + build_opts.emplace(("#define " + dt_name)); + _kernel = GCKernelLibrary::get().create_kernel("gemm_accumulate_biases", build_opts); + + // Configure kernel window + unsigned int num_elems_processed_per_iteration = 1; + + if(_accum->info()->data_type() == DataType::F32) + { + num_elems_processed_per_iteration = 16; + } + else if(_accum->info()->data_type() == DataType::F16) + { + num_elems_processed_per_iteration = 4; + } + + Window win = calculate_max_window(*_accum->info(), Steps(num_elems_processed_per_iteration)); + + AccessWindowStatic biases_access(biases->info(), 0, 0, ceil_to_multiple(biases->info()->dimension(0), num_elems_processed_per_iteration), biases->info()->dimension(1)); + AccessWindowHorizontal accum_access(_accum->info(), 0, num_elems_processed_per_iteration); + + update_window_and_padding(win, biases_access, accum_access); + + _kernel.clear_params(); + // set shader params binding point + _kernel.set_shader_params_binding_point(0); + + IGCKernel::configure(win); +} + +void GCGEMMMatrixAccumulateBiasesKernel::run(const Window &window) +{ + ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); + ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(IGCKernel::window(), window); + + _kernel.use(); + + Window accum_slice = window.first_slice_window_2D(); + + Window biases_slice(accum_slice); + biases_slice.set(Window::DimY, Window::Dimension(0, 1, 1)); + + // Run kernel + do + { + // Set arguments + unsigned int idx = 0; + if(_accum->info()->data_type() == DataType::F32) + { + add_2D_tensor_argument(idx, _accum, 1, accum_slice); + add_1D_tensor_argument(idx, _biases, 2, biases_slice); + } + else if(_accum->info()->data_type() == DataType::F16) + { + add_2D_tensor_argument(idx, _accum, BufferParam(1, 3), accum_slice); + add_1D_tensor_argument(idx, _biases, BufferParam(2, 3), biases_slice); + } + + _kernel.update_shader_params(); + + enqueue(*this, accum_slice); + } + while(window.slide_window_slice_2D(accum_slice)); +} diff --git 
a/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixAdditionKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixAdditionKernel.cpp new file mode 100644 index 0000000000..fa0415249a --- /dev/null +++ b/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixAdditionKernel.cpp @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixAdditionKernel.h" + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h" +#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h" +#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h" +#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/core/Window.h" + +using namespace arm_compute; + +GCGEMMMatrixAdditionKernel::GCGEMMMatrixAdditionKernel() + : _input(nullptr), _output(nullptr) +{ +} + +void GCGEMMMatrixAdditionKernel::configure(const IGCTensor *input, IGCTensor *output, float beta) +{ + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32); + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); + ARM_COMPUTE_ERROR_ON(input->info()->dimension(0) != output->info()->dimension(0)); + ARM_COMPUTE_ERROR_ON(input->info()->dimension(1) != output->info()->dimension(1)); + + _input = input; + _output = output; + const unsigned int num_elems_processed_per_iteration = max_gc_vector_width / data_size_from_type(input->info()->data_type()); + + std::set build_opts; + std::string dt_name = (input->info()->data_type() == DataType::F32) ? 
"DATA_TYPE_FP32" : "DATA_TYPE_FP16"; + build_opts.emplace(("#define " + dt_name)); + build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(1)); + build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1)); + build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1)); + build_opts.emplace("#define BETA " + float_to_string_with_full_precision(beta)); + + // Create kernel + build_opts.emplace("#define GEMM_MATRIXADDITION"); + std::string data_type_name = lower_string(string_from_data_type(input->info()->data_type())); + _kernel = GCKernelLibrary::get().create_kernel(("gemm_ma"), build_opts); + + // Configure kernel window + Window win = calculate_max_window(*_input->info(), Steps(num_elems_processed_per_iteration)); + + AccessWindowHorizontal input_access(input->info(), 0, num_elems_processed_per_iteration); + AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration); + + update_window_and_padding(win, input_access, output_access); + + output_access.set_valid_region(win, input->info()->valid_region()); + + _kernel.clear_params(); + // set shader params binding point + _kernel.set_shader_params_binding_point(0); + + IGCKernel::configure(win); +} + +void GCGEMMMatrixAdditionKernel::run(const Window &window) +{ + ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); + ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IGCKernel::window(), window); + + _kernel.use(); + + Window slice = window.first_slice_window_2D(); + + do + { + unsigned int idx = 0; + add_2D_tensor_argument(idx, _input, 1, slice); + add_2D_tensor_argument(idx, _output, 2, slice); + + _kernel.update_shader_params(); + + enqueue(*this, slice); + } + while(window.slide_window_slice_2D(slice)); +} diff --git a/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixMultiplyKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixMultiplyKernel.cpp new file mode 100644 index 0000000000..ea9b3874b2 --- /dev/null +++ 
b/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixMultiplyKernel.cpp @@ -0,0 +1,210 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixMultiplyKernel.h" + +#include "arm_compute/core/AccessWindowStatic.h" +#include "arm_compute/core/AccessWindowTranspose.h" +#include "arm_compute/core/Error.h" +#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h" +#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h" +#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h" +#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/core/Utils.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/core/Window.h" + +#include +#include + +using namespace arm_compute; + +GCGEMMMatrixMultiplyKernel::GCGEMMMatrixMultiplyKernel() + : _input0(nullptr), _input1(nullptr), _output(nullptr) +{ +} + +void GCGEMMMatrixMultiplyKernel::configure(const IGCTensor *input0, const IGCTensor *input1, IGCTensor *output, float alpha, bool is_interleaved_transposed) +{ + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input0, 1, DataType::F32, DataType::F16); + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input0, input1, output); + + if(!is_interleaved_transposed) + { + ARM_COMPUTE_ERROR_ON(input0->info()->dimension(0) != input1->info()->dimension(1)); + } + + _input0 = input0; + _input1 = input1; + _output = output; + + std::set build_opts; + Window win; + + build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(1)); + build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1)); + build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1)); + build_opts.emplace("#define COLS_A " + support::cpp11::to_string(input0->info()->dimension(0))); + build_opts.emplace("#define COLS_B " + support::cpp11::to_string(input1->info()->dimension(0))); + build_opts.emplace("#define ALPHA " + float_to_string_with_full_precision(alpha)); + + // Check if the output tensor is a vector. 
If so,the kernel runs the vector-matrix multiplication + if(is_interleaved_transposed) + { + switch(input0->info()->data_type()) + { + case DataType::F16: + build_opts.emplace("#define DATA_TYPE_FP16"); + break; + + case DataType::F32: + build_opts.emplace("#define DATA_TYPE_FP32"); + break; + + default: + ARM_COMPUTE_ERROR("Current data type is not supported"); + break; + } + + build_opts.emplace("#define GEMM_MM_INTERLEAVED_TRANSPOSED"); + + // Create kernel + _kernel = GCKernelLibrary::get().create_kernel(("gemm_mm_interleaved_transposed"), build_opts); + + // Configure window kernel + const unsigned int num_elems_processed_per_iteration_x = max_gc_vector_width / data_size_from_type(input0->info()->data_type()); + constexpr unsigned int num_elems_processed_per_iteration_y = 4; + + win = calculate_max_window(*output->info(), Steps(num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y)); + + AccessWindowRectangle input0_access(input0->info(), 0, 0, num_elems_processed_per_iteration_y, 1, 1.f, 0.25f); + AccessWindowTranspose input1_access(input1->info(), 0, 0, num_elems_processed_per_iteration_x, 1, 0.f, 0.25f); + AccessWindowRectangle output_access(output->info(), 0, 0, num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y); + + update_window_and_padding(win, input0_access, input1_access, output_access); + + output_access.set_valid_region(win, ValidRegion(Coordinates(0, 0), output->info()->tensor_shape())); + } + else + { + ARM_COMPUTE_ERROR_ON(input0->info()->dimension(0) != input1->info()->dimension(1)); + + // Special case for 1xN, 2xN, 3xN and 4xN input0 tensor + unsigned int num_elems_processed_per_iteration_x; + unsigned int num_elems_processed_per_iteration_y; + + switch(input0->info()->data_type()) + { + case DataType::F16: + num_elems_processed_per_iteration_x = 4; + num_elems_processed_per_iteration_y = 1; + build_opts.emplace("#define DATA_TYPE_FP16"); + break; + + case DataType::F32: + 
num_elems_processed_per_iteration_x = max_gc_vector_width / data_size_from_type(input0->info()->data_type()); + num_elems_processed_per_iteration_y = std::min(static_cast(output->info()->dimension(1)), 4); + build_opts.emplace("#define DATA_TYPE_FP32"); + break; + + default: + ARM_COMPUTE_ERROR("Current data type is not supported"); + break; + } + + build_opts.emplace("#define GEMM_MM_FLOATING_POINT"); + build_opts.emplace("#define NUM_ELEMS_PROCESSED_PER_THREAD_X " + support::cpp11::to_string(num_elems_processed_per_iteration_x)); + build_opts.emplace("#define NUM_ELEMS_PROCESSED_PER_THREAD_Y " + support::cpp11::to_string(num_elems_processed_per_iteration_y)); + + // Create kernel + _kernel = GCKernelLibrary::get().create_kernel("gemm_mm_floating_point", build_opts); + + win = calculate_max_window(*output->info(), Steps(num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y)); + + AccessWindowStatic input0_access(input0->info(), 0, 0, ceil_to_multiple(input0->info()->dimension(0), num_elems_processed_per_iteration_x), ceil_to_multiple(input0->info()->dimension(1), + num_elems_processed_per_iteration_y)); + AccessWindowStatic input1_access(input1->info(), 0, 0, ceil_to_multiple(input1->info()->dimension(0), num_elems_processed_per_iteration_x), input1->info()->dimension(1)); + AccessWindowRectangle output_access(output->info(), 0, 0, num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y); + + update_window_and_padding(win, input0_access, input1_access, output_access); + + Coordinates coord; + coord.set_num_dimensions(output->info()->num_dimensions()); + output_access.set_valid_region(win, ValidRegion(coord, output->info()->tensor_shape())); + } + + _kernel.clear_params(); + _kernel.set_shader_params_binding_point(0); + IGCKernel::configure(win); +} + +void GCGEMMMatrixMultiplyKernel::run(const Window &window) +{ + ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); + ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IGCKernel::window(), 
window); + + _kernel.use(); + + Window slice = window.first_slice_window_2D(); + Window slice_matrix_b = slice; + + slice_matrix_b.set(Window::DimX, Window::Dimension(0, 1, 1)); + slice_matrix_b.set(Window::DimY, Window::Dimension(0, 1, 1)); + + do + { + Window slice_b = slice; + // Don't slice matrix B along the z dimension if matrix B has just 2 dimensions and matrix A more than 2 + // This scenario can happen when the matrix multiplication is used to perform a convolution operation + if(_input1->info()->num_dimensions() < 3) + { + slice_b = slice_matrix_b; + } + + unsigned int idx = 0; + switch(_input0->info()->data_type()) + { + case DataType::F16: + add_2D_tensor_argument(idx, _input0, BufferParam(1, 2), slice); + add_2D_tensor_argument(idx, _input1, BufferParam(2, 3), slice_b); + add_2D_tensor_argument(idx, _output, BufferParam(3, 3), slice); + break; + + case DataType::F32: + add_2D_tensor_argument(idx, _input0, BufferParam(1, 2), slice); + add_2D_tensor_argument(idx, _input1, BufferParam(2, 2), slice_b); + add_2D_tensor_argument(idx, _output, BufferParam(3, 2), slice); + break; + + default: + ARM_COMPUTE_ERROR("Current data type is not supported"); + break; + } + + _kernel.update_shader_params(); + enqueue(*this, slice); + } + while(window.slide_window_slice_2D(slice)); +} diff --git a/src/core/GLES_COMPUTE/kernels/GCGEMMTranspose1xWKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCGEMMTranspose1xWKernel.cpp new file mode 100644 index 0000000000..a1270b4c3d --- /dev/null +++ b/src/core/GLES_COMPUTE/kernels/GCGEMMTranspose1xWKernel.cpp @@ -0,0 +1,128 @@ +/* + * Copyright (c) 2017 ARM Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/core/GLES_COMPUTE/kernels/GCGEMMTranspose1xWKernel.h" + +#include "arm_compute/core/AccessWindowTranspose.h" +#include "arm_compute/core/Error.h" +#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h" +#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h" +#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h" +#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/core/Window.h" + +#include + +using namespace arm_compute; + +void GCGEMMTranspose1xWKernel::configure(const IGCTensor *input, IGCTensor *output) +{ + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32); + ARM_COMPUTE_ERROR_ON_NULLPTR(output); + + TensorShape output_shape{ input->info()->tensor_shape() }; + const size_t transpose_w = 16 / input->info()->element_size(); + output_shape.set(0, input->info()->dimension(1) * transpose_w); + output_shape.set(1, static_cast(std::ceil((input->info()->dimension(0) / static_cast(transpose_w))))); + + // Output tensor auto initialization if not yet initialized + auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type(), input->info()->fixed_point_position()); + + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); + ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(output->info()->tensor_shape(), output_shape); + + const unsigned int num_elems_processed_per_iteration = 16 / input->info()->element_size(); + const int scale_x = num_elems_processed_per_iteration; + + _input = input; + _output = output; + + std::set build_opts; + std::string dt_name = (input->info()->data_type() == DataType::F32) ?
"DATA_TYPE_FP32" : "DATA_TYPE_FP16"; + build_opts.emplace(("#define " + dt_name)); + build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(1)); + build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1)); + build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1)); + /* + * Following an example of how the transposition1xW works when the input data type is F32 + * + * |a00 a01 a02 a03| + * |a10 a11 a12 a13| + * |a20 a21 a22 a23| = | a00 a01 a02 a03 || a10 a11 a12 a13 || a20 a21 a22 a23 || a30 a31 a32 a33 | + * |a30 a31 a32 a33| + * + * The output matrix will have the following shape: [ height * W, ceil(width / W) ], where W = (16 / element size of the tensor) + */ + // Create kernel + build_opts.emplace("#define GEMM_TRANSPOSE1xW"); + _kernel = GCKernelLibrary::get().create_kernel("gemm_transpose1x4", build_opts); + + // Configure window + Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration)); + + ARM_COMPUTE_ERROR_ON_MSG((win.x().end() / scale_x) == 0, "Transposed shape would be 0 in the second dimension"); + + AccessWindowHorizontal input_access(input->info(), 0, num_elems_processed_per_iteration); + AccessWindowTranspose output_access(output->info(), 0, 0, num_elems_processed_per_iteration, 1, scale_x, 1.f / scale_x); + + update_window_and_padding(win, input_access, output_access); + + output_access.set_valid_region(win, ValidRegion(Coordinates(0, 0), input->info()->tensor_shape())); + + _kernel.clear_params(); + // set shader params binding point + _kernel.set_shader_params_binding_point(0); + + IGCKernel::configure(win); +} + +void GCGEMMTranspose1xWKernel::run(const Window &window) +{ + ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); + ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IGCKernel::window(), window); + + _kernel.use(); + + // Output is transposed + Window out_window(window); + out_window.set(Window::DimX, window.y()); + out_window.set(Window::DimY, window.x()); + + 
Window in_slice = window.first_slice_window_2D(); + Window out_slice = out_window.first_slice_window_2D(); + + do + { + unsigned int idx = 0; + add_2D_tensor_argument(idx, _input, 1, in_slice); + add_2D_tensor_argument(idx, _output, 2, out_slice); + + _kernel.update_shader_params(); + + enqueue(*this, in_slice); + } + while(window.slide_window_slice_2D(in_slice) && out_window.slide_window_slice_2D(out_slice)); +} diff --git a/src/core/GLES_COMPUTE/kernels/GCIm2ColKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCIm2ColKernel.cpp new file mode 100644 index 0000000000..935d8420ff --- /dev/null +++ b/src/core/GLES_COMPUTE/kernels/GCIm2ColKernel.cpp @@ -0,0 +1,230 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/core/GLES_COMPUTE/kernels/GCIm2ColKernel.h" + +#include "arm_compute/core/AccessWindowStatic.h" +#include "arm_compute/core/Error.h" +#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h" +#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h" +#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h" +#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/core/Validate.h" +#include "support/ToolchainSupport.h" + +#include +#include + +using namespace arm_compute; + +GCIm2ColKernel::GCIm2ColKernel() + : _input(nullptr), _output(nullptr), _convolved_dims(), _num_elems_processed_per_iteration(1), _run_func(nullptr) +{ +} + +void GCIm2ColKernel::configure(const IGCTensor *input, IGCTensor *output, std::pair kernel_dims, const PadStrideInfo &conv_info, bool has_bias) +{ + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); + ARM_COMPUTE_UNUSED(kernel_dims); + + _input = input; + _output = output; + _kernel.clear_params(); + + std::set build_opts; + std::string dt_name = (input->info()->data_type() == DataType::F32) ? 
"DATA_TYPE_FP32" : "DATA_TYPE_FP16"; + build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(1)); + build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1)); + build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1)); + build_opts.insert("#define " + dt_name); + + if(has_bias) + { + build_opts.emplace("#define HAS_BIAS"); + } + + int pad_x = 0; + int pad_y = 0; + int stride_x = 0; + int stride_y = 0; + std::tie(pad_x, pad_y) = conv_info.pad(); + std::tie(stride_x, stride_y) = conv_info.stride(); + + const bool run_img2col_reduced = (output->info()->dimension(0) == (input->info()->dimension(0) * input->info()->dimension(1) * input->info()->dimension(2))) && (TensorShape::num_max_dimensions >= 4) + && (std::equal(input->info()->tensor_shape().cbegin() + 3, + input->info()->tensor_shape().cend(), + output->info()->tensor_shape().cbegin() + 1)) + && ((stride_x == 1) && (stride_y == 1) && (pad_x == 0) && (pad_y == 0)); + + if(!run_img2col_reduced) + { + // this path is currently not used and not validated + build_opts.insert("#define IM2COL_GENERIC"); + _convolved_dims = scaled_dimensions(input->info()->dimension(0), input->info()->dimension(1), + kernel_dims.first, kernel_dims.second, + conv_info); + _num_elems_processed_per_iteration = output->info()->dimension(0); + + build_opts.emplace("#define KERNEL_WIDTH " + support::cpp11::to_string(kernel_dims.first)); + build_opts.emplace("#define KERNEL_HEIGHT " + support::cpp11::to_string(kernel_dims.second)); + build_opts.emplace("#define KERNEL_DEPTH " + support::cpp11::to_string(input->info()->dimension(2))); + build_opts.emplace("#define CONVOLVED_WIDTH " + support::cpp11::to_string(_convolved_dims.first)); + build_opts.emplace("#define CONVOLVED_HEIGHT " + support::cpp11::to_string(_convolved_dims.second)); + build_opts.emplace("#define STRIDE_X " + support::cpp11::to_string(conv_info.stride().first)); + build_opts.emplace("#define STRIDE_Y " + 
support::cpp11::to_string(conv_info.stride().second)); + build_opts.emplace("#define PAD_X " + support::cpp11::to_string(conv_info.pad().first)); + build_opts.emplace("#define PAD_Y " + support::cpp11::to_string(conv_info.pad().second)); + build_opts.emplace("#define SRC_WIDTH " + support::cpp11::to_string(input->info()->dimension(0))); + build_opts.emplace("#define SRC_HEIGHT " + support::cpp11::to_string(input->info()->dimension(1))); + + // Create kernel + _kernel = static_cast(GCKernelLibrary::get().create_kernel("im2col_generic", build_opts)); + + _run_func = &GCIm2ColKernel::run_generic; + } + else + { + build_opts.insert("#define IM2COL_REDUCED"); + _num_elems_processed_per_iteration = 4 / input->info()->element_size(); + + // Create kernel + _kernel = static_cast(GCKernelLibrary::get().create_kernel("im2col_reduced", build_opts)); + + _run_func = &GCIm2ColKernel::run_reduced; + } + + // Configure kernel window + Window win = calculate_max_window(*input->info(), Steps(_num_elems_processed_per_iteration)); + + if(input->info()->data_type() == DataType::F16) + { + // Calculate input right and bottom border + AccessWindowHorizontal input_access(input->info(), 0, _num_elems_processed_per_iteration); + + // Calculate output right and bottom border + const int output_width = output->info()->dimension(0); + const int output_height = output->info()->dimension(1); + const int output_padding_right = ceil_to_multiple(output_width, _num_elems_processed_per_iteration) - output_width; + AccessWindowStatic output_access(output->info(), 0, 0, output_width + output_padding_right, output_height); + + update_window_and_padding(win, input_access, output_access); + } + + output->info()->set_valid_region(ValidRegion(Coordinates(), output->info()->tensor_shape())); + + if(!run_img2col_reduced) + { + // set the Z dimension's step same size as the whole dimension so that one can't split across the Z dimension + win.set_dimension_step(Window::DimZ, win[Window::DimZ].end() - 
win[Window::DimZ].start()); + } + + // set shader params binding point + _kernel.set_shader_params_binding_point(0); + IGCKernel::configure(win); +} + +void GCIm2ColKernel::run(const Window &window) +{ + ARM_COMPUTE_ERROR_ON(_run_func == nullptr); + (this->*_run_func)(window); +} + +void GCIm2ColKernel::run_generic(const Window &window) +{ + ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); + ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(IGCKernel::window(), window); + + // Get initial windows + Window window_collapsed = window.collapse_if_possible(IGCKernel::window(), Window::DimZ); + // Change the Z dimension's step back to 1 + window_collapsed.set_dimension_step(Window::DimZ, 1); + + Window slice = window_collapsed.first_slice_window_3D(); + Window slice_in = window_collapsed.first_slice_window_3D(); + Window slice_out = window_collapsed.first_slice_window_3D(); + + // Setup slice + slice.set(Window::DimX, Window::Dimension(0, static_cast(_convolved_dims.first), 1)); + slice.set(Window::DimY, Window::Dimension(0, static_cast(_convolved_dims.second), 1)); + + // Setup input slice + // The first three dimensions of the input are increased by the inner loops + slice_in.set(Window::DimX, Window::Dimension(0, 0, 0)); + slice_in.set(Window::DimY, Window::Dimension(0, 0, 0)); + slice_in.set(Window::DimZ, Window::Dimension(0, 0, 0)); + + // Setup output slice + slice_out.set(Window::DimX, Window::Dimension(0, _output->info()->dimension(0), _num_elems_processed_per_iteration)); + slice_out.set(Window::DimY, Window::Dimension(0, _output->info()->dimension(1), 1)); + slice_out.set(Window::DimZ, Window::Dimension(0, 1, 1)); + + _kernel.use(); + + do + { + unsigned int idx = 0; + add_3D_tensor_argument(idx, _input, 1, slice_in); + add_2D_tensor_argument(idx, _output, 2, slice_out); + + _kernel.set_params(idx++, static_cast(_input->info()->dimension(2))); + _kernel.set_params(idx++, static_cast(_input->info()->strides_in_bytes()[3])); + _kernel.set_params(idx++, 
static_cast(_output->info()->strides_in_bytes()[3])); + _kernel.update_shader_params(); + + enqueue(*this, slice); + } + while(window_collapsed.slide_window_slice_3D(slice) && window_collapsed.slide_window_slice_3D(slice_out) && window_collapsed.slide_window_slice_3D(slice_in)); +} + +void GCIm2ColKernel::run_reduced(const Window &window) +{ + ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); + ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(IGCKernel::window(), window); + + Window out_window; + out_window.use_tensor_dimensions(_output->info()->tensor_shape()); + + Window out_slice = out_window.first_slice_window_1D(); + Window in_slice = window.first_slice_window_3D(); + + _kernel.use(); + + // Run kernel + do + { + // Set arguments + unsigned int idx = 0; + + add_3D_tensor_argument(idx, _input, 1, in_slice); + add_1D_tensor_argument(idx, _output, 2, out_slice); + _kernel.set_params(idx++, _input->info()->dimension(0)); + _kernel.set_params(idx++, _input->info()->dimension(1)); + _kernel.update_shader_params(); + + enqueue(*this, in_slice); + } + while(window.slide_window_slice_3D(in_slice) && out_window.slide_window_slice_1D(out_slice)); +} diff --git a/src/core/GLES_COMPUTE/kernels/GCNormalizationLayerKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCNormalizationLayerKernel.cpp new file mode 100644 index 0000000000..65e54f538c --- /dev/null +++ b/src/core/GLES_COMPUTE/kernels/GCNormalizationLayerKernel.cpp @@ -0,0 +1,124 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/core/GLES_COMPUTE/kernels/GCNormalizationLayerKernel.h" + +#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h" +#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h" +#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Utils.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/core/Window.h" + +#include + +using namespace arm_compute; + +GCNormalizationLayerKernel::GCNormalizationLayerKernel() + : _input(nullptr), _squared_input(nullptr), _output(nullptr), _border_size(0) +{ +} + +BorderSize GCNormalizationLayerKernel::border_size() const +{ + return _border_size; +} + +void GCNormalizationLayerKernel::configure(const IGCTensor *input, const IGCTensor *squared_input, IGCTensor *output, NormalizationLayerInfo norm_info) +{ + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32); + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); + ARM_COMPUTE_ERROR_ON_MSG(!(norm_info.norm_size() % 2), "Normalization size should be odd"); + ARM_COMPUTE_ERROR_ON_MSG(norm_info.type() == NormType::IN_MAP_2D, "2D In-Map Normalization not implemented"); + + // Set build options + std::set build_opts; + + _input = input; + _squared_input = squared_input; + _output = output; + + const bool is_in_map = (norm_info.type() == NormType::IN_MAP_1D); + const unsigned int border_width = is_in_map ? std::min(norm_info.norm_size() / 2, 3U) : 0; + _border_size = BorderSize(0, border_width); + + // Set kernel static arguments + std::string func_name = ((norm_info.type() == NormType::IN_MAP_1D) ? 
"IN_MAP_1D" : "CROSS_MAP"); + build_opts.emplace(("#define " + func_name)); + build_opts.emplace(("#define COEFF " + float_to_string_with_full_precision(norm_info.scale_coeff()))); + build_opts.emplace(("#define BETA " + float_to_string_with_full_precision(norm_info.beta()))); + build_opts.emplace(("#define KAPPA " + float_to_string_with_full_precision(norm_info.kappa()))); + build_opts.emplace(("#define RADIUS " + support::cpp11::to_string(norm_info.norm_size() / 2))); + build_opts.emplace(("#define LOCAL_SIZE_X " + support::cpp11::to_string(1))); + build_opts.emplace(("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1))); + build_opts.emplace(("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1))); + + // Create kernel + _kernel = static_cast(GCKernelLibrary::get().create_kernel("normalization_layer", build_opts)); + + // Configure kernel window + const unsigned int num_elems_processed_per_iteration = 1; + const unsigned int num_elems_read_per_iteration = num_elems_processed_per_iteration + 2 * (norm_info.norm_size() / 2); + + Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration)); + + AccessWindowHorizontal input_access(input->info(), -_border_size.left, num_elems_read_per_iteration); + AccessWindowHorizontal squared_input_access(squared_input->info(), -_border_size.left, num_elems_read_per_iteration); + AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration); + + update_window_and_padding(win, input_access, squared_input_access, output_access); + + output_access.set_valid_region(win, input->info()->valid_region()); + + _kernel.clear_params(); + + _kernel.set_shader_params_binding_point(0); + + IGCKernel::configure(win); +} + +void GCNormalizationLayerKernel::run(const Window &window) +{ + ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); + ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); + + _kernel.use(); + + Window slice = window.first_slice_window_3D(); + + do + { + 
unsigned int idx = 0; + unsigned int binding = 1; + add_3D_tensor_argument(idx, _input, binding++, slice); + add_3D_tensor_argument(idx, _squared_input, binding++, slice); + add_3D_tensor_argument(idx, _output, binding++, slice); + + _kernel.update_shader_params(); + + enqueue(*this, slice); + } + while(window.slide_window_slice_3D(slice)); +} diff --git a/src/core/GLES_COMPUTE/kernels/GCPixelWiseMultiplicationKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCPixelWiseMultiplicationKernel.cpp new file mode 100644 index 0000000000..2b5cee455c --- /dev/null +++ b/src/core/GLES_COMPUTE/kernels/GCPixelWiseMultiplicationKernel.cpp @@ -0,0 +1,127 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/core/GLES_COMPUTE/kernels/GCPixelWiseMultiplicationKernel.h" + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h" +#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h" +#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h" +#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/core/Window.h" + +#include +#include +#include +#include +using namespace arm_compute; + +GCPixelWiseMultiplicationKernel::GCPixelWiseMultiplicationKernel() + : _input1(nullptr), _input2(nullptr), _output(nullptr) +{ +} + +void GCPixelWiseMultiplicationKernel::configure(const IGCTensor *input1, const IGCTensor *input2, IGCTensor *output, float scale) +{ + ARM_COMPUTE_ERROR_ON_NULLPTR(input1, input2, output); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::F32); + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input1, input2); + ARM_COMPUTE_ERROR_ON_NULLPTR(output); + ARM_COMPUTE_ERROR_ON_MSG(scale < 0, "Scale cannot be negative. "); + + // Auto initialize output if not initialized + { + set_shape_if_empty(*output->info(), input1->info()->tensor_shape()); + set_format_if_unknown(*output->info(), Format::F32); + } + + ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input1, input2, output); + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input1, input2, output); + ARM_COMPUTE_ERROR_ON_MSG(scale < 0, "Scale cannot be negative. 
"); + + _input1 = input1; + _input2 = input2; + _output = output; + + std::string data_type; + std::string compute_type; + + // Set kernel build options + std::set build_opts; + build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(1)); + build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1)); + build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1)); + + build_opts.emplace("#define SCALE " + support::cpp11::to_string(scale)); + + // Create kernel + _kernel = static_cast(GCKernelLibrary::get().create_kernel("pixelwise_mul_float", build_opts)); + + _kernel.clear_params(); + + // Configure kernel window + constexpr unsigned int num_elems_processed_per_iteration = 1; + + Window win = calculate_max_window(*input1->info(), Steps(num_elems_processed_per_iteration)); + + AccessWindowHorizontal input1_access(input1->info(), 0, num_elems_processed_per_iteration); + AccessWindowHorizontal input2_access(input2->info(), 0, num_elems_processed_per_iteration); + AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration); + + update_window_and_padding(win, input1_access, input2_access, output_access); + + ValidRegion valid_region = intersect_valid_regions(input1->info()->valid_region(), + input2->info()->valid_region()); + output_access.set_valid_region(win, valid_region); + + // set shader params binding point + _kernel.set_shader_params_binding_point(0); + + IGCKernel::configure(win); +} + +void GCPixelWiseMultiplicationKernel::run(const Window &window) +{ + ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); + ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IGCKernel::window(), window); + + _kernel.use(); + + Window slice = window.first_slice_window_3D(); + + do + { + unsigned int idx = 0; + unsigned int binding = 1; + add_3D_tensor_argument(idx, _input1, binding++, slice); + add_3D_tensor_argument(idx, _input2, binding++, slice); + add_3D_tensor_argument(idx, _output, binding++, slice); + + 
_kernel.update_shader_params(); + enqueue(*this, slice); + } + while(window.slide_window_slice_3D(slice)); +} diff --git a/src/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.cpp new file mode 100644 index 0000000000..c877da3783 --- /dev/null +++ b/src/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.cpp @@ -0,0 +1,254 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.h" + +#include "arm_compute/core/AccessWindowStatic.h" +#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h" +#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h" +#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h" +#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Utils.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/core/Window.h" + +#include +#include +#include + +using namespace arm_compute; + +GCPoolingLayerKernel::GCPoolingLayerKernel() + : _input(nullptr), _output(nullptr), _pool_info(), _border_size(0), _num_elems_processed_per_iteration(1) +{ +} + +BorderSize GCPoolingLayerKernel::border_size() const +{ + return _border_size; +} + +void GCPoolingLayerKernel::configure(const IGCTensor *input, IGCTensor *output, const PoolingLayerInfo &pool_info) +{ + int pool_pad_x = 0; + int pool_pad_y = 0; + int pool_stride_x = 0; + int pool_stride_y = 0; + unsigned int pooled_w = 0; + unsigned int pooled_h = 0; + const PoolingType pool_type = pool_info.pool_type(); + const int pool_size = pool_info.pool_size(); + const PadStrideInfo pad_stride_info = pool_info.pad_stride_info(); + std::tie(pool_pad_x, pool_pad_y) = pad_stride_info.pad(); + std::tie(pool_stride_x, pool_stride_y) = pad_stride_info.stride(); + + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); + ARM_COMPUTE_ERROR_ON_NULLPTR(output); + ARM_COMPUTE_ERROR_ON(pool_pad_x >= pool_size || pool_pad_y >= pool_size); + ARM_COMPUTE_ERROR_ON(pool_size > 7 && is_data_type_fixed_point(input->info()->data_type())); + + // Check output dimensions + std::tie(pooled_w, pooled_h) = scaled_dimensions(input->info()->dimension(0), + input->info()->dimension(1), + pool_size, + pool_size, + pool_info.pad_stride_info()); + + // Output auto initialization if not yet initialized + { + 
TensorShape output_shape{ input->info()->tensor_shape() }; + output_shape.set(0, pooled_w); + output_shape.set(1, pooled_h); + + auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type(), input->info()->fixed_point_position()); + } + + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); + ARM_COMPUTE_ERROR_ON((output->info()->dimension(0) != pooled_w) || (output->info()->dimension(1) != pooled_h)); + + const int input_width = input->info()->dimension(0); + const int input_height = input->info()->dimension(1); + + // Set instance variables + _input = input; + _output = output; + _pool_info = pool_info; + _border_size = BorderSize(pool_pad_y, pool_pad_x); + + // Set build options + std::set build_opts; + build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(1)); + build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1)); + build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1)); + if(input->info()->data_type() == DataType::F32) + { + build_opts.insert("#define DATA_TYPE_FP32"); + } + else + { + build_opts.insert("#define DATA_TYPE_FP16"); + } + build_opts.emplace(("#define POOL_" + string_from_pooling_type(pool_type))); + build_opts.emplace(("#define STRIDE_X " + support::cpp11::to_string(pool_stride_x))); + build_opts.emplace(("#define MAX_WIDTH " + support::cpp11::to_string(input->info()->dimension(0) + pool_pad_x))); + build_opts.emplace(("#define MAX_HEIGHT " + support::cpp11::to_string(input->info()->dimension(1) + pool_pad_y))); + build_opts.emplace(("#define STRIDE_Y " + support::cpp11::to_string(pool_stride_y))); + build_opts.emplace(("#define PAD_X " + support::cpp11::to_string(pool_pad_x))); + build_opts.emplace(("#define PAD_Y " + support::cpp11::to_string(pool_pad_y))); + + // Create kernel + if((pool_size == 2) || (pool_size == 3) || (pool_size == 7)) + { + // Check if we have pool3x3 with stride_x less equal than 3. 
In these cases, run an optimized OpenGLES kernel where + // each thread computes 4 output elements + const bool is_pool3x3_stride_le3 = (pool_size == 3) && (pool_stride_x <= 3) && !is_data_type_fixed_point(input->info()->data_type()); + + int num_elements_read_per_iteration = (pool_size == 7) ? 8 : pool_size; + + if(input->info()->data_type() == DataType::F32) + { + if(is_pool3x3_stride_le3) + { + // Change the number of elements processed and number of elements read per iteration for pooling 3x3 with stride less equal than 3 + _num_elems_processed_per_iteration = 4; + num_elements_read_per_iteration = pool_size * (pool_stride_x + 1); + } + } + else + { + num_elements_read_per_iteration = pool_size; + if(is_pool3x3_stride_le3) + { + _num_elems_processed_per_iteration = 4; + } + else + { + _num_elems_processed_per_iteration = 2; + } + } + + const int upper_bound_w = ((pooled_w - 1) * pool_stride_x - pool_pad_x + num_elements_read_per_iteration) - input_width; + const int upper_bound_h = ((pooled_h - 1) * pool_stride_y - pool_pad_y + pool_size) - input_height; + + _border_size.right = std::max(upper_bound_w, pool_pad_x); + _border_size.bottom = std::max(upper_bound_h, pool_pad_y); + + std::string kernel_name = "pooling_layer_" + support::cpp11::to_string(pool_size); + if(is_pool3x3_stride_le3) + { + build_opts.insert("#define POOLING_LAYER_3_OPTIMIZED"); + _kernel = static_cast(GCKernelLibrary::get().create_kernel(kernel_name + "_optimized", build_opts)); + } + else + { + build_opts.insert("#define POOLING_LAYER_" + support::cpp11::to_string(pool_size)); + _kernel = static_cast(GCKernelLibrary::get().create_kernel(kernel_name, build_opts)); + } + } + else // Run general case + { + if(input->info()->data_type() == DataType::F32) + { + _num_elems_processed_per_iteration = 1; + } + else + { + _num_elems_processed_per_iteration = 2; + } + const int upper_bound_w = ((pooled_w - 1) * pool_stride_x - pool_pad_x + pool_size) - input_width; + const int upper_bound_h = 
((pooled_h - 1) * pool_stride_y - pool_pad_y + pool_size) - input_height; + + _border_size.right = std::max(upper_bound_w, pool_pad_x); + _border_size.bottom = std::max(upper_bound_h, pool_pad_y); + + build_opts.emplace(("#define POOL_SIZE " + support::cpp11::to_string(pool_size))); + + build_opts.insert("#define POOLING_LAYER_N"); + _kernel = static_cast(GCKernelLibrary::get().create_kernel("pooling_layer_n", build_opts)); + } + + Window win = calculate_max_window(*output->info(), Steps(_num_elems_processed_per_iteration)); + + if(input->info()->data_type() == DataType::F32) + { + AccessWindowStatic input_access(input->info(), -pool_pad_x, -pool_pad_y, input_width + _border_size.right, input_height + _border_size.bottom); + AccessWindowHorizontal output_access(output->info(), 0, _num_elems_processed_per_iteration); + update_window_and_padding(win, input_access, output_access); + output_access.set_valid_region(win, ValidRegion(Coordinates(), output->info()->tensor_shape())); + } + else + { + // Calculate output right and bottom border + const int output_width = output->info()->dimension(0); + const int output_height = output->info()->dimension(1); + const int output_padding_right = ceil_to_multiple(output_width, _num_elems_processed_per_iteration) - output_width; + const int output_padding_bottom = ceil_to_multiple(output_height, 1) - output_height; + const int input_padding_right = ceil_to_multiple(input_width + 2 * _border_size.right, _num_elems_processed_per_iteration) - (input_width + 2 * _border_size.right); + const int input_padding_bottom = ceil_to_multiple(input_height + 2 * _border_size.bottom, 1) - (input_height + 2 * _border_size.bottom); + + // Configure kernel window + AccessWindowStatic input_access(input->info(), -pool_pad_x, -pool_pad_y, input_width + _border_size.right + input_padding_right, input_height + _border_size.bottom + input_padding_bottom); + AccessWindowStatic output_access(output->info(), 0, 0, output_width + output_padding_right, 
output_height + output_padding_bottom); + update_window_and_padding(win, input_access, output_access); + output_access.set_valid_region(win, ValidRegion(Coordinates(), output->info()->tensor_shape())); + } + + _kernel.clear_params(); + _kernel.set_shader_params_binding_point(0); + + IGCKernel::configure(win); +} + +void GCPoolingLayerKernel::run(const Window &window) +{ + ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); + ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); + + unsigned int pool_pad_x, pool_pad_y, pool_stride_x, pool_stride_y = 0; + std::tie(pool_pad_x, pool_pad_y) = _pool_info.pad_stride_info().pad(); + std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride(); + + _kernel.use(); + + Window window_collapsed = window.collapse_if_possible(IGCKernel::window(), Window::DimZ); + Window slice = window_collapsed.first_slice_window_3D(); + + do + { + // Upsample input by pool size + Window in_slice(slice); + in_slice.set(Window::DimX, Window::Dimension(in_slice.x().start() - pool_pad_x, in_slice.x().end() * pool_stride_x, pool_stride_x * _num_elems_processed_per_iteration)); + in_slice.set(Window::DimY, Window::Dimension(in_slice.y().start() - pool_pad_y, in_slice.y().end() * pool_stride_y, pool_stride_y)); + + // Set inputs + unsigned int idx = 0; + add_3D_tensor_argument(idx, _input, 1, in_slice); + add_3D_tensor_argument(idx, _output, 2, slice); + + _kernel.update_shader_params(); + enqueue(*this, slice); + } + while(window_collapsed.slide_window_slice_3D(slice)); +} diff --git a/src/core/GLES_COMPUTE/kernels/GCSoftmaxLayerKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCSoftmaxLayerKernel.cpp new file mode 100644 index 0000000000..09a0f79ab2 --- /dev/null +++ b/src/core/GLES_COMPUTE/kernels/GCSoftmaxLayerKernel.cpp @@ -0,0 +1,353 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/core/GLES_COMPUTE/kernels/GCSoftmaxLayerKernel.h" + +#include "arm_compute/core/AccessWindowStatic.h" +#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h" +#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h" +#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h" +#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Utils.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/core/Window.h" + +#include +#include + +using namespace arm_compute; + +void GCLogits1DMaxKernel::configure(const IGCTensor *input, IGCTensor *output) +{ + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); + ARM_COMPUTE_ERROR_ON_NULLPTR(output); + + // Softmax across the x dimension + TensorShape output_shape{ input->info()->tensor_shape() }; + output_shape.set(0, 1); + + // Output auto initialization if not yet initialized + auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type(), input->info()->fixed_point_position()); + + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); + ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(output->info()->tensor_shape(), output_shape); + + _input = input; + _output = output; + + // Set build options + std::set build_opts; + std::string dt_name = (input->info()->data_type() == DataType::F32) ? 
"DATA_TYPE_FP32" : "DATA_TYPE_FP16"; + build_opts.insert("#define " + dt_name); + build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(1)); + build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1)); + build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1)); + build_opts.insert("#define SOFTMAX_LAYER_MAX"); + + // Tell the kernel that the width is not a multiple of 4 + if((input->info()->dimension(0) % 4) != 0) + { + build_opts.insert("#define NON_MULTIPLE_OF_4"); + } + + // Create kernel + _kernel = static_cast(GCKernelLibrary::get().create_kernel("softmax_layer_max", build_opts)); + + _kernel.clear_params(); + + // Set fixed arguments + unsigned int idx = 2 * num_arguments_per_3D_tensor(); //Skip the input and output parameters + _kernel.set_params(idx++, input->info()->dimension(0)); + + // Configure kernel window + // The kernel loops over all elements in steps of 4 + const unsigned int num_elems_processed_per_iteration = ceil_to_multiple(input->info()->dimension(0), 4); + unsigned int num_elems_written_per_iteration = 1; + if(input->info()->data_type() == DataType::F16) + { + num_elems_written_per_iteration = 2; + } + + Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration)); + AccessWindowHorizontal input_access(input->info(), 0, num_elems_processed_per_iteration); + AccessWindowHorizontal output_access(output->info(), 0, num_elems_written_per_iteration); + + update_window_and_padding(win, input_access, output_access); + + output_access.set_valid_region(win, ValidRegion(Coordinates(), output->info()->tensor_shape())); + + // set shader params binding point + _kernel.set_shader_params_binding_point(0); + + IGCKernel::configure(win); +} + +void GCLogits1DMaxKernel::run(const Window &window) +{ + ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); + + Window slice = window.first_slice_window_3D(); + + _kernel.use(); + + do + { + unsigned int idx1 = 0; + 
switch(_input->info()->data_type()) + { + case DataType::F16: + add_3D_tensor_argument(idx1, _input, BufferParam(1, 2), slice); + add_3D_tensor_argument(idx1, _output, BufferParam(2, 2), slice); + break; + + case DataType::F32: + add_3D_tensor_argument(idx1, _input, BufferParam(1, 2), slice); + add_3D_tensor_argument(idx1, _output, BufferParam(2, 2), slice); + break; + + default: + ARM_COMPUTE_ERROR("Current data type is mot supported"); + break; + } + + _kernel.update_shader_params(); + enqueue(*this, slice); + } + while(window.slide_window_slice_3D(slice)); +} + +GCLogits1DShiftExpSumKernel::GCLogits1DShiftExpSumKernel() + : _input(nullptr), _max(nullptr), _output(nullptr), _sum(nullptr) +{ +} + +void GCLogits1DShiftExpSumKernel::configure(const IGCTensor *input, const IGCTensor *max, IGCTensor *output, IGCTensor *sum) +{ + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); + ARM_COMPUTE_ERROR_ON_NULLPTR(max, sum, output); + + // Output auto initialization if not yet initialized + auto_init_if_empty(*sum->info(), max->info()->tensor_shape(), 1, input->info()->data_type(), input->info()->fixed_point_position()); + auto_init_if_empty(*output->info(), input->info()->tensor_shape(), 1, input->info()->data_type(), input->info()->fixed_point_position()); + + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output, max, sum); + ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, output); + ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(max, sum); + + _input = input; + _max = max; + _output = output; + _sum = sum; + + // Set build options + std::set build_opts; + std::string dt_name = (input->info()->data_type() == DataType::F32) ? 
"DATA_TYPE_FP32" : "DATA_TYPE_FP16"; + build_opts.insert("#define " + dt_name); + build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(1)); + build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1)); + build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1)); + build_opts.insert("#define SOFTMAX_LAYER_SHIFT_EXP_SUM"); + + // Tell the kernel that the width is not a multiple of 4 + if((input->info()->dimension(0) % 4) != 0) + { + build_opts.insert("#define NON_MULTIPLE_OF_4"); + } + + // Create kernel + _kernel = static_cast(GCKernelLibrary::get().create_kernel("softmax_layer_shift_exp_sum", build_opts)); + + _kernel.clear_params(); + + // Set fixed arguments + unsigned int idx = 4 * num_arguments_per_3D_tensor(); //Skip the input and output parameters + _kernel.set_params(idx++, input->info()->dimension(0)); + + // Configure window + // The kernel loops over all elements in steps of 4 + const unsigned int num_elems_processed_per_iteration = ceil_to_multiple(input->info()->dimension(0), 4); + unsigned int num_elems_written_per_iteration = 1; + if(input->info()->data_type() == DataType::F16) + { + num_elems_written_per_iteration = 2; + } + + Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration)); + + AccessWindowHorizontal input_access(input->info(), 0, num_elems_processed_per_iteration); + AccessWindowHorizontal max_access(max->info(), 0, num_elems_written_per_iteration); + AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration); + AccessWindowHorizontal sum_access(sum->info(), 0, num_elems_written_per_iteration); + + update_window_and_padding(win, input_access, max_access, output_access, sum_access); + + output_access.set_valid_region(win, input->info()->valid_region()); + sum_access.set_valid_region(win, ValidRegion(Coordinates(), sum->info()->tensor_shape())); + + // set shader params binding point + 
_kernel.set_shader_params_binding_point(0); + + IGCKernel::configure(win); +} + +void GCLogits1DShiftExpSumKernel::run(const Window &window) +{ + ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); + ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); + + Window window_collapsed = window.collapse_if_possible(IGCKernel::window(), Window::DimZ); + Window slice = window_collapsed.first_slice_window_3D(); + + _kernel.use(); + + do + { + unsigned int idx = 0; + switch(_input->info()->data_type()) + { + case DataType::F16: + add_3D_tensor_argument(idx, _input, BufferParam(1, 2), slice); + add_3D_tensor_argument(idx, _max, BufferParam(2, 2), slice); + add_3D_tensor_argument(idx, _output, BufferParam(3, 2), slice); + add_3D_tensor_argument(idx, _sum, BufferParam(4, 2), slice); + break; + + case DataType::F32: + add_3D_tensor_argument(idx, _input, BufferParam(1, 2), slice); + add_3D_tensor_argument(idx, _max, BufferParam(2, 2), slice); + add_3D_tensor_argument(idx, _output, BufferParam(3, 2), slice); + add_3D_tensor_argument(idx, _sum, BufferParam(4, 2), slice); + break; + + default: + ARM_COMPUTE_ERROR("Current data type is mot supported"); + break; + } + + _kernel.update_shader_params(); + enqueue(*this, slice); + } + while(window_collapsed.slide_window_slice_3D(slice)); +} + +GCLogits1DNormKernel::GCLogits1DNormKernel() + : _input(nullptr), _sum(nullptr), _output(nullptr) +{ +} + +void GCLogits1DNormKernel::configure(const IGCTensor *input, const IGCTensor *sum, IGCTensor *output) +{ + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); + ARM_COMPUTE_ERROR_ON_NULLPTR(sum, output); + + // Output auto initialization if not yet initialized + auto_init_if_empty(*output->info(), input->info()->tensor_shape(), 1, input->info()->data_type(), input->info()->fixed_point_position()); + + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, sum, output); + ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT_POSITION(input, sum, output); + 
ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, output); + + _input = input; + _sum = sum; + _output = output; + + // Set build options + std::set build_opts; + std::string dt_name = (input->info()->data_type() == DataType::F32) ? "DATA_TYPE_FP32" : "DATA_TYPE_FP16"; + build_opts.insert("#define " + dt_name); + build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(1)); + build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1)); + build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1)); + build_opts.insert("#define SOFTMAX_LAYER_NORM"); + + // Create kernel + _kernel = static_cast(GCKernelLibrary::get().create_kernel("softmax_layer_norm", build_opts)); + + // Configure window + constexpr unsigned int num_elems_processed_per_iteration = 4; + unsigned int num_elems_written_per_iteration = 1; + if(input->info()->data_type() == DataType::F16) + { + num_elems_written_per_iteration = 2; + } + + Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration)); + + AccessWindowHorizontal input_access(input->info(), 0, num_elems_processed_per_iteration); + AccessWindowStatic sum_access(sum->info(), 0, 0, num_elems_written_per_iteration, sum->info()->dimension(1)); + AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration); + + update_window_and_padding(win, input_access, sum_access, output_access); + + output_access.set_valid_region(win, input->info()->valid_region()); + + _kernel.clear_params(); + + // set shader params binding point + _kernel.set_shader_params_binding_point(0); + + IGCKernel::configure(win); +} + +void GCLogits1DNormKernel::run(const Window &window) +{ + ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); + ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); + + Window window_collapsed = window.collapse_if_possible(IGCKernel::window(), Window::DimZ); + Window slice = window_collapsed.first_slice_window_3D(); + + _kernel.use(); + + do 
+ { + Window sum_slice = slice; + sum_slice.set(Window::DimX, Window::Dimension(0, 1, 1)); + + unsigned int idx1 = 0; + switch(_input->info()->data_type()) + { + case DataType::F16: + add_3D_tensor_argument(idx1, _input, BufferParam(1, 2), slice); + add_3D_tensor_argument(idx1, _sum, BufferParam(2, 2), slice); + add_3D_tensor_argument(idx1, _output, BufferParam(3, 2), slice); + break; + + case DataType::F32: + add_3D_tensor_argument(idx1, _input, BufferParam(1, 2), slice); + add_3D_tensor_argument(idx1, _sum, BufferParam(2, 2), slice); + add_3D_tensor_argument(idx1, _output, BufferParam(3, 2), slice); + break; + + default: + ARM_COMPUTE_ERROR("Current data type is mot supported"); + break; + } + + _kernel.update_shader_params(); + enqueue(*this, slice); + } + while(window_collapsed.slide_window_slice_3D(slice)); +} diff --git a/src/core/GLES_COMPUTE/kernels/GCTransposeKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCTransposeKernel.cpp new file mode 100644 index 0000000000..b891b42ef8 --- /dev/null +++ b/src/core/GLES_COMPUTE/kernels/GCTransposeKernel.cpp @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/core/GLES_COMPUTE/kernels/GCTransposeKernel.h" + +#include "arm_compute/core/AccessWindowTranspose.h" +#include "arm_compute/core/Error.h" +#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h" +#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h" +#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h" +#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Types.h" + +#include +#include + +using namespace arm_compute; + +void GCTransposeKernel::configure(const IGCTensor *input, IGCTensor *output) +{ + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); + ARM_COMPUTE_ERROR_ON_NULLPTR(output); + + TensorShape output_shape{ input->info()->tensor_shape() }; + const size_t w_out = input->info()->dimension(1); + const size_t h_out = input->info()->dimension(0); + output_shape.set(0, w_out); + output_shape.set(1, h_out); + + // Output tensor auto inizialitation if not yet initialized + auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type(), input->info()->fixed_point_position()); + + ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(output->info()->tensor_shape(), output_shape); + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); + + _input = input; + _output = output; + + std::set build_opts; + std::string dt_name = (input->info()->data_type() == DataType::F32) ? 
"DATA_TYPE_FP32" : "DATA_TYPE_FP16"; + build_opts.emplace(("#define " + dt_name)); + build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(1)); + build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1)); + build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1)); + + // Create kernel + _kernel = static_cast(GCKernelLibrary::get().create_kernel("transpose", build_opts)); + + _kernel.clear_params(); + + // Configure kernel window + const unsigned int num_elems_processed_per_iteration = 4; + + Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration, num_elems_processed_per_iteration)); + + AccessWindowRectangle input_access(input->info(), 0, 0, num_elems_processed_per_iteration, num_elems_processed_per_iteration); + AccessWindowTranspose output_access(output->info(), 0, 0, num_elems_processed_per_iteration, num_elems_processed_per_iteration); + update_window_and_padding(win, input_access, output_access); + + output_access.set_valid_region(win, input->info()->valid_region()); + + // set shader params binding point + _kernel.set_shader_params_binding_point(0); + + IGCKernel::configure(win); +} + +void GCTransposeKernel::run(const Window &window) +{ + ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); + ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IGCKernel::window(), window); + + _kernel.use(); + + Window slice = window.first_slice_window_2D(); + + do + { + unsigned int idx = 0; + if(_input->info()->data_type() == DataType::F32) + { + add_2D_tensor_argument(idx, _input, 1, slice); + add_2D_tensor_argument(idx, _output, 2, slice); + } + else if(_input->info()->data_type() == DataType::F16) + { + add_2D_tensor_argument(idx, _input, BufferParam(1, 3), slice); + add_2D_tensor_argument(idx, _output, BufferParam(2, 3), slice); + } + + _kernel.update_shader_params(); + enqueue(*this, slice); + } + while(window.slide_window_slice_2D(slice)); +} diff --git a/src/core/Helpers.cpp b/src/core/Helpers.cpp 
index fc0b6e9361..151d7de9a4 100644 --- a/src/core/Helpers.cpp +++ b/src/core/Helpers.cpp @@ -106,6 +106,13 @@ Window arm_compute::calculate_max_enlarged_window(const ITensorInfo &info, const ++n; } + if(tensor_shape.num_dimensions() > 2) + { + window.set(2, Window::Dimension(0, std::max(1, tensor_shape[n]), steps[2])); + + ++n; + } + for(; n < Coordinates::num_max_dimensions; ++n) { window.set(n, Window::Dimension(0, std::max(1, tensor_shape[n]))); diff --git a/src/core/Utils.cpp b/src/core/Utils.cpp index bd6911fd2b..af864f57f7 100644 --- a/src/core/Utils.cpp +++ b/src/core/Utils.cpp @@ -353,6 +353,7 @@ void arm_compute::print_consecutive_elements(std::ostream &s, DataType dt, const print_consecutive_elements_impl(s, reinterpret_cast(ptr), n, stream_width, element_delim); break; case DataType::F16: + print_consecutive_elements_impl(s, reinterpret_cast(ptr), n, stream_width, element_delim); break; default: ARM_COMPUTE_ERROR("Undefined element size for given data type"); @@ -380,7 +381,7 @@ int arm_compute::max_consecutive_elements_display_width(std::ostream &s, DataTyp case DataType::F32: return max_consecutive_elements_display_width_impl(s, reinterpret_cast(ptr), n); case DataType::F16: - return 0; + return max_consecutive_elements_display_width_impl(s, reinterpret_cast(ptr), n); default: ARM_COMPUTE_ERROR("Undefined element size for given data type"); } diff --git a/src/runtime/CL/functions/CLNormalizationLayer.cpp b/src/runtime/CL/functions/CLNormalizationLayer.cpp index f4bd49406c..648ce6b3a6 100644 --- a/src/runtime/CL/functions/CLNormalizationLayer.cpp +++ b/src/runtime/CL/functions/CLNormalizationLayer.cpp @@ -37,7 +37,7 @@ CLNormalizationLayer::CLNormalizationLayer() { } -void CLNormalizationLayer::configure(ICLTensor *input, ICLTensor *output, NormalizationLayerInfo norm_info) +void CLNormalizationLayer::configure(ICLTensor *input, ICLTensor *output, const NormalizationLayerInfo &norm_info) { ARM_COMPUTE_ERROR_ON(input == nullptr); diff --git 
a/src/runtime/GLES_COMPUTE/GCScheduler.cpp b/src/runtime/GLES_COMPUTE/GCScheduler.cpp new file mode 100644 index 0000000000..b2235ea6f9 --- /dev/null +++ b/src/runtime/GLES_COMPUTE/GCScheduler.cpp @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "arm_compute/runtime/GLES_COMPUTE/GCScheduler.h" + +#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h" + +using namespace arm_compute; + +GCScheduler::GCScheduler() = default; + +void GCScheduler::default_init() +{ + GCKernelLibrary::get().init("./cs_shaders/"); +} + +void GCScheduler::init(EGLDisplay dpy, EGLContext ctx) +{ + GCKernelLibrary::get().init("./cs_shaders/", dpy, ctx); +} + +GCScheduler &GCScheduler::get() +{ + static GCScheduler scheduler; + return scheduler; +} + +void GCScheduler::enqueue(IGCKernel &kernel, bool flush) +{ + kernel.run(kernel.window()); + if(flush) + { + ARM_COMPUTE_GL_CHECK(glFlush()); + } +} + +void GCScheduler::sync() +{ + ARM_COMPUTE_GL_CHECK(glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT)); +} diff --git a/src/runtime/GLES_COMPUTE/GCTensor.cpp b/src/runtime/GLES_COMPUTE/GCTensor.cpp new file mode 100644 index 0000000000..edbd16dc1d --- /dev/null +++ b/src/runtime/GLES_COMPUTE/GCTensor.cpp @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h" + +using namespace arm_compute; + +GCTensor::GCTensor() + : _allocator() +{ +} + +ITensorAllocator *GCTensor::allocator() +{ + return &_allocator; +} + +TensorInfo *GCTensor::info() const +{ + return &_allocator.info(); +} + +TensorInfo *GCTensor::info() +{ + return &_allocator.info(); +} + +uint8_t *GCTensor::buffer() const +{ + return _allocator.data(); +} + +GLuint GCTensor::gc_buffer() const +{ + return _allocator.get_gl_ssbo_name(); +} + +void GCTensor::map(bool blocking) +{ + IGCTensor::map(blocking); +} + +void GCTensor::unmap() +{ + IGCTensor::unmap(); +} + +uint8_t *GCTensor::do_map(bool blocking) +{ + return _allocator.map(blocking); +} + +void GCTensor::do_unmap() +{ + _allocator.unmap(); +} \ No newline at end of file diff --git a/src/runtime/GLES_COMPUTE/GCTensorAllocator.cpp b/src/runtime/GLES_COMPUTE/GCTensorAllocator.cpp new file mode 100644 index 0000000000..694b34f1ec --- /dev/null +++ b/src/runtime/GLES_COMPUTE/GCTensorAllocator.cpp @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "arm_compute/runtime/GLES_COMPUTE/GCTensorAllocator.h" + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCScheduler.h" +#include "support/ToolchainSupport.h" + +using namespace arm_compute; + +GCTensorAllocator::GCTensorAllocator() + : _gl_buffer(), _mapping(nullptr) +{ +} + +uint8_t *GCTensorAllocator::data() +{ + return _mapping; +} + +void GCTensorAllocator::allocate() +{ + _gl_buffer = support::cpp14::make_unique(); + ARM_COMPUTE_GL_CHECK(glBindBuffer(GL_SHADER_STORAGE_BUFFER, _gl_buffer->_ssbo_name)); + ARM_COMPUTE_GL_CHECK(glBufferData(GL_SHADER_STORAGE_BUFFER, static_cast(info().total_size()), nullptr, GL_STATIC_DRAW)); + ARM_COMPUTE_GL_CHECK(glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0)); + info().set_is_resizable(false); +} + +void GCTensorAllocator::free() +{ + _gl_buffer.reset(); + info().set_is_resizable(true); +} + +uint8_t *GCTensorAllocator::lock() +{ + return map(true); +} + +void GCTensorAllocator::unlock() +{ + unmap(); +} + +GLuint GCTensorAllocator::get_gl_ssbo_name() const +{ + return _gl_buffer->_ssbo_name; +} + +uint8_t *GCTensorAllocator::map(bool blocking) +{ + ARM_COMPUTE_ERROR_ON(_mapping != nullptr); + ARM_COMPUTE_UNUSED(blocking); + + ARM_COMPUTE_GL_CHECK(glBindBuffer(GL_SHADER_STORAGE_BUFFER, _gl_buffer->_ssbo_name)); + void *p = ARM_COMPUTE_GL_CHECK(glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, static_cast(info().total_size()), GL_MAP_READ_BIT | 
GL_MAP_WRITE_BIT)); + _mapping = reinterpret_cast(p); + + return _mapping; +} + +void GCTensorAllocator::unmap() +{ + ARM_COMPUTE_ERROR_ON(_mapping == nullptr); + + ARM_COMPUTE_GL_CHECK(glBindBuffer(GL_SHADER_STORAGE_BUFFER, _gl_buffer->_ssbo_name)); + ARM_COMPUTE_GL_CHECK(glUnmapBuffer(GL_SHADER_STORAGE_BUFFER)); + ARM_COMPUTE_GL_CHECK(glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0)); + _mapping = nullptr; +} \ No newline at end of file diff --git a/src/runtime/GLES_COMPUTE/IGCSimpleFunction.cpp b/src/runtime/GLES_COMPUTE/IGCSimpleFunction.cpp new file mode 100644 index 0000000000..19f178f445 --- /dev/null +++ b/src/runtime/GLES_COMPUTE/IGCSimpleFunction.cpp @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/runtime/GLES_COMPUTE/IGCSimpleFunction.h" + +#include "arm_compute/core/Error.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCScheduler.h" + +using namespace arm_compute; + +IGCSimpleFunction::IGCSimpleFunction() //NOLINT + : _kernel(), + _border_handler() +{ +} + +void IGCSimpleFunction::run() +{ + ARM_COMPUTE_ERROR_ON_MSG(!_kernel, "The child class didn't set the GLES kernel or function isn't configured"); + + // FIXME(APPBROWSER-300): We may need to rename "enqueue" to "dispatch" and "sync" to "memory_barrier". + GCScheduler::get().enqueue(_border_handler, false); + GCScheduler::get().sync(); + GCScheduler::get().enqueue(*_kernel); +} diff --git a/src/runtime/GLES_COMPUTE/functions/GCAbsoluteDifference.cpp b/src/runtime/GLES_COMPUTE/functions/GCAbsoluteDifference.cpp new file mode 100644 index 0000000000..781b357ce7 --- /dev/null +++ b/src/runtime/GLES_COMPUTE/functions/GCAbsoluteDifference.cpp @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCAbsoluteDifference.h" + +#include "arm_compute/core/GLES_COMPUTE/kernels/GCAbsoluteDifferenceKernel.h" +#include "arm_compute/core/Helpers.h" +#include "support/ToolchainSupport.h" + +#include + +using namespace arm_compute; + +void GCAbsoluteDifference::configure(const IGCTensor *input1, const IGCTensor *input2, IGCTensor *output) +{ + auto k = arm_compute::support::cpp14::make_unique(); + k->configure(input1, input2, output); + _kernel = std::move(k); +} diff --git a/src/runtime/GLES_COMPUTE/functions/GCActivationLayer.cpp b/src/runtime/GLES_COMPUTE/functions/GCActivationLayer.cpp new file mode 100644 index 0000000000..8686416616 --- /dev/null +++ b/src/runtime/GLES_COMPUTE/functions/GCActivationLayer.cpp @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCActivationLayer.h" + +#include "arm_compute/core/GLES_COMPUTE/kernels/GCActivationLayerKernel.h" +#include "arm_compute/core/Helpers.h" +#include "support/ToolchainSupport.h" + +using namespace arm_compute; + +void GCActivationLayer::configure(IGCTensor *input, IGCTensor *output, ActivationLayerInfo act_info) +{ + auto k = arm_compute::support::cpp14::make_unique(); + k->configure(input, output, act_info); + _kernel = std::move(k); +} diff --git a/src/runtime/GLES_COMPUTE/functions/GCBatchNormalizationLayer.cpp b/src/runtime/GLES_COMPUTE/functions/GCBatchNormalizationLayer.cpp new file mode 100755 index 0000000000..2e546a663a --- /dev/null +++ b/src/runtime/GLES_COMPUTE/functions/GCBatchNormalizationLayer.cpp @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCBatchNormalizationLayer.h" + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCScheduler.h" + +using namespace arm_compute; + +GCBatchNormalizationLayer::GCBatchNormalizationLayer() + : _norm_kernel() +{ +} + +void GCBatchNormalizationLayer::configure(const IGCTensor *input, IGCTensor *output, const IGCTensor *mean, const IGCTensor *var, const IGCTensor *beta, const IGCTensor *gamma, float epsilon) +{ + _norm_kernel.configure(input, output, mean, var, beta, gamma, epsilon); +} + +void GCBatchNormalizationLayer::run() +{ + GCScheduler::get().enqueue(_norm_kernel, true); +} diff --git a/src/runtime/GLES_COMPUTE/functions/GCDepthConcatenate.cpp b/src/runtime/GLES_COMPUTE/functions/GCDepthConcatenate.cpp new file mode 100755 index 0000000000..ed756cf261 --- /dev/null +++ b/src/runtime/GLES_COMPUTE/functions/GCDepthConcatenate.cpp @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCDepthConcatenate.h" + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h" +#include "arm_compute/core/PixelValue.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCScheduler.h" +#include "support/ToolchainSupport.h" + +using namespace arm_compute; + +GCDepthConcatenate::GCDepthConcatenate() //NOLINT + : _concat_kernels_vector(), + _border_handlers_vector(), + _num_inputs(0) +{ +} + +void GCDepthConcatenate::configure(std::vector inputs_vector, IGCTensor *output) //NOLINT +{ + ARM_COMPUTE_ERROR_ON(inputs_vector.size() < 2); + + _num_inputs = inputs_vector.size(); + + unsigned int depth_offset = 0; + + _concat_kernels_vector = arm_compute::support::cpp14::make_unique(_num_inputs); + _border_handlers_vector = arm_compute::support::cpp14::make_unique(_num_inputs); + + for(unsigned int i = 0; i < _num_inputs; i++) + { + _concat_kernels_vector[i].configure(inputs_vector.at(i), depth_offset, output); + _border_handlers_vector[i].configure(inputs_vector.at(i), _concat_kernels_vector[i].border_size(), BorderMode::CONSTANT, PixelValue(0)); + + depth_offset += inputs_vector.at(i)->info()->dimension(2); + } +} + +void GCDepthConcatenate::run() +{ + for(unsigned i = 0; i < _num_inputs; i++) + { + GCScheduler::get().enqueue(_border_handlers_vector[i], false); + GCScheduler::get().enqueue(_concat_kernels_vector[i], true); + } +} diff --git a/src/runtime/GLES_COMPUTE/functions/GCDirectConvolutionLayer.cpp b/src/runtime/GLES_COMPUTE/functions/GCDirectConvolutionLayer.cpp new file mode 100644 index 0000000000..ae9dd51b8e --- /dev/null +++ b/src/runtime/GLES_COMPUTE/functions/GCDirectConvolutionLayer.cpp @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCDirectConvolutionLayer.h" + +#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h" +#include "arm_compute/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/PixelValue.h" +#include "arm_compute/core/Utils.h" +#include "support/ToolchainSupport.h" + +using namespace arm_compute; + +void GCDirectConvolutionLayer::configure(const IGCTensor *input, const IGCTensor *weights, const IGCTensor *biases, IGCTensor *output, const PadStrideInfo &conv_info) +{ + int kernel_size = weights->info()->dimension(0); + + if(kernel_size == 1) + { + auto k = arm_compute::support::cpp14::make_unique(); + k->configure(input, weights, biases, output, conv_info); + _kernel = std::move(k); + } + else if(kernel_size == 3) + { + auto k = arm_compute::support::cpp14::make_unique(); + k->configure(input, weights, biases, output, conv_info); + _kernel = std::move(k); + } + else if(kernel_size == 5) + { + auto k = arm_compute::support::cpp14::make_unique(); + k->configure(input, weights, biases, output, conv_info); + _kernel = std::move(k); + } + else + { + ARM_COMPUTE_ERROR("kernel size unsupported!"); + return; + } + + _border_handler.configure(input, _kernel->border_size(), BorderMode::CONSTANT, PixelValue(0)); +} diff --git a/src/runtime/GLES_COMPUTE/functions/GCDropoutLayer.cpp b/src/runtime/GLES_COMPUTE/functions/GCDropoutLayer.cpp new file mode 100644 index 0000000000..032c2fdb1e --- /dev/null +++ b/src/runtime/GLES_COMPUTE/functions/GCDropoutLayer.cpp @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCDropoutLayer.h" + +#include "arm_compute/core/Validate.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCScheduler.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h" + +using namespace arm_compute; + +GCDropoutLayer::GCDropoutLayer() + : _dropout_kernel() +{ +} + +void GCDropoutLayer::configure(const IGCTensor *input, IGCTensor *mask, IGCTensor *output, float ratio, bool forward) +{ + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, mask, output); + + // Configure kernel + _dropout_kernel.configure(input, mask, output, ratio, forward); +} + +void GCDropoutLayer::run() +{ + GCScheduler::get().enqueue(_dropout_kernel); +} diff --git a/src/runtime/GLES_COMPUTE/functions/GCFillBorder.cpp b/src/runtime/GLES_COMPUTE/functions/GCFillBorder.cpp new file mode 100644 index 0000000000..5c2431fa13 --- /dev/null +++ b/src/runtime/GLES_COMPUTE/functions/GCFillBorder.cpp @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCFillBorder.h" + +#include "arm_compute/core/GLES_COMPUTE/kernels/GCFillBorderKernel.h" +#include "arm_compute/core/Helpers.h" +#include "support/ToolchainSupport.h" + +#include + +using namespace arm_compute; + +void GCFillBorder::configure(IGCTensor *tensor, unsigned int border_width, BorderMode border_mode, const PixelValue &constant_border_value) +{ + auto k = arm_compute::support::cpp14::make_unique(); + k->configure(tensor, BorderSize(border_width), border_mode, constant_border_value); + _kernel = std::move(k); +} diff --git a/src/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.cpp b/src/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.cpp new file mode 100644 index 0000000000..63cb40e616 --- /dev/null +++ b/src/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.cpp @@ -0,0 +1,177 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.h" + +#include "arm_compute/core/Validate.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCScheduler.h" +#include "support/ToolchainSupport.h" + +#include + +using namespace arm_compute; + +void GCFullyConnectedLayerReshapeWeights::configure(const IGCTensor *input, IGCTensor *output) +{ + auto k = arm_compute::support::cpp14::make_unique(); + k->configure(input, output); + _kernel = std::move(k); +} + +GCFullyConnectedLayer::GCFullyConnectedLayer() + : _im2col_kernel(), _reshape_weights_kernel(), _mm_kernel(), _accumulate_biases_kernel(), _im2col_output(), _reshape_weights_output(), _are_weights_reshaped(true), _is_fc_after_conv(true), + _accumulate_biases(false) +{ +} + +void GCFullyConnectedLayer::configure_conv_fc(const IGCTensor *input, const IGCTensor *weights, IGCTensor *output) +{ + ARM_COMPUTE_ERROR_ON((weights->info()->dimension(1) != (input->info()->dimension(0) * input->info()->dimension(1) * input->info()->dimension(2)))); + + const DataType dt = input->info()->data_type(); + + // If the fully connected layer is called after a convolution layer, the input tensor must be linearized + + // Initialize output tensor for im2col + TensorShape shape_im2col; + shape_im2col.set(0, input->info()->dimension(0) * input->info()->dimension(1) * input->info()->dimension(2)); + shape_im2col.set(1, input->info()->dimension(3)); + shape_im2col.set(2, input->info()->dimension(4)); + shape_im2col.set(3, 
input->info()->dimension(5)); + _im2col_output.allocator()->init(TensorInfo(shape_im2col, 1, dt)); + + // Configure im2col kernel + _im2col_kernel.configure(input, &_im2col_output, std::make_pair(1, 1), PadStrideInfo(1, 1, 0, 0), false); + + // Configure matrix multiply kernel + _mm_kernel.configure(&_im2col_output, weights, output, 1.0f, false); + + // Allocate the output tensor for im2col once all the configure methods have been called + _im2col_output.allocator()->allocate(); +} + +void GCFullyConnectedLayer::configure_fc_fc(const IGCTensor *input, const IGCTensor *weights, IGCTensor *output) +{ + ARM_COMPUTE_ERROR_ON(input->info()->dimension(0) != weights->info()->dimension(1)); + + // Configure matrix multiply kernel + _mm_kernel.configure(input, weights, output, 1.0f, false); +} + +void GCFullyConnectedLayer::configure(const IGCTensor *input, const IGCTensor *weights, const IGCTensor *biases, IGCTensor *output, bool transpose_weights, bool are_weights_reshaped) +{ + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32, DataType::F16); + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights, output); + ARM_COMPUTE_ERROR_ON(weights->info()->num_dimensions() > 2); + + _are_weights_reshaped = transpose_weights ? 
are_weights_reshaped : true; + _is_fc_after_conv = true; + _accumulate_biases = false; + + if(biases != nullptr) + { + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, biases); + + _accumulate_biases = true; + + // Configure accumulate biases kernel + _accumulate_biases_kernel.configure(output, biases); + } + + // With the Fully Connected layer we can have 4 different cases: + // 1) Convolution layer -> Fully Connected layer without batches + // 2) Fully Connected layer -> Fully Connected layer without batches + // 3) Convolution layer -> Fully Connected layer with batches + // 4) Fully Connected layer -> Fully Connected layer with batches + + const IGCTensor *weights_to_use = weights; + + if(!_are_weights_reshaped) + { + weights_to_use = &_reshape_weights_output; + + // Reshape the weights + _reshape_weights_kernel.configure(weights, &_reshape_weights_output); + } + + // Check if we have a fully connected layer with batches + const bool is_batched_fc_layer = output->info()->dimension(1) > 1; + + if(is_batched_fc_layer) + { + _is_fc_after_conv = (TensorShape::num_max_dimensions >= 4) && (std::equal(input->info()->tensor_shape().cbegin() + 3, + input->info()->tensor_shape().cend(), + output->info()->tensor_shape().cbegin() + 1)); + } + else + { + _is_fc_after_conv = input->info()->num_dimensions() > 1; + } + + if(_is_fc_after_conv) + { + // Fully Connected layer after a Convolution Layer without batches + configure_conv_fc(input, weights_to_use, output); + } + else + { + // Fully Connected layer after a Fully Connected Layer without batches + configure_fc_fc(input, weights_to_use, output); + } + + // Allocate the transpose tensor if the are_weights_reshaped flag is false and once all the configure methods have been called + if(!_are_weights_reshaped) + { + // Allocate the tensor for the weights reshaped + _reshape_weights_output.allocator()->allocate(); + } +} + +void GCFullyConnectedLayer::run() +{ + // Reshape of the weights (happens only once) + 
if(!_are_weights_reshaped) + { + _are_weights_reshaped = true; + _reshape_weights_kernel.run(); + } + + // Linearize input if it comes from a convolutional layer + if(_is_fc_after_conv) + { + GCScheduler::get().enqueue(_im2col_kernel, false); + } + + GCScheduler::get().sync(); + + // Run matrix multiply + GCScheduler::get().enqueue(_mm_kernel, !_accumulate_biases); + + // Accumulate biases if provided + if(_accumulate_biases) + { + GCScheduler::get().sync(); + + GCScheduler::get().enqueue(_accumulate_biases_kernel); + } +} diff --git a/src/runtime/GLES_COMPUTE/functions/GCGEMM.cpp b/src/runtime/GLES_COMPUTE/functions/GCGEMM.cpp new file mode 100644 index 0000000000..c47a0e71fb --- /dev/null +++ b/src/runtime/GLES_COMPUTE/functions/GCGEMM.cpp @@ -0,0 +1,133 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCGEMM.h" + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h" +#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h" +#include "arm_compute/core/GLES_COMPUTE/kernels/GCGEMMInterleave4x4Kernel.h" +#include "arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixAdditionKernel.h" +#include "arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixMultiplyKernel.h" +#include "arm_compute/core/GLES_COMPUTE/kernels/GCGEMMTranspose1xWKernel.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCScheduler.h" +#include "arm_compute/runtime/ITensorAllocator.h" + +using namespace arm_compute; + +GCGEMM::GCGEMM() + : _interleave_kernel(), _transpose_kernel(), _mm_kernel(), _ma_kernel(), _tmp_a(), _tmp_b(), _is_interleaved_transposed(false), _run_addition(false) +{ +} + +void GCGEMM::configure(const IGCTensor *a, const IGCTensor *b, const IGCTensor *c, IGCTensor *output, float alpha, float beta) +{ + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(a, 1, DataType::F32); + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(a, b, output); + + if(c != nullptr) + { + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(a, c); + ARM_COMPUTE_ERROR_ON_MSG(a->info()->dimension(1) != c->info()->dimension(1), "The C matrix must have the same number of rows as the matrix A"); + ARM_COMPUTE_ERROR_ON_MSG(b->info()->dimension(0) != c->info()->dimension(0), "The C matrix must have the same number of columns as the matrix C"); + ARM_COMPUTE_ERROR_ON_MSG(c->info()->dimension(0) != output->info()->dimension(0), "The C matrix must have the same number of rows as the output matrix"); + ARM_COMPUTE_ERROR_ON_MSG(c->info()->dimension(1) != output->info()->dimension(1), "The C matrix must have the same number of columns as the output matrix"); + } + + 
ARM_COMPUTE_ERROR_ON_MSG(a->info()->dimension(0) != b->info()->dimension(1), "The product AB is defined only if the number of columns in A is equal to the number of rows in B"); + + // If the input tensor has less than 16 rows, we run a special version of GEMM without reshaping the input tensors + _is_interleaved_transposed = a->info()->dimension(1) > 16; + + const IGCTensor *matrix_a = a; + const IGCTensor *matrix_b = b; + + if(_is_interleaved_transposed) + { + matrix_a = &_tmp_a; + matrix_b = &_tmp_b; + + TensorShape shape_tmp_a = a->info()->tensor_shape(); + TensorShape shape_tmp_b = b->info()->tensor_shape(); + + shape_tmp_a.set(0, a->info()->dimension(0) * 4); + shape_tmp_a.set(1, std::ceil(a->info()->dimension(1) / 4.0f)); + + const unsigned int transpose_w = max_gc_vector_width / data_size_from_type(b->info()->data_type()); + shape_tmp_b.set(0, b->info()->dimension(1) * transpose_w); + shape_tmp_b.set(1, std::ceil(b->info()->dimension(0) / static_cast(transpose_w))); + + TensorInfo info_a(shape_tmp_a, 1, a->info()->data_type(), a->info()->fixed_point_position()); + _tmp_a.allocator()->init(info_a); + + TensorInfo info_b(shape_tmp_b, 1, b->info()->data_type(), b->info()->fixed_point_position()); + _tmp_b.allocator()->init(info_b); + + // Configure interleave kernel + _interleave_kernel.configure(a, &_tmp_a); + + // Configure transpose kernel + _transpose_kernel.configure(b, &_tmp_b); + } + + _mm_kernel.configure(matrix_a, matrix_b, output, alpha, _is_interleaved_transposed); + + if(_is_interleaved_transposed) + { + // Allocate intermediate tensors + _tmp_a.allocator()->allocate(); + _tmp_b.allocator()->allocate(); + } + + // Configure matrix addition kernel + if(beta != 0 && c != nullptr) + { + _ma_kernel.configure(c, output, beta); + _run_addition = true; + } +} + +void GCGEMM::run() +{ + if(_is_interleaved_transposed) + { + // Run interleave kernel + GCScheduler::get().enqueue(_interleave_kernel, false); + + // Run transpose kernel + 
GCScheduler::get().enqueue(_transpose_kernel, false); + } + + // Run matrix multiply kernel + GCScheduler::get().enqueue(_mm_kernel, !_run_addition); + + // Run matrix addition kernel + if(_run_addition) + { + GCScheduler::get().enqueue(_ma_kernel); + } +} diff --git a/src/runtime/GLES_COMPUTE/functions/GCGEMMInterleave4x4.cpp b/src/runtime/GLES_COMPUTE/functions/GCGEMMInterleave4x4.cpp new file mode 100644 index 0000000000..44c940e126 --- /dev/null +++ b/src/runtime/GLES_COMPUTE/functions/GCGEMMInterleave4x4.cpp @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCGEMMInterleave4x4.h" + +#include "arm_compute/core/GLES_COMPUTE/kernels/GCGEMMInterleave4x4Kernel.h" +#include "support/ToolchainSupport.h" + +using namespace arm_compute; + +void GCGEMMInterleave4x4::configure(const IGCTensor *input, IGCTensor *output) +{ + auto k = arm_compute::support::cpp14::make_unique(); + k->configure(input, output); + _kernel = std::move(k); +} diff --git a/src/runtime/GLES_COMPUTE/functions/GCGEMMTranspose1xW.cpp b/src/runtime/GLES_COMPUTE/functions/GCGEMMTranspose1xW.cpp new file mode 100644 index 0000000000..893fa5572b --- /dev/null +++ b/src/runtime/GLES_COMPUTE/functions/GCGEMMTranspose1xW.cpp @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCGEMMTranspose1xW.h" + +#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h" +#include "arm_compute/core/GLES_COMPUTE/kernels/GCGEMMTranspose1xWKernel.h" +#include "arm_compute/core/Types.h" +#include "support/ToolchainSupport.h" + +using namespace arm_compute; + +void GCGEMMTranspose1xW::configure(const IGCTensor *input, IGCTensor *output) +{ + auto k = arm_compute::support::cpp14::make_unique(); + k->configure(input, output); + _kernel = std::move(k); +} diff --git a/src/runtime/GLES_COMPUTE/functions/GCNormalizationLayer.cpp b/src/runtime/GLES_COMPUTE/functions/GCNormalizationLayer.cpp new file mode 100644 index 0000000000..d30ed52d5c --- /dev/null +++ b/src/runtime/GLES_COMPUTE/functions/GCNormalizationLayer.cpp @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCNormalizationLayer.h" + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/PixelValue.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCScheduler.h" + +using namespace arm_compute; + +GCNormalizationLayer::GCNormalizationLayer() + : _squared_input(), _norm_kernel(), _multiply_kernel(), _border_handler() +{ +} + +void GCNormalizationLayer::configure(const IGCTensor *input, IGCTensor *output, const NormalizationLayerInfo &norm_info) +{ + ARM_COMPUTE_ERROR_ON(input == nullptr); + + _squared_input.allocator()->init(TensorInfo(input->info()->tensor_shape(), 1, input->info()->data_type())); + + _norm_kernel.configure(input, &_squared_input, output, norm_info); + _multiply_kernel.configure(input, input, &_squared_input, 1.0f); + // Fill the border by 3 elements since we need vload4 in the IN_MAP normalization kernel + _border_handler.configure(&_squared_input, _norm_kernel.border_size(), BorderMode::CONSTANT, PixelValue(0)); + + // Allocate intermediate buffers + _squared_input.allocator()->allocate(); +} + +void GCNormalizationLayer::run() +{ + GCScheduler::get().enqueue(_multiply_kernel, false); + GCScheduler::get().enqueue(_border_handler, false); + GCScheduler::get().enqueue(_norm_kernel, false); +} diff --git a/src/runtime/GLES_COMPUTE/functions/GCPixelWiseMultiplication.cpp b/src/runtime/GLES_COMPUTE/functions/GCPixelWiseMultiplication.cpp new file mode 100755 index 0000000000..0cd87ea875 --- /dev/null +++ b/src/runtime/GLES_COMPUTE/functions/GCPixelWiseMultiplication.cpp @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCPixelWiseMultiplication.h" + +#include "arm_compute/core/GLES_COMPUTE/kernels/GCPixelWiseMultiplicationKernel.h" +#include "support/ToolchainSupport.h" + +#include + +using namespace arm_compute; + +void GCPixelWiseMultiplication::configure(const IGCTensor *input1, const IGCTensor *input2, IGCTensor *output, float scale) +{ + auto k = arm_compute::support::cpp14::make_unique(); + k->configure(input1, input2, output, scale); + _kernel = std::move(k); +} diff --git a/src/runtime/GLES_COMPUTE/functions/GCPoolingLayer.cpp b/src/runtime/GLES_COMPUTE/functions/GCPoolingLayer.cpp new file mode 100644 index 0000000000..46a60cddef --- /dev/null +++ b/src/runtime/GLES_COMPUTE/functions/GCPoolingLayer.cpp @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCPoolingLayer.h" + +#include "arm_compute/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.h" +#include "arm_compute/core/PixelValue.h" +#include "support/ToolchainSupport.h" + +using namespace arm_compute; + +void GCPoolingLayer::configure(IGCTensor *input, IGCTensor *output, const PoolingLayerInfo &pool_info) +{ + // Configure pooling kernel + auto k = arm_compute::support::cpp14::make_unique(); + k->configure(input, output, pool_info); + _kernel = std::move(k); + + // Configure border depending on operation required + BorderMode border_mode = (PoolingType::MAX == pool_info.pool_type()) ? 
BorderMode::REPLICATE : BorderMode::CONSTANT; + _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(0.0f)); +} diff --git a/src/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.cpp b/src/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.cpp new file mode 100644 index 0000000000..d7d47d2802 --- /dev/null +++ b/src/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.cpp @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.h" + +#include "arm_compute/core/GLES_COMPUTE/kernels/GCSoftmaxLayerKernel.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCScheduler.h" + +using namespace arm_compute; + +GCSoftmaxLayer::GCSoftmaxLayer() + : _max_kernel(), _shift_exp_sum_kernel(), _norm_kernel(), _max(), _sum(), _tmp() +{ +} + +void GCSoftmaxLayer::configure(const IGCTensor *input, IGCTensor *output) +{ + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); + + // Create intermediate tensors shapes + _tmp.allocator()->init(TensorInfo(input->info()->tensor_shape(), input->info()->num_channels(), input->info()->data_type(), input->info()->fixed_point_position())); + + TensorShape shape = input->info()->tensor_shape(); + shape.set(0, 1); + TensorInfo tensor_info_max_sum(shape, input->info()->num_channels(), input->info()->data_type(), input->info()->fixed_point_position()); + _max.allocator()->init(tensor_info_max_sum); + _sum.allocator()->init(tensor_info_max_sum); + + // Configure Kernels + _max_kernel.configure(input, &_max); + _shift_exp_sum_kernel.configure(input, &_max, &_tmp, &_sum); + _norm_kernel.configure(&_tmp, &_sum, output); + + // Allocate intermediate buffers + _tmp.allocator()->allocate(); + _max.allocator()->allocate(); + _sum.allocator()->allocate(); +} + +void GCSoftmaxLayer::run() +{ + GCScheduler::get().enqueue(_max_kernel, false); + GCScheduler::get().enqueue(_shift_exp_sum_kernel, false); + GCScheduler::get().enqueue(_norm_kernel); +} diff --git a/src/runtime/GLES_COMPUTE/functions/GCTranspose.cpp b/src/runtime/GLES_COMPUTE/functions/GCTranspose.cpp new file mode 100644 index 0000000000..c2dc122e64 --- /dev/null +++ b/src/runtime/GLES_COMPUTE/functions/GCTranspose.cpp @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCTranspose.h" + +#include "arm_compute/core/GLES_COMPUTE/kernels/GCTransposeKernel.h" +#include "support/ToolchainSupport.h" + +#include <utility> + +using namespace arm_compute; + +void GCTranspose::configure(const IGCTensor *input, IGCTensor *output) +{ + auto k = arm_compute::support::cpp14::make_unique<GCTransposeKernel>(); + k->configure(input, output); + _kernel = std::move(k); +} diff --git a/src/runtime/NEON/functions/NENormalizationLayer.cpp b/src/runtime/NEON/functions/NENormalizationLayer.cpp index e01ef6660d..da4314b5ed 100644 --- a/src/runtime/NEON/functions/NENormalizationLayer.cpp +++ b/src/runtime/NEON/functions/NENormalizationLayer.cpp @@ -37,7 +37,7 @@ NENormalizationLayer::NENormalizationLayer(std::shared_ptr memor { } -void NENormalizationLayer::configure(const ITensor *input, ITensor *output, NormalizationLayerInfo norm_info) +void NENormalizationLayer::configure(const ITensor *input, ITensor *output, const NormalizationLayerInfo &norm_info) { ARM_COMPUTE_ERROR_ON(input == nullptr); diff --git a/tests/GLES_COMPUTE/GCAccessor.h b/tests/GLES_COMPUTE/GCAccessor.h new file mode 100644 index 0000000000..0f7c491c3c --- /dev/null +++ b/tests/GLES_COMPUTE/GCAccessor.h @@ -0,0 +1,144 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_TEST_GCACCESSOR_H__ +#define __ARM_COMPUTE_TEST_GCACCESSOR_H__ + +#include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h" +#include "tests/IAccessor.h" + +namespace arm_compute +{ +namespace test +{ +/** Accessor implementation for @ref GCTensor objects. */ +class GCAccessor : public IAccessor +{ +public: + /** Create an accessor for the given @p tensor. + * + * @param[in, out] tensor To be accessed tensor. + * + * @note The GLES memory is mapped by the constructor. + * + */ + GCAccessor(GCTensor &tensor); + + GCAccessor(const GCAccessor &) = delete; + GCAccessor &operator=(const GCAccessor &) = delete; + GCAccessor(GCAccessor &&) = default; + GCAccessor &operator=(GCAccessor &&) = default; + + /** Destructor that unmaps the GLES memory. 
*/ + ~GCAccessor(); + + TensorShape shape() const override; + size_t element_size() const override; + size_t size() const override; + Format format() const override; + DataType data_type() const override; + int num_channels() const override; + int num_elements() const override; + PaddingSize padding() const override; + int fixed_point_position() const override; + QuantizationInfo quantization_info() const override; + const void *operator()(const Coordinates &coord) const override; + void *operator()(const Coordinates &coord) override; + +private: + GCTensor &_tensor; +}; + +inline GCAccessor::GCAccessor(GCTensor &tensor) + : _tensor{ tensor } +{ + _tensor.map(); +} + +inline GCAccessor::~GCAccessor() +{ + _tensor.unmap(); +} + +inline TensorShape GCAccessor::shape() const +{ + return _tensor.info()->tensor_shape(); +} + +inline size_t GCAccessor::element_size() const +{ + return _tensor.info()->element_size(); +} + +inline size_t GCAccessor::size() const +{ + return _tensor.info()->total_size(); +} + +inline Format GCAccessor::format() const +{ + return _tensor.info()->format(); +} + +inline DataType GCAccessor::data_type() const +{ + return _tensor.info()->data_type(); +} + +inline int GCAccessor::num_channels() const +{ + return _tensor.info()->num_channels(); +} + +inline int GCAccessor::num_elements() const +{ + return _tensor.info()->tensor_shape().total_size(); +} + +inline PaddingSize GCAccessor::padding() const +{ + return _tensor.info()->padding(); +} + +inline int GCAccessor::fixed_point_position() const +{ + return _tensor.info()->fixed_point_position(); +} + +inline QuantizationInfo GCAccessor::quantization_info() const +{ + return _tensor.info()->quantization_info(); +} + +inline const void *GCAccessor::operator()(const Coordinates &coord) const +{ + return _tensor.ptr_to_element(coord); +} + +inline void *GCAccessor::operator()(const Coordinates &coord) +{ + return _tensor.ptr_to_element(coord); +} +} // namespace test +} // namespace arm_compute 
+#endif /* __ARM_COMPUTE_TEST_GCACCESSOR_H__ */ diff --git a/tests/GLES_COMPUTE/Helper.h b/tests/GLES_COMPUTE/Helper.h new file mode 100644 index 0000000000..5f6460aa61 --- /dev/null +++ b/tests/GLES_COMPUTE/Helper.h @@ -0,0 +1,115 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_TEST_GC_HELPER_H__ +#define __ARM_COMPUTE_TEST_GC_HELPER_H__ + +#include "tests/Globals.h" + +#include "arm_compute/core/ITensor.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h" + +#include + +namespace arm_compute +{ +namespace test +{ +namespace gles_compute +{ +/** Helper to create an empty tensor. + * + * @param[in] shape Desired shape. + * @param[in] data_type Desired data type. 
+ * @param[in] num_channels (Optional) It indicates the number of channels for each tensor element + * @param[in] fixed_point_position (Optional) Fixed point position that expresses the number of bits for the fractional part of the number when the tensor's data type is QS8 or QS16. + * + * @return Empty @ref GCTensor with the specified shape and data type. + */ +inline GCTensor create_tensor(const TensorShape &shape, DataType data_type, int num_channels = 1, int fixed_point_position = 0) +{ + GCTensor tensor; + tensor.allocator()->init(TensorInfo(shape, num_channels, data_type, fixed_point_position)); + + return tensor; +} + +/** Helper to create an empty tensor. + * + * @param[in] name File name from which to get the dimensions. + * @param[in] data_type Desired data type. + * + * @return Empty @ref GCTensor with the specified shape and data type. + */ +inline GCTensor create_tensor(const std::string &name, DataType data_type) +{ + constexpr unsigned int num_channels = 1; + + const RawTensor &raw = library->get(name); + + GCTensor tensor; + tensor.allocator()->init(TensorInfo(raw.shape(), num_channels, data_type)); + + return tensor; +} + +/** Helper to print tensor. + * + * @param[in] tensor Tensor to print. + * @param[in] name Tensor name. + * @param[in] info Format information. + * + * @return Empty @ref GCTensor with the specified shape and data type. + */ +inline void print_tensor(ITensor &tensor, const std::string &name, IOFormatInfo info = IOFormatInfo(IOFormatInfo::PrintRegion::Full)) +{ + std::ostringstream s; + IGCTensor &t = dynamic_cast(tensor); + t.map(); + t.print(s, info); + + std::cout << name << ":" << std::endl; + std::cout << s.str().c_str(); + t.unmap(); + + return; +} + +/** Helper to sync tensor, if tensor is not used, GPU have optimized the operation. + * + * @param[in] tensor Tensor to be sync. + * + * @return Empty @ref GCTensor with the specified shape and data type. 
+ */ +inline void force_sync_tensor(ITensor &tensor) +{ + IGCTensor &t = dynamic_cast<IGCTensor &>(tensor); + t.map(); + t.unmap(); + + return; +} +} // namespace gles_compute +} // namespace test +} // namespace arm_compute +#endif /* __ARM_COMPUTE_TEST_GC_HELPER_H__ */ diff --git a/tests/SConscript b/tests/SConscript index 37c96d2f22..311eeedde1 100644 --- a/tests/SConscript +++ b/tests/SConscript @@ -68,6 +68,8 @@ test_env.Append(CPPPATH = ["#3rdparty/include"]) test_env.Append(LIBPATH = ["#3rdparty/%s/%s" % (env['os'], env['arch'])]) test_env.Append(LIBPATH = ["#build/%s" % env['build_dir']]) test_env.Append(LIBPATH = ["#build/%s/opencl-1.2-stubs" % env['build_dir']]) +if env['gles_compute'] and env['os'] != 'android': + test_env.Append(LIBPATH = ["#build/%s/opengles-3.1/stubs" % env['build_dir']]) Import("arm_compute_test_framework") test_env.Append(LIBS = arm_compute_test_framework) @@ -109,6 +111,27 @@ if env['neon']: files_validation += Glob('validation/NEON/*/' + filter_pattern) files_validation += Glob('validation/NEON/' + filter_pattern) +if env['gles_compute']: + if env['os'] != 'android': + Import('egl') + Import('glesv2') + + test_env.Append(CPPPATH = ["#opengles-3.1/include", "#opengles-3.1/mali_include"]) + test_env.Append(LIBS = ["EGL", "GLESv2"]) + else: + if env['arch'] != 'armv7a': + test_env.Append(LIBS = ["EGL", "GLESv3"]) + else: + test_env.Append(LIBS = ["EGL", "GLESv2"]) + + test_env.Append(CPPDEFINES=['ARM_COMPUTE_GC']) + + files_benchmark += Glob('benchmark/GLES_COMPUTE/*/*.cpp') + files_benchmark += Glob('benchmark/GLES_COMPUTE/*.cpp') + + files_validation += Glob('validation/GLES_COMPUTE/*/*.cpp') + files_validation += Glob('validation/GLES_COMPUTE/*.cpp') + if env['os'] == 'android': test_env.Append(LIBS = ["log"]) else: @@ -121,6 +144,9 @@ if test_env['benchmark_tests']: if env['opencl']: Depends(arm_compute_benchmark, opencl) + if env['gles_compute'] and env['os'] != 'android': + Depends(arm_compute_benchmark, egl) + 
Depends(arm_compute_benchmark, glesv2) Default(arm_compute_benchmark) Export('arm_compute_benchmark') @@ -132,6 +158,9 @@ if test_env['validation_tests']: if env['opencl']: Depends(arm_compute_validation, opencl) + if env['gles_compute'] and env['os'] != 'android': + Depends(arm_compute_validation, egl) + Depends(arm_compute_validation, glesv2) Default(arm_compute_validation) Export('arm_compute_validation') diff --git a/tests/benchmark/GLES_COMPUTE/DirectConvolutionLayer.cpp b/tests/benchmark/GLES_COMPUTE/DirectConvolutionLayer.cpp new file mode 100644 index 0000000000..89ca192f83 --- /dev/null +++ b/tests/benchmark/GLES_COMPUTE/DirectConvolutionLayer.cpp @@ -0,0 +1,107 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/core/TensorShape.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCTensorAllocator.h" +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCDirectConvolutionLayer.h" +#include "tests/GLES_COMPUTE/GCAccessor.h" +#include "tests/benchmark/fixtures/ConvolutionLayerFixture.h" +#include "tests/datasets/system_tests/alexnet/AlexNetConvolutionLayerDataset.h" +#include "tests/datasets/system_tests/googlenet/inceptionv1/GoogLeNetInceptionV1ConvolutionLayerDataset.h" +#include "tests/datasets/system_tests/googlenet/inceptionv4/GoogLeNetInceptionV4ConvolutionLayerDataset.h" +#include "tests/datasets/system_tests/squeezenet/SqueezeNetConvolutionLayerDataset.h" +#include "tests/datasets/system_tests/vgg/vgg16/VGG16ConvolutionLayerDataset.h" +#include "tests/datasets/system_tests/yolo/v2/YOLOV2ConvolutionLayerDataset.h" +#include "tests/framework/Macros.h" +#include "tests/framework/datasets/Datasets.h" +#include "utils/TypePrinter.h" + +namespace arm_compute +{ +namespace test +{ +namespace +{ +const auto data_types = framework::dataset::make("DataType", { DataType::F32, DataType::F16 }); +} // namespace + +using GCConvolutionLayerFixture = ConvolutionLayerFixture; + +TEST_SUITE(GC) + +REGISTER_FIXTURE_DATA_TEST_CASE(AlexNetDirectConvolutionLayer, GCConvolutionLayerFixture, framework::DatasetMode::ALL, + framework::dataset::combine(framework::dataset::combine(datasets::AlexNetDirectConvolutionLayerDataset(), + data_types), + framework::dataset::make("Batches", 1))); + +REGISTER_FIXTURE_DATA_TEST_CASE(GoogLeNetInceptionV1DirectConvolutionLayer, GCConvolutionLayerFixture, framework::DatasetMode::ALL, + framework::dataset::combine(framework::dataset::combine(datasets::GoogLeNetInceptionV1DirectConvolutionLayerDataset(), + data_types), + framework::dataset::make("Batches", 1))); + +REGISTER_FIXTURE_DATA_TEST_CASE(GoogLeNetInceptionV4DirectConvolutionLayer, 
GCConvolutionLayerFixture, framework::DatasetMode::ALL, + framework::dataset::combine(framework::dataset::combine(datasets::GoogLeNetInceptionV4DirectConvolutionLayerDataset(), + data_types), + framework::dataset::make("Batches", 1))); + +REGISTER_FIXTURE_DATA_TEST_CASE(SqueezeNetDirectConvolutionLayer, GCConvolutionLayerFixture, framework::DatasetMode::ALL, + framework::dataset::combine(framework::dataset::combine(datasets::SqueezeNetConvolutionLayerDataset(), + data_types), + framework::dataset::make("Batches", 1))); + +TEST_SUITE(NIGHTLY) +REGISTER_FIXTURE_DATA_TEST_CASE(AlexNetDirectConvolutionLayer, GCConvolutionLayerFixture, framework::DatasetMode::NIGHTLY, + framework::dataset::combine(framework::dataset::combine(datasets::AlexNetDirectConvolutionLayerDataset(), + data_types), + framework::dataset::make("Batches", { 4, 8 }))); + +REGISTER_FIXTURE_DATA_TEST_CASE(GoogLeNetInceptionV1DirectConvolutionLayer, GCConvolutionLayerFixture, framework::DatasetMode::NIGHTLY, + framework::dataset::combine(framework::dataset::combine(datasets::GoogLeNetInceptionV1DirectConvolutionLayerDataset(), + data_types), + framework::dataset::make("Batches", { 4, 8 }))); + +REGISTER_FIXTURE_DATA_TEST_CASE(GoogLeNetInceptionV4DirectConvolutionLayer, GCConvolutionLayerFixture, framework::DatasetMode::NIGHTLY, + framework::dataset::combine(framework::dataset::combine(datasets::GoogLeNetInceptionV4DirectConvolutionLayerDataset(), + data_types), + framework::dataset::make("Batches", { 4, 8 }))); + +REGISTER_FIXTURE_DATA_TEST_CASE(SqueezeNetDirectConvolutionLayer, GCConvolutionLayerFixture, framework::DatasetMode::NIGHTLY, + framework::dataset::combine(framework::dataset::combine(datasets::SqueezeNetConvolutionLayerDataset(), + data_types), + framework::dataset::make("Batches", { 4, 8 }))); + +REGISTER_FIXTURE_DATA_TEST_CASE(VGG16DirectConvolutionLayer, GCConvolutionLayerFixture, framework::DatasetMode::NIGHTLY, + 
framework::dataset::combine(framework::dataset::combine(datasets::VGG16ConvolutionLayerDataset(), + data_types), + framework::dataset::make("Batches", { 1, 4, 8 }))); + +REGISTER_FIXTURE_DATA_TEST_CASE(YOLOV2DirectConvolutionLayer, GCConvolutionLayerFixture, framework::DatasetMode::NIGHTLY, + framework::dataset::combine(framework::dataset::combine(datasets::YOLOV2ConvolutionLayerDataset(), + data_types), + framework::dataset::make("Batches", { 1, 4, 8 }))); +TEST_SUITE_END() +TEST_SUITE_END() +} // namespace test +} // namespace arm_compute diff --git a/tests/benchmark/GLES_COMPUTE/FullyConnectedLayer.cpp b/tests/benchmark/GLES_COMPUTE/FullyConnectedLayer.cpp new file mode 100644 index 0000000000..36578edbfb --- /dev/null +++ b/tests/benchmark/GLES_COMPUTE/FullyConnectedLayer.cpp @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/core/TensorShape.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCTensorAllocator.h" +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.h" +#include "tests/GLES_COMPUTE/GCAccessor.h" +#include "tests/benchmark/fixtures/FullyConnectedLayerFixture.h" +#include "tests/datasets/system_tests/alexnet/AlexNetFullyConnectedLayerDataset.h" +#include "tests/datasets/system_tests/googlenet/inceptionv1/GoogLeNetInceptionV1FullyConnectedLayerDataset.h" +#include "tests/datasets/system_tests/googlenet/inceptionv4/GoogLeNetInceptionV4FullyConnectedLayerDataset.h" +#include "tests/datasets/system_tests/lenet5/LeNet5FullyConnectedLayerDataset.h" +#include "tests/datasets/system_tests/vgg/vgg16/VGG16FullyConnectedLayerDataset.h" +#include "tests/framework/Macros.h" +#include "tests/framework/datasets/Datasets.h" +#include "utils/TypePrinter.h" + +namespace arm_compute +{ +namespace test +{ +namespace +{ +const auto data_types = framework::dataset::make("DataType", { DataType::F32, DataType::F16 }); +} // namespace + +using GCFullyConnectedLayerFixture = FullyConnectedLayerFixture; + +TEST_SUITE(GC) + +REGISTER_FIXTURE_DATA_TEST_CASE(AlexNetFullyConnectedLayer, GCFullyConnectedLayerFixture, framework::DatasetMode::ALL, + framework::dataset::combine(framework::dataset::combine(datasets::AlexNetFullyConnectedLayerDataset(), + data_types), + framework::dataset::make("Batches", 1))); + +REGISTER_FIXTURE_DATA_TEST_CASE(LeNet5FullyConnectedLayer, GCFullyConnectedLayerFixture, framework::DatasetMode::ALL, + framework::dataset::combine(framework::dataset::combine(datasets::LeNet5FullyConnectedLayerDataset(), + data_types), + framework::dataset::make("Batches", 1))); + +REGISTER_FIXTURE_DATA_TEST_CASE(VGG16FullyConnectedLayer, GCFullyConnectedLayerFixture, framework::DatasetMode::ALL, + 
framework::dataset::combine(framework::dataset::combine(datasets::VGG16FullyConnectedLayerDataset(), + data_types), + framework::dataset::make("Batches", 1))); + +REGISTER_FIXTURE_DATA_TEST_CASE(GoogLeNetInceptionV1FullyConnectedLayer, GCFullyConnectedLayerFixture, framework::DatasetMode::ALL, + framework::dataset::combine(framework::dataset::combine(datasets::GoogLeNetInceptionV1FullyConnectedLayerDataset(), + data_types), + framework::dataset::make("Batches", 1))); + +REGISTER_FIXTURE_DATA_TEST_CASE(GoogLeNetInceptionV4FullyConnectedLayer, GCFullyConnectedLayerFixture, framework::DatasetMode::ALL, + framework::dataset::combine(framework::dataset::combine(datasets::GoogLeNetInceptionV4FullyConnectedLayerDataset(), + data_types), + framework::dataset::make("Batches", 1))); + +TEST_SUITE(NIGHTLY) +REGISTER_FIXTURE_DATA_TEST_CASE(AlexNetFullyConnectedLayer, GCFullyConnectedLayerFixture, framework::DatasetMode::NIGHTLY, + framework::dataset::combine(framework::dataset::combine(datasets::AlexNetFullyConnectedLayerDataset(), + data_types), + framework::dataset::make("Batches", { 4, 8 }))); + +REGISTER_FIXTURE_DATA_TEST_CASE(LeNet5FullyConnectedLayer, GCFullyConnectedLayerFixture, framework::DatasetMode::NIGHTLY, + framework::dataset::combine(framework::dataset::combine(datasets::LeNet5FullyConnectedLayerDataset(), + data_types), + framework::dataset::make("Batches", { 4, 8 }))); + +REGISTER_FIXTURE_DATA_TEST_CASE(VGG16FullyConnectedLayer, GCFullyConnectedLayerFixture, framework::DatasetMode::NIGHTLY, + framework::dataset::combine(framework::dataset::combine(datasets::VGG16FullyConnectedLayerDataset(), + data_types), + framework::dataset::make("Batches", { 4, 8 }))); + +REGISTER_FIXTURE_DATA_TEST_CASE(GoogLeNetInceptionV1FullyConnectedLayer, GCFullyConnectedLayerFixture, framework::DatasetMode::NIGHTLY, + framework::dataset::combine(framework::dataset::combine(datasets::GoogLeNetInceptionV1FullyConnectedLayerDataset(), + data_types), + framework::dataset::make("Batches", 
{ 4, 8 }))); + +REGISTER_FIXTURE_DATA_TEST_CASE(GoogLeNetInceptionV4FullyConnectedLayer, GCFullyConnectedLayerFixture, framework::DatasetMode::NIGHTLY, + framework::dataset::combine(framework::dataset::combine(datasets::GoogLeNetInceptionV4FullyConnectedLayerDataset(), + data_types), + framework::dataset::make("Batches", { 4, 8 }))); +TEST_SUITE_END() +TEST_SUITE_END() +} // namespace test +} // namespace arm_compute diff --git a/tests/benchmark/GLES_COMPUTE/GEMM.cpp b/tests/benchmark/GLES_COMPUTE/GEMM.cpp new file mode 100644 index 0000000000..69d8e54919 --- /dev/null +++ b/tests/benchmark/GLES_COMPUTE/GEMM.cpp @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/core/TensorShape.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCTensorAllocator.h" +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCGEMM.h" +#include "tests/GLES_COMPUTE/GCAccessor.h" +#include "tests/benchmark/fixtures/GEMMFixture.h" +#include "tests/datasets/GoogleNetGEMMDataset.h" +#include "tests/datasets/MatrixMultiplyGEMMDataset.h" +#include "tests/datasets/system_tests/googlenet/inceptionv1/GoogLeNetInceptionV1GEMMDataset.h" +#include "tests/framework/Macros.h" +#include "tests/framework/datasets/Datasets.h" +#include "utils/TypePrinter.h" + +namespace arm_compute +{ +namespace test +{ +namespace +{ +const auto data_types = framework::dataset::make("DataType", { DataType::F32 }); +} // namespace + +using GCGEMMFixture = GEMMFixture<GCTensor, GCGEMM, GCAccessor>; + +TEST_SUITE(GC) + +REGISTER_FIXTURE_DATA_TEST_CASE(GoogLeNetInceptionV1GEMM, GCGEMMFixture, framework::DatasetMode::ALL, framework::dataset::combine(datasets::GoogLeNetInceptionV1GEMMDataset(), data_types)); +REGISTER_FIXTURE_DATA_TEST_CASE(MatrixMultiplyGEMM, GCGEMMFixture, framework::DatasetMode::ALL, framework::dataset::combine(datasets::MatrixMultiplyGEMMDataset(), data_types)); +REGISTER_FIXTURE_DATA_TEST_CASE(GoogleNetGEMM, GCGEMMFixture, framework::DatasetMode::NIGHTLY, framework::dataset::combine(datasets::GoogleNetGEMMDataset(), data_types)); + +TEST_SUITE_END() +} // namespace test +} // namespace arm_compute diff --git a/tests/benchmark/GLES_COMPUTE/PoolingLayer.cpp b/tests/benchmark/GLES_COMPUTE/PoolingLayer.cpp new file mode 100644 index 0000000000..87c5382073 --- /dev/null +++ b/tests/benchmark/GLES_COMPUTE/PoolingLayer.cpp @@ -0,0 +1,128 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/core/TensorShape.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCTensorAllocator.h" +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCPoolingLayer.h" +#include "tests/GLES_COMPUTE/GCAccessor.h" +#include "tests/benchmark/fixtures/PoolingLayerFixture.h" +#include "tests/datasets/system_tests/alexnet/AlexNetPoolingLayerDataset.h" +#include "tests/datasets/system_tests/googlenet/inceptionv1/GoogLeNetInceptionV1PoolingLayerDataset.h" +#include "tests/datasets/system_tests/googlenet/inceptionv4/GoogLeNetInceptionV4PoolingLayerDataset.h" +#include "tests/datasets/system_tests/lenet5/LeNet5PoolingLayerDataset.h" +#include "tests/datasets/system_tests/squeezenet/SqueezeNetPoolingLayerDataset.h" +#include "tests/datasets/system_tests/vgg/vgg16/VGG16PoolingLayerDataset.h" +#include "tests/datasets/system_tests/yolo/v2/YOLOV2PoolingLayerDataset.h" +#include "tests/framework/Macros.h" +#include "tests/framework/datasets/Datasets.h" +#include "utils/TypePrinter.h" + +namespace arm_compute +{ +namespace test +{ +namespace +{ +const auto data_types = framework::dataset::make("DataType", { DataType::F32 }); +} // namespace + +using GCPoolingLayerFixture = PoolingLayerFixture; + +TEST_SUITE(GC) + +REGISTER_FIXTURE_DATA_TEST_CASE(AlexNetPoolingLayer, GCPoolingLayerFixture, framework::DatasetMode::ALL, + framework::dataset::combine(framework::dataset::combine(datasets::AlexNetPoolingLayerDataset(), + data_types), + framework::dataset::make("Batches", 1))); + +REGISTER_FIXTURE_DATA_TEST_CASE(LeNet5PoolingLayer, GCPoolingLayerFixture, framework::DatasetMode::ALL, + framework::dataset::combine(framework::dataset::combine(datasets::LeNet5PoolingLayerDataset(), + data_types), + framework::dataset::make("Batches", 1))); + +REGISTER_FIXTURE_DATA_TEST_CASE(GoogLeNetInceptionV1PoolingLayer, GCPoolingLayerFixture, framework::DatasetMode::ALL, + 
framework::dataset::combine(framework::dataset::combine(datasets::GoogLeNetInceptionV1PoolingLayerDataset(), + data_types), + framework::dataset::make("Batches", 1))); + +REGISTER_FIXTURE_DATA_TEST_CASE(GoogLeNetInceptionV4PoolingLayer, GCPoolingLayerFixture, framework::DatasetMode::ALL, + framework::dataset::combine(framework::dataset::combine(datasets::GoogLeNetInceptionV4PoolingLayerDataset(), + data_types), + framework::dataset::make("Batches", 1))); + +REGISTER_FIXTURE_DATA_TEST_CASE(SqueezeNetPoolingLayer, GCPoolingLayerFixture, framework::DatasetMode::ALL, + framework::dataset::combine(framework::dataset::combine(datasets::SqueezeNetPoolingLayerDataset(), + data_types), + framework::dataset::make("Batches", 1))); + +REGISTER_FIXTURE_DATA_TEST_CASE(VGG16PoolingLayer, GCPoolingLayerFixture, framework::DatasetMode::ALL, + framework::dataset::combine(framework::dataset::combine(datasets::VGG16PoolingLayerDataset(), + data_types), + framework::dataset::make("Batches", 1))); + +REGISTER_FIXTURE_DATA_TEST_CASE(YOLOV2PoolingLayer, GCPoolingLayerFixture, framework::DatasetMode::ALL, + framework::dataset::combine(framework::dataset::combine(datasets::YOLOV2PoolingLayerDataset(), + data_types), + framework::dataset::make("Batches", 1))); + +TEST_SUITE(NIGHTLY) +REGISTER_FIXTURE_DATA_TEST_CASE(AlexNetPoolingLayer, GCPoolingLayerFixture, framework::DatasetMode::NIGHTLY, + framework::dataset::combine(framework::dataset::combine(datasets::AlexNetPoolingLayerDataset(), + data_types), + framework::dataset::make("Batches", { 4, 8 }))); + +REGISTER_FIXTURE_DATA_TEST_CASE(LeNet5PoolingLayer, GCPoolingLayerFixture, framework::DatasetMode::NIGHTLY, + framework::dataset::combine(framework::dataset::combine(datasets::LeNet5PoolingLayerDataset(), + data_types), + framework::dataset::make("Batches", { 4, 8 }))); + +REGISTER_FIXTURE_DATA_TEST_CASE(GoogLeNetInceptionV1PoolingLayer, GCPoolingLayerFixture, framework::DatasetMode::NIGHTLY, + 
framework::dataset::combine(framework::dataset::combine(datasets::GoogLeNetInceptionV1PoolingLayerDataset(), + data_types), + framework::dataset::make("Batches", { 4, 8 }))); + +REGISTER_FIXTURE_DATA_TEST_CASE(GoogLeNetInceptionV4PoolingLayer, GCPoolingLayerFixture, framework::DatasetMode::NIGHTLY, + framework::dataset::combine(framework::dataset::combine(datasets::GoogLeNetInceptionV4PoolingLayerDataset(), + data_types), + framework::dataset::make("Batches", { 4, 8 }))); + +REGISTER_FIXTURE_DATA_TEST_CASE(SqueezeNetPoolingLayer, GCPoolingLayerFixture, framework::DatasetMode::NIGHTLY, + framework::dataset::combine(framework::dataset::combine(datasets::SqueezeNetPoolingLayerDataset(), + data_types), + framework::dataset::make("Batches", { 4, 8 }))); + +REGISTER_FIXTURE_DATA_TEST_CASE(VGG16PoolingLayer, GCPoolingLayerFixture, framework::DatasetMode::NIGHTLY, + framework::dataset::combine(framework::dataset::combine(datasets::VGG16PoolingLayerDataset(), + data_types), + framework::dataset::make("Batches", { 4, 8 }))); + +REGISTER_FIXTURE_DATA_TEST_CASE(YOLOV2PoolingLayer, GCPoolingLayerFixture, framework::DatasetMode::NIGHTLY, + framework::dataset::combine(framework::dataset::combine(datasets::YOLOV2PoolingLayerDataset(), + data_types), + framework::dataset::make("Batches", { 4, 8 }))); +TEST_SUITE_END() +TEST_SUITE_END() +} // namespace test +} // namespace arm_compute diff --git a/tests/benchmark/fixtures/ConvolutionLayerFixture.h b/tests/benchmark/fixtures/ConvolutionLayerFixture.h index fd508d4500..09e6cbfaf8 100644 --- a/tests/benchmark/fixtures/ConvolutionLayerFixture.h +++ b/tests/benchmark/fixtures/ConvolutionLayerFixture.h @@ -30,6 +30,13 @@ #include "tests/Utils.h" #include "tests/framework/Fixture.h" +#ifdef ARM_COMPUTE_GC +#include "arm_compute/runtime/GLES_COMPUTE/GCScheduler.h" +#include "tests/GLES_COMPUTE/Helper.h" + +using namespace arm_compute::test::gles_compute; +#endif /* ARM_COMPUTE_GC */ + namespace arm_compute { namespace test @@ -71,6 +78,14 
@@ public: void run() { conv_layer.run(); +#ifdef ARM_COMPUTE_GC + if(opengles31_is_available() && std::is_same::type, arm_compute::GCTensor>::value) + { + GCScheduler::get().sync(); + force_sync_tensor(src); + force_sync_tensor(dst); + } +#endif /* ARM_COMPUTE_GC */ } void teardown() diff --git a/tests/datasets/FullyConnectedLayerDataset.h b/tests/datasets/FullyConnectedLayerDataset.h index 9f8089d81a..b2008d604b 100644 --- a/tests/datasets/FullyConnectedLayerDataset.h +++ b/tests/datasets/FullyConnectedLayerDataset.h @@ -151,6 +151,7 @@ public: add_config(TensorShape(9U, 5U, 257U, 2U, 3U), TensorShape(11565U, 2123U), TensorShape(2123U), TensorShape(2123U, 2U, 3U)); } }; + } // namespace datasets } // namespace test } // namespace arm_compute diff --git a/tests/framework/Framework.cpp b/tests/framework/Framework.cpp index a5c665c458..39fe1fa00a 100644 --- a/tests/framework/Framework.cpp +++ b/tests/framework/Framework.cpp @@ -30,6 +30,11 @@ #include "arm_compute/runtime/CL/CLScheduler.h" #endif /* ARM_COMPUTE_CL */ +#ifdef ARM_COMPUTE_GC +#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCScheduler.h" +#endif /* ARM_COMPUTE_GC */ + #include #include #include @@ -292,6 +297,12 @@ void Framework::run_test(const TestInfo &info, TestCaseFactory &test_factory) CLScheduler::get().sync(); } #endif /* ARM_COMPUTE_CL */ +#ifdef ARM_COMPUTE_GC + if(opengles31_is_available()) + { + GCScheduler::get().sync(); + } +#endif /* ARM_COMPUTE_GC */ profiler.stop(); } diff --git a/tests/framework/SConscript b/tests/framework/SConscript index 52b8bed10f..f4beaf85ce 100644 --- a/tests/framework/SConscript +++ b/tests/framework/SConscript @@ -48,6 +48,11 @@ Help(new_options.GenerateHelpText(framework_env)) if(env['opencl']): framework_env.Append(CPPDEFINES=['ARM_COMPUTE_CL']) +if(env['gles_compute']): + framework_env.Append(CPPDEFINES=['ARM_COMPUTE_GC']) + if env['os'] != 'android': + framework_env.Append(CPPPATH = ["#opengles-3.1/include", 
"#opengles-3.1/mali_include"]) + framework_env.Append(CPPPATH = ["."]) framework_env.Append(CPPFLAGS=['-Wno-overloaded-virtual']) diff --git a/tests/main.cpp b/tests/main.cpp index d056267418..1f1f33a156 100644 --- a/tests/main.cpp +++ b/tests/main.cpp @@ -36,6 +36,9 @@ #ifdef ARM_COMPUTE_CL #include "arm_compute/runtime/CL/CLScheduler.h" #endif /* ARM_COMPUTE_CL */ +#ifdef ARM_COMPUTE_GC +#include "arm_compute/runtime/GLES_COMPUTE/GCScheduler.h" +#endif /* ARM_COMPUTE_GC */ #include "arm_compute/runtime/Scheduler.h" #include @@ -62,6 +65,10 @@ int main(int argc, char **argv) CLScheduler::get().default_init(); #endif /* ARM_COMPUTE_CL */ +#ifdef ARM_COMPUTE_GC + GCScheduler::get().default_init(); +#endif /* ARM_COMPUTE_GC */ + framework::Framework &framework = framework::Framework::get(); framework::CommandLineParser parser; diff --git a/tests/validation/GLES_COMPUTE/ActivationLayer.cpp b/tests/validation/GLES_COMPUTE/ActivationLayer.cpp new file mode 100644 index 0000000000..23821d35fa --- /dev/null +++ b/tests/validation/GLES_COMPUTE/ActivationLayer.cpp @@ -0,0 +1,198 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCTensorAllocator.h" +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCActivationLayer.h" +#include "tests/GLES_COMPUTE/GCAccessor.h" +#include "tests/PaddingCalculator.h" +#include "tests/datasets/ActivationFunctionsDataset.h" +#include "tests/datasets/ShapeDatasets.h" +#include "tests/framework/Asserts.h" +#include "tests/framework/Macros.h" +#include "tests/framework/datasets/Datasets.h" +#include "tests/validation/Validation.h" +#include "tests/validation/fixtures/ActivationLayerFixture.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace +{ +/** Define tolerance of the activation layer. + * + * @param[in] activation The activation function used. + * @param[in] data_type Data type. + * + * @return Tolerance depending on the activation function. + */ +AbsoluteTolerance tolerance(ActivationLayerInfo::ActivationFunction activation, DataType data_type) +{ + constexpr float epsilon = 1e-6f; + + switch(activation) + { + case ActivationLayerInfo::ActivationFunction::LINEAR: + return AbsoluteTolerance(data_type == DataType::F16 ? 0.2f : epsilon); + case ActivationLayerInfo::ActivationFunction::SQUARE: + return AbsoluteTolerance(data_type == DataType::F16 ? 0.1f : epsilon); + case ActivationLayerInfo::ActivationFunction::LOGISTIC: + if(is_data_type_fixed_point(data_type)) + { + return AbsoluteTolerance(5.f); + } + else + { + return AbsoluteTolerance(data_type == DataType::F16 ? 0.001f : epsilon); + } + case ActivationLayerInfo::ActivationFunction::LEAKY_RELU: + return AbsoluteTolerance(data_type == DataType::F16 ? 
0.00001f : epsilon); + case ActivationLayerInfo::ActivationFunction::SOFT_RELU: + case ActivationLayerInfo::ActivationFunction::SQRT: + if(is_data_type_fixed_point(data_type)) + { + return AbsoluteTolerance(5.f); + } + else + { + return AbsoluteTolerance(data_type == DataType::F16 ? 0.01f : 0.00001f); + } + case ActivationLayerInfo::ActivationFunction::TANH: + if(is_data_type_fixed_point(data_type)) + { + return AbsoluteTolerance(5.f); + } + else + { + return AbsoluteTolerance(data_type == DataType::F16 ? 0.001f : 0.00001f); + } + default: + return AbsoluteTolerance(epsilon); + } +} + +/** CNN data types */ +const auto CNNDataTypes = framework::dataset::make("DataType", +{ + DataType::F16, + DataType::F32, +}); + +/** Input data sets. */ +const auto ActivationDataset = combine(combine(framework::dataset::make("InPlace", { false, true }), datasets::ActivationFunctions()), framework::dataset::make("AlphaBeta", { 0.5f, 1.f })); +} // namespace + +TEST_SUITE(GC) +TEST_SUITE(ActivationLayer) + +DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(concat(datasets::SmallShapes(), datasets::LargeShapes()), CNNDataTypes), framework::dataset::make("InPlace", { false, true })), + shape, data_type, in_place) +{ + // Set fixed point position data type allowed + const int fixed_point_position = 0; + + // Create tensors + GCTensor src = create_tensor(shape, data_type, 1, fixed_point_position); + GCTensor dst = create_tensor(shape, data_type, 1, fixed_point_position); + + ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); + + // Create and configure function + GCActivationLayer act_layer; + + if(in_place) + { + act_layer.configure(&src, nullptr, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::ABS)); + } + else + { + act_layer.configure(&src, &dst, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::ABS)); + } + + // Validate valid 
region + const ValidRegion valid_region = shape_to_valid_region(shape); + validate(src.info()->valid_region(), valid_region); + + if(!in_place) + { + validate(dst.info()->valid_region(), valid_region); + } + + // Validate padding + const int step = (arm_compute::data_size_from_type(data_type) == 4 ? 1 : 2); + const PaddingSize padding = PaddingCalculator(shape.x(), step).required_padding(); + validate(src.info()->padding(), padding); + + if(!in_place) + { + validate(dst.info()->padding(), padding); + } +} + +template +using GCActivationLayerFixture = ActivationValidationFixture; + +TEST_SUITE(Float) +TEST_SUITE(FP16) +FIXTURE_DATA_TEST_CASE(RunSmall, GCActivationLayerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), ActivationDataset), + framework::dataset::make("DataType", + DataType::F16))) +{ + // Validate output + validate(GCAccessor(_target), _reference, tolerance(_function, _data_type)); +} +FIXTURE_DATA_TEST_CASE(RunLarge, GCActivationLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(), ActivationDataset), + framework::dataset::make("DataType", + DataType::F16))) +{ + // Validate output + validate(GCAccessor(_target), _reference, tolerance(_function, _data_type)); +} +TEST_SUITE_END() + +TEST_SUITE(FP32) +FIXTURE_DATA_TEST_CASE(RunSmall, GCActivationLayerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), ActivationDataset), framework::dataset::make("DataType", + DataType::F32))) +{ + // Validate output + validate(GCAccessor(_target), _reference, tolerance(_function, _data_type)); +} +FIXTURE_DATA_TEST_CASE(RunLarge, GCActivationLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(), ActivationDataset), framework::dataset::make("DataType", + DataType::F32))) +{ + // Validate output + validate(GCAccessor(_target), _reference, tolerance(_function, _data_type)); +} +TEST_SUITE_END() +TEST_SUITE_END() + +TEST_SUITE_END() 
+TEST_SUITE_END() +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/GLES_COMPUTE/BatchNormalizationLayer.cpp b/tests/validation/GLES_COMPUTE/BatchNormalizationLayer.cpp new file mode 100644 index 0000000000..a82149bdcc --- /dev/null +++ b/tests/validation/GLES_COMPUTE/BatchNormalizationLayer.cpp @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCTensorAllocator.h" +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCBatchNormalizationLayer.h" +#include "tests/GLES_COMPUTE/GCAccessor.h" +#include "tests/PaddingCalculator.h" +#include "tests/datasets/RandomBatchNormalizationLayerDataset.h" +#include "tests/datasets/ShapeDatasets.h" +#include "tests/framework/Asserts.h" +#include "tests/framework/Macros.h" +#include "tests/framework/datasets/Datasets.h" +#include "tests/validation/Validation.h" +#include "tests/validation/fixtures/BatchNormalizationLayerFixture.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace +{ +constexpr AbsoluteTolerance tolerance_f(0.00001f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 */ +constexpr AbsoluteTolerance tolerance_f16(0.01f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F16 */ +} // namespace + +TEST_SUITE(GC) +TEST_SUITE(BatchNormalizationLayer) + +template +using GCBatchNormalizationLayerFixture = BatchNormalizationLayerValidationFixture; + +DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(datasets::RandomBatchNormalizationLayerDataset(), framework::dataset::make("DataType", { DataType::F32 })), + shape0, shape1, epsilon, dt) +{ + // Set fixed point position data type allowed + int fixed_point_position = (arm_compute::is_data_type_fixed_point(dt)) ? 
3 : 0; + + // Create tensors + GCTensor src = create_tensor(shape0, dt, 1, fixed_point_position); + GCTensor dst = create_tensor(shape0, dt, 1, fixed_point_position); + GCTensor mean = create_tensor(shape1, dt, 1, fixed_point_position); + GCTensor var = create_tensor(shape1, dt, 1, fixed_point_position); + GCTensor beta = create_tensor(shape1, dt, 1, fixed_point_position); + GCTensor gamma = create_tensor(shape1, dt, 1, fixed_point_position); + + // Create and Configure function + GCBatchNormalizationLayer norm; + norm.configure(&src, &dst, &mean, &var, &beta, &gamma, epsilon); + + // Validate valid region + const ValidRegion valid_region = shape_to_valid_region(shape0); + validate(dst.info()->valid_region(), valid_region); +} + +TEST_SUITE(Float) +TEST_SUITE(FP16) +FIXTURE_DATA_TEST_CASE(Random, GCBatchNormalizationLayerFixture, framework::DatasetMode::PRECOMMIT, combine(datasets::RandomBatchNormalizationLayerDataset(), + framework::dataset::make("DataType", DataType::F16))) +{ + // Validate output + validate(GCAccessor(_target), _reference, tolerance_f16, 0); +} +TEST_SUITE_END() + +TEST_SUITE(FP32) +FIXTURE_DATA_TEST_CASE(Random, GCBatchNormalizationLayerFixture, framework::DatasetMode::PRECOMMIT, combine(datasets::RandomBatchNormalizationLayerDataset(), + framework::dataset::make("DataType", DataType::F32))) +{ + // Validate output + validate(GCAccessor(_target), _reference, tolerance_f, 0); +} +TEST_SUITE_END() +TEST_SUITE_END() + +TEST_SUITE_END() +TEST_SUITE_END() +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/GLES_COMPUTE/DepthConcatenateLayer.cpp b/tests/validation/GLES_COMPUTE/DepthConcatenateLayer.cpp new file mode 100644 index 0000000000..829845dd36 --- /dev/null +++ b/tests/validation/GLES_COMPUTE/DepthConcatenateLayer.cpp @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCTensorAllocator.h" +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCDepthConcatenate.h" +#include "tests/GLES_COMPUTE/GCAccessor.h" +#include "tests/datasets/ShapeDatasets.h" +#include "tests/framework/Asserts.h" +#include "tests/framework/Macros.h" +#include "tests/framework/datasets/Datasets.h" +#include "tests/validation/Validation.h" +#include "tests/validation/fixtures/DepthConcatenateLayerFixture.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +TEST_SUITE(GC) +TEST_SUITE(DepthConcatenateLayer) + +//TODO(COMPMID-415): Add configuration test? 
+ +template +using GCDepthConcatenateLayerFixture = DepthConcatenateValidationFixture; + +TEST_SUITE(Float) +TEST_SUITE(FP16) +FIXTURE_DATA_TEST_CASE(RunSmall, GCDepthConcatenateLayerFixture, framework::DatasetMode::PRECOMMIT, combine(datasets::Small2DShapes(), framework::dataset::make("DataType", + DataType::F16))) +{ + // Validate output + validate(GCAccessor(_target), _reference); +} +FIXTURE_DATA_TEST_CASE(RunLarge, GCDepthConcatenateLayerFixture, framework::DatasetMode::NIGHTLY, combine(datasets::Large2DShapes(), framework::dataset::make("DataType", + DataType::F16))) +{ + // Validate output + validate(GCAccessor(_target), _reference); +} +TEST_SUITE_END() + +TEST_SUITE(FP32) +FIXTURE_DATA_TEST_CASE(RunSmall, GCDepthConcatenateLayerFixture, framework::DatasetMode::PRECOMMIT, combine(datasets::Small2DShapes(), framework::dataset::make("DataType", + DataType::F32))) +{ + // Validate output + validate(GCAccessor(_target), _reference); +} +FIXTURE_DATA_TEST_CASE(RunLarge, GCDepthConcatenateLayerFixture, framework::DatasetMode::NIGHTLY, combine(datasets::DepthConcatenateShapes(), framework::dataset::make("DataType", + DataType::F32))) +{ + // Validate output + validate(GCAccessor(_target), _reference); +} +TEST_SUITE_END() +TEST_SUITE_END() + +TEST_SUITE_END() +TEST_SUITE_END() +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/GLES_COMPUTE/DirectConvolutionLayer.cpp b/tests/validation/GLES_COMPUTE/DirectConvolutionLayer.cpp new file mode 100644 index 0000000000..153b060757 --- /dev/null +++ b/tests/validation/GLES_COMPUTE/DirectConvolutionLayer.cpp @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCTensorAllocator.h" +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCDirectConvolutionLayer.h" +#include "tests/GLES_COMPUTE/GCAccessor.h" +#include "tests/PaddingCalculator.h" +#include "tests/datasets/ShapeDatasets.h" +#include "tests/framework/Asserts.h" +#include "tests/framework/Macros.h" +#include "tests/framework/datasets/Datasets.h" +#include "tests/validation/Validation.h" +#include "tests/validation/fixtures/DirectConvolutionLayerFixture.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace +{ +RelativeTolerance tolerance_fp16(half(0.2)); /**< Tolerance for floating point tests */ +RelativeTolerance tolerance_fp32(0.02f); /**< Tolerance for floating point tests */ +constexpr float tolerance_num = 0.07f; /**< Tolerance number */ + +/** Direct convolution data set. */ +const auto data_quantized = combine(datasets::SmallDirectConvolutionShapes(), + combine(framework::dataset::make("StrideX", 1, 3), + combine(framework::dataset::make("StrideY", 1, 3), + combine(concat(combine(framework::dataset::make("PadX", 0), + combine(framework::dataset::make("PadY", 0), + framework::dataset::make("KernelSize", 1))), + combine(framework::dataset::make("PadX", 0, 2), + combine(framework::dataset::make("PadY", 0, 2), + framework::dataset::make("KernelSize", { 3 })))), + framework::dataset::make("NumKernels", { 1, 4, 8, 16 }))))); + +const auto data = combine(datasets::SmallDirectConvolutionShapes(), + combine(framework::dataset::make("StrideX", 1, 3), + combine(framework::dataset::make("StrideY", 1, 3), + combine(concat(combine(framework::dataset::make("PadX", 0), + combine(framework::dataset::make("PadY", 0), + framework::dataset::make("KernelSize", 1))), + combine(framework::dataset::make("PadX", 0, 2), + combine(framework::dataset::make("PadY", 0, 2), + framework::dataset::make("KernelSize", 
{ 3, 5 })))), + framework::dataset::make("NumKernels", { 1, 4, 8, 16 }))))); +} // namespace + +TEST_SUITE(GC) +TEST_SUITE(DirectConvolutionLayer) + +//TODO(COMPMID-415): Configuration tests? + +template +using GCDirectConvolutionLayerFixture = DirectConvolutionValidationFixture; + +TEST_SUITE(Float) +TEST_SUITE(FP16) +FIXTURE_DATA_TEST_CASE(Run, GCDirectConvolutionLayerFixture, framework::DatasetMode::ALL, combine(data, framework::dataset::make("DataType", DataType::F16))) +{ + // Validate output + validate(GCAccessor(_target), _reference, tolerance_fp16, tolerance_num); +} +TEST_SUITE_END() + +TEST_SUITE(FP32) +FIXTURE_DATA_TEST_CASE(Run, GCDirectConvolutionLayerFixture, framework::DatasetMode::ALL, combine(data, framework::dataset::make("DataType", DataType::F32))) +{ + // Validate output + validate(GCAccessor(_target), _reference, tolerance_fp32); +} +TEST_SUITE_END() +TEST_SUITE_END() + +TEST_SUITE_END() +TEST_SUITE_END() +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/GLES_COMPUTE/DropoutLayer.cpp b/tests/validation/GLES_COMPUTE/DropoutLayer.cpp new file mode 100644 index 0000000000..4d54dad15c --- /dev/null +++ b/tests/validation/GLES_COMPUTE/DropoutLayer.cpp @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCTensorAllocator.h" +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCDropoutLayer.h" +#include "tests/GLES_COMPUTE/GCAccessor.h" +#include "tests/datasets/ShapeDatasets.h" +#include "tests/framework/Asserts.h" +#include "tests/framework/Macros.h" +#include "tests/framework/datasets/Datasets.h" +#include "tests/validation/Validation.h" +#include "tests/validation/fixtures/DropoutLayerFixture.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace +{ +//constexpr AbsoluteTolerance tolerance_f(0.00001f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 */ + +const auto testparam = combine(framework::dataset::make("ratio", { 0.5f }), framework::dataset::make("forward", { false, true })); +} // namespace + +TEST_SUITE(GC) +TEST_SUITE(DropoutLayer) + +template +using GCDropoutLayerFixture = DropoutLayerValidationFixture; + +TEST_SUITE(Float) +TEST_SUITE(FP32) +FIXTURE_DATA_TEST_CASE(Random, GCDropoutLayerFixture, framework::DatasetMode::PRECOMMIT, + combine(combine(datasets::SmallShapes(), testparam), framework::dataset::make("DataType", DataType::F32))) +{ + // FIXME(APPBROWSER-302) + // Validate output + //validate(GCAccessor(_target), _reference, tolerance_f, 0); +} +TEST_SUITE_END() + +TEST_SUITE(FP16) +FIXTURE_DATA_TEST_CASE(Random, 
GCDropoutLayerFixture, framework::DatasetMode::PRECOMMIT, + combine(combine(datasets::SmallShapes(), testparam), framework::dataset::make("DataType", DataType::F16))) +{ + // FIXME(APPBROWSER-302) + // Validate output + //validate(GCAccessor(_target), _reference, tolerance_f, 0); +} +TEST_SUITE_END() +TEST_SUITE_END() + +TEST_SUITE_END() +TEST_SUITE_END() +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/GLES_COMPUTE/FullyConnectedLayer.cpp b/tests/validation/GLES_COMPUTE/FullyConnectedLayer.cpp new file mode 100644 index 0000000000..4040f468f4 --- /dev/null +++ b/tests/validation/GLES_COMPUTE/FullyConnectedLayer.cpp @@ -0,0 +1,143 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCTensorAllocator.h" +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.h" +#include "tests/GLES_COMPUTE/GCAccessor.h" +#include "tests/GLES_COMPUTE/Helper.h" +#include "tests/PaddingCalculator.h" +#include "tests/datasets/FullyConnectedLayerDataset.h" +#include "tests/framework/Asserts.h" +#include "tests/framework/Macros.h" +#include "tests/framework/datasets/Datasets.h" +#include "tests/validation/Validation.h" +#include "tests/validation/fixtures/FullyConnectedLayerFixture.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace +{ +/** Tolerance for float operations */ +RelativeTolerance tolerance_f32(0.05f); +RelativeTolerance tolerance_f16(half(0.2)); +constexpr float tolerance_num = 0.07f; /**< Tolerance number */ + +/** CNN data types */ +const auto CNNDataTypes = framework::dataset::make("DataType", +{ + DataType::F16, + DataType::F32, +}); + +const auto FullyConnectedParameters = combine(framework::dataset::make("TransposeWeights", { false, true }), framework::dataset::make("ReshapeWeights", { false, true })); +} // namespace + +TEST_SUITE(GC) +TEST_SUITE(FullyConnectedLayer) + +DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallFullyConnectedLayerDataset(), datasets::LargeFullyConnectedLayerDataset()), + FullyConnectedParameters), + CNNDataTypes), + src_shape, weights_shape, bias_shape, dst_shape, transpose_weights, reshape_weights, data_type) +{ + // Set fixed point position data type allowed + int fixed_point_position = is_data_type_fixed_point(data_type) ? 
3 : 0; + + TensorShape ws(weights_shape); + + // Transpose weights if not done in the function + if(!reshape_weights || !transpose_weights) + { + const size_t shape_x = ws.x(); + ws.set(0, ws.y()); + ws.set(1, shape_x); + } + + // Create tensors + GCTensor src = create_tensor(src_shape, data_type, 1, fixed_point_position); + GCTensor weights = create_tensor(ws, data_type, 1, fixed_point_position); + GCTensor bias = create_tensor(bias_shape, data_type, 1, fixed_point_position); + GCTensor dst = create_tensor(dst_shape, data_type, 1, fixed_point_position); + + ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(weights.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(bias.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); + + // Create and configure function. + GCFullyConnectedLayer fc; + fc.configure(&src, &weights, &bias, &dst, transpose_weights, !reshape_weights); + + // Validate valid region + const ValidRegion dst_valid_region = shape_to_valid_region(dst_shape); + validate(dst.info()->valid_region(), dst_valid_region); +} + +template +using GCFullyConnectedLayerFixture = FullyConnectedLayerValidationFixture; + +TEST_SUITE(Float) +TEST_SUITE(FP16) +FIXTURE_DATA_TEST_CASE(RunSmall, GCFullyConnectedLayerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallFullyConnectedLayerDataset(), + FullyConnectedParameters), + framework::dataset::make("DataType", DataType::F16))) +{ + // Validate output + validate(GCAccessor(_target), _reference, tolerance_f16, tolerance_num); +} +FIXTURE_DATA_TEST_CASE(RunLarge, GCFullyConnectedLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeFullyConnectedLayerDataset(), + FullyConnectedParameters), + framework::dataset::make("DataType", DataType::F16))) +{ + // Validate output + validate(GCAccessor(_target), _reference, tolerance_f16, 
tolerance_num); +} +TEST_SUITE_END() + +TEST_SUITE(FP32) +FIXTURE_DATA_TEST_CASE(RunSmall, GCFullyConnectedLayerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallFullyConnectedLayerDataset(), FullyConnectedParameters), + framework::dataset::make("DataType", DataType::F32))) +{ + // Validate output + validate(GCAccessor(_target), _reference, tolerance_f32); +} +FIXTURE_DATA_TEST_CASE(RunLarge, GCFullyConnectedLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeFullyConnectedLayerDataset(), FullyConnectedParameters), + framework::dataset::make("DataType", DataType::F32))) +{ + // Validate output + validate(GCAccessor(_target), _reference, tolerance_f32); +} +TEST_SUITE_END() +TEST_SUITE_END() + +TEST_SUITE_END() +TEST_SUITE_END() +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/GLES_COMPUTE/GEMM.cpp b/tests/validation/GLES_COMPUTE/GEMM.cpp new file mode 100644 index 0000000000..2abad3206d --- /dev/null +++ b/tests/validation/GLES_COMPUTE/GEMM.cpp @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCTensorAllocator.h" +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCGEMM.h" +#include "tests/GLES_COMPUTE/GCAccessor.h" +#include "tests/GLES_COMPUTE/Helper.h" +#include "tests/PaddingCalculator.h" +#include "tests/datasets/LargeGEMMDataset.h" +#include "tests/datasets/SmallGEMMDataset.h" +#include "tests/framework/Asserts.h" +#include "tests/framework/Macros.h" +#include "tests/framework/datasets/Datasets.h" +#include "tests/validation/Validation.h" +#include "tests/validation/fixtures/GEMMFixture.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace +{ +RelativeTolerance tolerance_f32(0.001f); /**< Tolerance value for comparing reference's output against implementation's output for floating point data types */ + +/** CNN data types */ +const auto CNNDataTypes = framework::dataset::make("DataType", +{ + DataType::F32, +}); +} // namespace + +TEST_SUITE(GC) +TEST_SUITE(GEMM) + +DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(framework::dataset::concat(datasets::SmallGEMMDataset(), datasets::LargeGEMMDataset()), CNNDataTypes), + shape_a, shape_b, shape_c, output_shape, alpha, beta, data_type) +{ + // Set fixed point position data type allowed + const int fixed_point_position = is_data_type_fixed_point(data_type) ? 
3 : 0; + + // Create tensors + GCTensor a = create_tensor(shape_a, data_type, 1, fixed_point_position); + GCTensor b = create_tensor(shape_b, data_type, 1, fixed_point_position); + GCTensor c = create_tensor(shape_c, data_type, 1, fixed_point_position); + GCTensor dst = create_tensor(output_shape, data_type, 1, fixed_point_position); + + ARM_COMPUTE_EXPECT(a.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(b.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(c.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); + + // Create and configure function + GCGEMM gemm; + gemm.configure(&a, &b, &c, &dst, alpha, beta); + + //TODO(COMPMID-415): Validate valid region +} + +template +using GCGEMMFixture = GEMMValidationFixture; + +TEST_SUITE(Float) +TEST_SUITE(FP32) +FIXTURE_DATA_TEST_CASE(RunSmall, GCGEMMFixture, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallGEMMDataset(), framework::dataset::make("DataType", DataType::F32))) +{ + // Validate output + validate(GCAccessor(_target), _reference, tolerance_f32); +} +FIXTURE_DATA_TEST_CASE(RunLarge, GCGEMMFixture, framework::DatasetMode::NIGHTLY, combine(datasets::LargeGEMMDataset(), framework::dataset::make("DataType", DataType::F32))) +{ + // Validate output + validate(GCAccessor(_target), _reference, tolerance_f32); +} +TEST_SUITE_END() +TEST_SUITE_END() + +TEST_SUITE_END() +TEST_SUITE_END() +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/GLES_COMPUTE/GlobalPoolingLayer.cpp b/tests/validation/GLES_COMPUTE/GlobalPoolingLayer.cpp new file mode 100644 index 0000000000..88372ffe24 --- /dev/null +++ b/tests/validation/GLES_COMPUTE/GlobalPoolingLayer.cpp @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCTensorAllocator.h" +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCPoolingLayer.h" +#include "tests/GLES_COMPUTE/GCAccessor.h" +#include "tests/PaddingCalculator.h" +#include "tests/datasets/PoolingTypesDataset.h" +#include "tests/datasets/ShapeDatasets.h" +#include "tests/framework/Asserts.h" +#include "tests/framework/Macros.h" +#include "tests/framework/datasets/Datasets.h" +#include "tests/validation/Validation.h" +#include "tests/validation/fixtures/PoolingLayerFixture.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace +{ +/** Input data set for float data types */ +const auto GlobalPoolingLayerDataset = combine(datasets::GlobalPoolingShapes(), datasets::PoolingTypes()); + +/** Input data set for quantized data types */ +constexpr AbsoluteTolerance tolerance_f32(0.001f); /**< Tolerance value for comparing reference's output against implementation's output for FP32 types */ +constexpr AbsoluteTolerance tolerance_f16(0.01f); /**< Tolerance value for comparing reference's output against implementation's output for FP16 types */ +} // namespace + +TEST_SUITE(GC) +TEST_SUITE(GlobalPoolingLayer) + +template +using GCGlobalPoolingLayerFixture = GlobalPoolingLayerValidationFixture; + +TEST_SUITE(Float) +TEST_SUITE(FP32) +FIXTURE_DATA_TEST_CASE(RunGlobalPooling, GCGlobalPoolingLayerFixture, framework::DatasetMode::ALL, combine(GlobalPoolingLayerDataset, framework::dataset::make("DataType", DataType::F32))) +{ + // Validate output + validate(GCAccessor(_target), _reference, tolerance_f32); +} +TEST_SUITE_END() + +TEST_SUITE(FP16) +FIXTURE_DATA_TEST_CASE(RunGlobalPooling, GCGlobalPoolingLayerFixture, framework::DatasetMode::ALL, combine(GlobalPoolingLayerDataset, framework::dataset::make("DataType", + DataType::F16))) +{ + // Validate output + validate(GCAccessor(_target), _reference, 
tolerance_f16); +} +TEST_SUITE_END() +TEST_SUITE_END() + +TEST_SUITE_END() +TEST_SUITE_END() +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/GLES_COMPUTE/NormalizationLayer.cpp b/tests/validation/GLES_COMPUTE/NormalizationLayer.cpp new file mode 100644 index 0000000000..4f6ae55677 --- /dev/null +++ b/tests/validation/GLES_COMPUTE/NormalizationLayer.cpp @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCTensorAllocator.h" +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCNormalizationLayer.h" +#include "tests/GLES_COMPUTE/GCAccessor.h" +#include "tests/PaddingCalculator.h" +#include "tests/datasets/NormalizationTypesDataset.h" +#include "tests/datasets/ShapeDatasets.h" +#include "tests/framework/Asserts.h" +#include "tests/framework/Macros.h" +#include "tests/framework/datasets/Datasets.h" +#include "tests/validation/Validation.h" +#include "tests/validation/fixtures/NormalizationLayerFixture.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace +{ +/** Tolerance for float operations */ +constexpr AbsoluteTolerance tolerance_f32(0.00001f); + +/** Input data set. */ +const auto NormalizationDataset = combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("NormType", { NormType::IN_MAP_1D, NormType::CROSS_MAP })), + framework::dataset::make("NormalizationSize", 3, 9, 2)), + framework::dataset::make("Beta", { 0.5f, 1.f, 2.f })); +} // namespace + +TEST_SUITE(GC) +TEST_SUITE(NormalizationLayer) + +//TODO(COMPMID-415): Missing configuration? 
+ +template +using GCNormalizationLayerFixture = NormalizationValidationFixture; + +TEST_SUITE(Float) +TEST_SUITE(FP32) +FIXTURE_DATA_TEST_CASE(RunSmall, GCNormalizationLayerFixture, framework::DatasetMode::PRECOMMIT, combine(NormalizationDataset, framework::dataset::make("DataType", DataType::F32))) +{ + // Validate output + validate(GCAccessor(_target), _reference, tolerance_f32); +} +FIXTURE_DATA_TEST_CASE(RunLarge, GCNormalizationLayerFixture, framework::DatasetMode::NIGHTLY, combine(NormalizationDataset, framework::dataset::make("DataType", DataType::F32))) +{ + // Validate output + validate(GCAccessor(_target), _reference, tolerance_f32); +} +TEST_SUITE_END() +TEST_SUITE_END() + +template +using GCNormalizationLayerFixedPointFixture = NormalizationValidationFixedPointFixture; + +TEST_SUITE_END() +TEST_SUITE_END() +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/GLES_COMPUTE/PoolingLayer.cpp b/tests/validation/GLES_COMPUTE/PoolingLayer.cpp new file mode 100644 index 0000000000..a78b27edc2 --- /dev/null +++ b/tests/validation/GLES_COMPUTE/PoolingLayer.cpp @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCTensorAllocator.h" +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCPoolingLayer.h" +#include "tests/GLES_COMPUTE/GCAccessor.h" +#include "tests/PaddingCalculator.h" +#include "tests/datasets/PoolingTypesDataset.h" +#include "tests/datasets/ShapeDatasets.h" +#include "tests/framework/Asserts.h" +#include "tests/framework/Macros.h" +#include "tests/framework/datasets/Datasets.h" +#include "tests/validation/Validation.h" +#include "tests/validation/fixtures/PoolingLayerFixture.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace +{ +// FIXME(APPBROWSER-304): Add exclude padding support for OpenGL ES implementation +/** Input data set for float data types */ +const auto PoolingLayerDatasetFP = combine(combine(combine(datasets::PoolingTypes(), framework::dataset::make("PoolingSize", { 2, 3, 4, 7, 9 })), + framework::dataset::make("PadStride", { PadStrideInfo(1, 1, 0, 0), PadStrideInfo(2, 1, 0, 0), PadStrideInfo(1, 2, 1, 1), PadStrideInfo(2, 2, 1, 0) })), + framework::dataset::make("ExcludePadding", { /* true, */ false })); + +// FIXME(APPBROWSER-304): Add exclude padding support for OpenGL ES implementation +// constexpr AbsoluteTolerance tolerance_f32(0.001f); /**< Tolerance value for comparing reference's output against implementation's output for float types */ +// constexpr AbsoluteTolerance 
tolerance_f16(0.01f); /**< Tolerance value for comparing reference's output against implementation's output for float types */ + +} // namespace + +TEST_SUITE(GC) +TEST_SUITE(PoolingLayer) + +template +using GCPoolingLayerFixture = PoolingLayerValidationFixture; + +TEST_SUITE(Float) +TEST_SUITE(FP32) +FIXTURE_DATA_TEST_CASE(RunSmall, GCPoolingLayerFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), combine(PoolingLayerDatasetFP, framework::dataset::make("DataType", + DataType::F32)))) +{ + // FIXME(APPBROWSER-304): Add exclude padding support for OpenGL ES implementation + // Validate output + // validate(GCAccessor(_target), _reference, tolerance_f32); +} +FIXTURE_DATA_TEST_CASE(RunLarge, GCPoolingLayerFixture, framework::DatasetMode::NIGHTLY, combine(datasets::LargeShapes(), combine(PoolingLayerDatasetFP, framework::dataset::make("DataType", + DataType::F32)))) +{ + // FIXME(APPBROWSER-304): Add exclude padding support for OpenGL ES implementation + // Validate output + // validate(GCAccessor(_target), _reference, tolerance_f32); +} +TEST_SUITE_END() + +TEST_SUITE(FP16) +FIXTURE_DATA_TEST_CASE(RunSmall, GCPoolingLayerFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), combine(PoolingLayerDatasetFP, + framework::dataset::make("DataType", DataType::F16)))) +{ + // FIXME(APPBROWSER-304): Add exclude padding support for OpenGL ES implementation + // Validate output + // validate(GCAccessor(_target), _reference, tolerance_f16); +} +FIXTURE_DATA_TEST_CASE(RunLarge, GCPoolingLayerFixture, framework::DatasetMode::NIGHTLY, combine(datasets::LargeShapes(), combine(PoolingLayerDatasetFP, + framework::dataset::make("DataType", DataType::F16)))) +{ + // FIXME(APPBROWSER-304): Add exclude padding support for OpenGL ES implementation + // Validate output + // validate(GCAccessor(_target), _reference, tolerance_f16); +} +TEST_SUITE_END() +TEST_SUITE_END() + +TEST_SUITE_END() +TEST_SUITE_END() +} // namespace validation +} // namespace test 
+} // namespace arm_compute diff --git a/tests/validation/GLES_COMPUTE/SoftmaxLayer.cpp b/tests/validation/GLES_COMPUTE/SoftmaxLayer.cpp new file mode 100644 index 0000000000..888f87e9ef --- /dev/null +++ b/tests/validation/GLES_COMPUTE/SoftmaxLayer.cpp @@ -0,0 +1,122 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCTensorAllocator.h" +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.h" +#include "tests/GLES_COMPUTE/GCAccessor.h" +#include "tests/PaddingCalculator.h" +#include "tests/datasets/ShapeDatasets.h" +#include "tests/framework/Asserts.h" +#include "tests/framework/Macros.h" +#include "tests/framework/datasets/Datasets.h" +#include "tests/validation/Validation.h" +#include "tests/validation/fixtures/SoftmaxLayerFixture.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace +{ +/** Tolerance for float operations */ +RelativeTolerance tolerance_f16(half(0.2)); +RelativeTolerance tolerance_f32(0.001f); + +/** CNN data types */ +const auto CNNDataTypes = framework::dataset::make("DataType", +{ + DataType::F16, + DataType::F32, +}); +} // namespace + +TEST_SUITE(GC) +TEST_SUITE(SoftmaxLayer) + +DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(concat(datasets::SmallShapes(), datasets::LargeShapes()), CNNDataTypes), shape, data_type) +{ + // Set fixed point position data type allowed + const int fixed_point_position = is_data_type_fixed_point(data_type) ? 
3 : 0; + + // Create tensors + GCTensor src = create_tensor(shape, data_type, 1, fixed_point_position); + GCTensor dst = create_tensor(shape, data_type, 1, fixed_point_position); + + ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); + + // Create and configure function + GCSoftmaxLayer smx_layer; + smx_layer.configure(&src, &dst); + + // Validate valid region + const ValidRegion valid_region = shape_to_valid_region(shape); + validate(src.info()->valid_region(), valid_region); + validate(dst.info()->valid_region(), valid_region); + + // Validate padding + const PaddingSize padding = PaddingCalculator(shape.x(), 4).required_padding(); + validate(src.info()->padding(), padding); + validate(dst.info()->padding(), padding); +} + +template +using GCSoftmaxLayerFixture = SoftmaxValidationFixture; + +TEST_SUITE(Float) +TEST_SUITE(FP16) +FIXTURE_DATA_TEST_CASE(RunSmall, GCSoftmaxLayerFixture, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::F16))) +{ + // Validate output + validate(GCAccessor(_target), _reference, tolerance_f16); +} +FIXTURE_DATA_TEST_CASE(RunLarge, GCSoftmaxLayerFixture, framework::DatasetMode::NIGHTLY, combine(datasets::LargeShapes(), framework::dataset::make("DataType", DataType::F16))) +{ + // Validate output + validate(GCAccessor(_target), _reference, tolerance_f16); +} +TEST_SUITE_END() + +TEST_SUITE(FP32) +FIXTURE_DATA_TEST_CASE(RunSmall, GCSoftmaxLayerFixture, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::F32))) +{ + // Validate output + validate(GCAccessor(_target), _reference, tolerance_f32); +} +FIXTURE_DATA_TEST_CASE(RunLarge, GCSoftmaxLayerFixture, framework::DatasetMode::NIGHTLY, combine(datasets::LargeShapes(), framework::dataset::make("DataType", DataType::F32))) +{ + // Validate output + 
validate(GCAccessor(_target), _reference, tolerance_f32); +} +TEST_SUITE_END() +TEST_SUITE_END() + +TEST_SUITE_END() +TEST_SUITE_END() +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/fixtures/DropoutLayerFixture.h b/tests/validation/fixtures/DropoutLayerFixture.h new file mode 100644 index 0000000000..3a077dbbea --- /dev/null +++ b/tests/validation/fixtures/DropoutLayerFixture.h @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef ARM_COMPUTE_TEST_DROPOUT_LAYER_FIXTURE +#define ARM_COMPUTE_TEST_DROPOUT_LAYER_FIXTURE + +#include "arm_compute/core/TensorShape.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/Tensor.h" +#include "tests/AssetsLibrary.h" +#include "tests/Globals.h" +#include "tests/IAccessor.h" +#include "tests/framework/Asserts.h" +#include "tests/framework/Fixture.h" + +#include + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +template +class DropoutLayerValidationFixture : public framework::Fixture +{ +public: + template + void setup(TensorShape shape, float ratio, bool forward, DataType data_type) + { + _target = compute_target(shape, ratio, forward, data_type); + } + +protected: + template + void fill(U &&tensor) + { + library->fill_tensor_uniform(tensor, 0); + } + + TensorType compute_target(const TensorShape &shape, float ratio, bool forward, DataType data_type) + { + // Create tensors + TensorType src = create_tensor(shape, data_type, 1); + TensorType mask = create_tensor(shape, data_type, 1); + TensorType dst = create_tensor(shape, data_type, 1); + + // Create and configure function + FunctionType dropout_layer; + dropout_layer.configure(&src, &mask, &dst, ratio, forward); + + ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); + + // Allocate tensors + src.allocator()->allocate(); + mask.allocator()->allocate(); + dst.allocator()->allocate(); + + ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(!mask.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS); + + // Fill tensors + fill(AccessorType(src)); + + // Compute function + dropout_layer.run(); + + return dst; + } + + SimpleTensor compute_reference(const TensorShape &shape, DataType data_type) + { + } + + TensorType _target{}; + 
SimpleTensor _reference{}; + int _fractional_bits{}; +}; +} // namespace validation +} // namespace test +} // namespace arm_compute + +#endif /* ARM_COMPUTE_TEST_DROPOUT_LAYER_FIXTURE */ diff --git a/utils/Utils.h b/utils/Utils.h index 28382f47e4..76329671af 100644 --- a/utils/Utils.h +++ b/utils/Utils.h @@ -37,6 +37,9 @@ #include "arm_compute/core/CL/OpenCL.h" #include "arm_compute/runtime/CL/CLTensor.h" #endif /* ARM_COMPUTE_CL */ +#ifdef ARM_COMPUTE_GC +#include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h" +#endif /* ARM_COMPUTE_GC */ #include #include @@ -188,6 +191,27 @@ inline void unmap(CLTensor &tensor) } #endif /* ARM_COMPUTE_CL */ +#ifdef ARM_COMPUTE_GC +/** Maps a tensor if needed + * + * @param[in] tensor Tensor to be mapped + * @param[in] blocking Specified if map is blocking or not + */ +inline void map(GCTensor &tensor, bool blocking) +{ + tensor.map(blocking); +} + +/** Unmaps a tensor if needed + * + * @param tensor Tensor to be unmapped + */ +inline void unmap(GCTensor &tensor) +{ + tensor.unmap(); +} +#endif /* ARM_COMPUTE_GC */ + /** Class to load the content of a PPM file into an Image */ class PPMLoader @@ -256,7 +280,7 @@ public: ARM_COMPUTE_ERROR_ON_FORMAT_NOT_IN(&image, arm_compute::Format::U8, arm_compute::Format::RGB888); try { - // Map buffer if creating a CLTensor + // Map buffer if creating a CLTensor/GCTensor map(image, true); // Check if the file is large enough to fill the image @@ -319,7 +343,7 @@ public: ARM_COMPUTE_ERROR("Unsupported format"); } - // Unmap buffer if creating a CLTensor + // Unmap buffer if creating a CLTensor/GCTensor unmap(image); } catch(const std::ifstream::failure &e) @@ -610,7 +634,7 @@ void save_to_ppm(T &tensor, const std::string &ppm_filename) fs << "P6\n" << width << " " << height << " 255\n"; - // Map buffer if creating a CLTensor + // Map buffer if creating a CLTensor/GCTensor map(tensor, true); switch(tensor.info()->format()) @@ -653,7 +677,7 @@ void save_to_ppm(T &tensor, const std::string 
&ppm_filename) ARM_COMPUTE_ERROR("Unsupported format"); } - // Unmap buffer if creating a CLTensor + // Unmap buffer if creating a CLTensor/GCTensor unmap(tensor); } catch(const std::ofstream::failure &e) @@ -762,7 +786,7 @@ void load_trained_data(T &tensor, const std::string &filename) throw std::runtime_error("Could not load binary data: " + filename); } - // Map buffer if creating a CLTensor + // Map buffer if creating a CLTensor/GCTensor map(tensor, true); Window window; @@ -782,10 +806,8 @@ void load_trained_data(T &tensor, const std::string &filename) }, in); -#ifdef ARM_COMPUTE_CL - // Unmap buffer if creating a CLTensor + // Unmap buffer if creating a CLTensor/GCTensor unmap(tensor); -#endif /* ARM_COMPUTE_CL */ } catch(const std::ofstream::failure &e) { -- cgit v1.2.1