From 7068f9900d136312318ff430aef588b14e0c87ad Mon Sep 17 00:00:00 2001 From: Anthony Barbier Date: Thu, 26 Oct 2017 15:23:08 +0100 Subject: COMPMID-631: Merge branches/gles_compute branch Last commit: commit b25c5f68042b0c81bf611d59a1bb8535e1c42497 Author: Xinghang Zhou Date: Wed Oct 25 18:48:10 2017 +0800 Synced validation's tolerances of GCSoftmax from cl side Change-Id: Ibe72054205c1c8721845d679a31af7ed0a7c5cf6 Reviewed-on: http://mpd-gerrit.cambridge.arm.com/93283 Reviewed-by: Anthony Barbier Tested-by: Kaizen --- SConscript | 20 + SConstruct | 7 +- arm_compute/core/CL/CLHelpers.h | 2 +- .../CL/kernels/CLBatchNormalizationLayerKernel.h | 8 +- arm_compute/core/Error.h | 13 +- arm_compute/core/GLES_COMPUTE/GCHelpers.h | 64 + arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h | 306 ++++ arm_compute/core/GLES_COMPUTE/GCKernels.h | 48 + arm_compute/core/GLES_COMPUTE/IGCKernel.h | 179 ++ arm_compute/core/GLES_COMPUTE/IGCSimple2DKernel.h | 41 + arm_compute/core/GLES_COMPUTE/IGCSimple3DKernel.h | 43 + arm_compute/core/GLES_COMPUTE/IGCSimpleKernel.h | 66 + arm_compute/core/GLES_COMPUTE/IGCTensor.h | 99 ++ arm_compute/core/GLES_COMPUTE/OpenGLES.h | 165 ++ .../kernels/GCAbsoluteDifferenceKernel.h | 71 + .../GLES_COMPUTE/kernels/GCActivationLayerKernel.h | 68 + .../kernels/GCBatchNormalizationLayerKernel.h | 77 + .../core/GLES_COMPUTE/kernels/GCCol2ImKernel.h | 92 + .../kernels/GCDepthConcatenateKernel.h | 76 + .../kernels/GCDirectConvolutionLayerKernel.h | 87 + .../core/GLES_COMPUTE/kernels/GCDropoutKernel.h | 79 + .../core/GLES_COMPUTE/kernels/GCFillBorderKernel.h | 77 + .../kernels/GCGEMMInterleave4x4Kernel.h | 80 + .../kernels/GCGEMMMatrixAccumulateBiasesKernel.h | 63 + .../kernels/GCGEMMMatrixAdditionKernel.h | 70 + .../kernels/GCGEMMMatrixMultiplyKernel.h | 79 + .../kernels/GCGEMMTranspose1xWKernel.h | 67 + .../core/GLES_COMPUTE/kernels/GCIm2ColKernel.h | 109 ++ .../kernels/GCNormalizationLayerKernel.h | 72 + .../kernels/GCPixelWiseMultiplicationKernel.h | 70 + 
.../GLES_COMPUTE/kernels/GCPoolingLayerKernel.h | 70 + .../GLES_COMPUTE/kernels/GCSoftmaxLayerKernel.h | 112 ++ .../core/GLES_COMPUTE/kernels/GCTransposeKernel.h | 52 + arm_compute/core/Log.h | 54 +- .../NEON/kernels/NEBatchNormalizationLayerKernel.h | 4 +- arm_compute/core/Utils.h | 23 +- .../CL/functions/CLBatchNormalizationLayer.h | 6 +- .../runtime/CL/functions/CLNormalizationLayer.h | 4 +- arm_compute/runtime/GLES_COMPUTE/GCFunctions.h | 45 + arm_compute/runtime/GLES_COMPUTE/GCScheduler.h | 73 + arm_compute/runtime/GLES_COMPUTE/GCTensor.h | 100 ++ .../runtime/GLES_COMPUTE/GCTensorAllocator.h | 128 ++ .../runtime/GLES_COMPUTE/IGCSimpleFunction.h | 50 + .../GLES_COMPUTE/functions/GCAbsoluteDifference.h | 52 + .../GLES_COMPUTE/functions/GCActivationLayer.h | 53 + .../functions/GCBatchNormalizationLayer.h | 67 + .../GLES_COMPUTE/functions/GCDepthConcatenate.h | 67 + .../functions/GCDirectConvolutionLayer.h | 59 + .../GLES_COMPUTE/functions/GCDropoutLayer.h | 63 + .../runtime/GLES_COMPUTE/functions/GCFillBorder.h | 52 + .../GLES_COMPUTE/functions/GCFullyConnectedLayer.h | 96 ++ .../runtime/GLES_COMPUTE/functions/GCGEMM.h | 85 + .../GLES_COMPUTE/functions/GCGEMMInterleave4x4.h | 50 + .../GLES_COMPUTE/functions/GCGEMMTranspose1xW.h | 47 + .../GLES_COMPUTE/functions/GCNormalizationLayer.h | 71 + .../functions/GCPixelWiseMultiplication.h | 48 + .../GLES_COMPUTE/functions/GCPoolingLayer.h | 53 + .../GLES_COMPUTE/functions/GCSoftmaxLayer.h | 69 + .../runtime/GLES_COMPUTE/functions/GCTranspose.h | 50 + .../NEON/functions/NEBatchNormalizationLayer.h | 4 +- .../runtime/NEON/functions/NENormalizationLayer.h | 4 +- examples/SConscript | 23 + examples/gc_absdiff.cpp | 112 ++ opengles-3.1/include/EGL/egl.h | 303 ++++ opengles-3.1/include/EGL/eglext.h | 804 +++++++++ opengles-3.1/include/EGL/eglplatform.h | 122 ++ opengles-3.1/include/GLES/gl.h | 770 +++++++++ opengles-3.1/include/GLES/glext.h | 1278 ++++++++++++++ opengles-3.1/include/GLES/glplatform.h | 30 + 
opengles-3.1/include/GLES2/gl2.h | 620 +++++++ opengles-3.1/include/GLES2/gl2ext.h | 1809 ++++++++++++++++++++ opengles-3.1/include/GLES2/gl2platform.h | 30 + opengles-3.1/include/GLES3/gl3.h | 1061 ++++++++++++ opengles-3.1/include/GLES3/gl31.h | 1187 +++++++++++++ opengles-3.1/include/GLES3/gl3ext.h | 24 + opengles-3.1/include/GLES3/gl3platform.h | 30 + opengles-3.1/include/KHR/khrplatform.h | 273 +++ opengles-3.1/mali_include/EGL/fbdev_window.h | 50 + opengles-3.1/stubs/EGL.c | 40 + opengles-3.1/stubs/GLESv2.c | 269 +++ opengles-3.1/stubs/Readme.txt | 2 + opengles-3.1/stubs/SConscript | 11 + scripts/check_bad_style.sh | 2 +- scripts/clang_tidy_rules.py | 3 +- scripts/fix_code_formatting.sh | 2 +- src/core/CL/cl_kernels/direct_convolution1x1.cl | 4 +- src/core/CL/cl_kernels/direct_convolution3x3.cl | 4 +- src/core/CL/cl_kernels/direct_convolution5x5.cl | 4 +- src/core/Error.cpp | 18 +- src/core/GLES_COMPUTE/GCKernelLibrary.cpp | 716 ++++++++ src/core/GLES_COMPUTE/IGCKernel.cpp | 157 ++ src/core/GLES_COMPUTE/IGCSimple2DKernel.cpp | 51 + src/core/GLES_COMPUTE/IGCSimple3DKernel.cpp | 52 + src/core/GLES_COMPUTE/IGCSimpleKernel.cpp | 54 + src/core/GLES_COMPUTE/IGCTensor.cpp | 54 + src/core/GLES_COMPUTE/OpenGLES.cpp | 820 +++++++++ src/core/GLES_COMPUTE/cs_shaders/absdiff.cs | 71 + .../GLES_COMPUTE/cs_shaders/activation_layer.cs | 262 +++ .../cs_shaders/batchnormalization_layer.cs | 222 +++ src/core/GLES_COMPUTE/cs_shaders/concatenate.cs | 106 ++ .../GLES_COMPUTE/cs_shaders/convolution_layer.cs | 302 ++++ .../cs_shaders/direct_convolution1x1.cs | 275 +++ .../cs_shaders/direct_convolution3x3.cs | 1583 +++++++++++++++++ .../cs_shaders/direct_convolution5x5.cs | 313 ++++ src/core/GLES_COMPUTE/cs_shaders/dropout.cs | 204 +++ src/core/GLES_COMPUTE/cs_shaders/fill_border.cs | 553 ++++++ src/core/GLES_COMPUTE/cs_shaders/gemm.cs | 623 +++++++ src/core/GLES_COMPUTE/cs_shaders/helpers.h | 582 +++++++ .../GLES_COMPUTE/cs_shaders/normalization_layer.cs | 157 ++ 
.../GLES_COMPUTE/cs_shaders/pixelwise_mul_float.cs | 75 + src/core/GLES_COMPUTE/cs_shaders/pooling_layer.cs | 1444 ++++++++++++++++ src/core/GLES_COMPUTE/cs_shaders/softmax_layer.cs | 541 ++++++ src/core/GLES_COMPUTE/cs_shaders/transpose.cs | 187 ++ src/core/GLES_COMPUTE/egl_entries.in | 35 + src/core/GLES_COMPUTE/gl_entries.in | 63 + .../kernels/GCAbsoluteDifferenceKernel.cpp | 112 ++ .../kernels/GCActivationLayerKernel.cpp | 128 ++ .../kernels/GCBatchNormalizationLayerKernel.cpp | 129 ++ src/core/GLES_COMPUTE/kernels/GCCol2ImKernel.cpp | 101 ++ .../kernels/GCDepthConcatenateKernel.cpp | 145 ++ .../kernels/GCDirectConvolutionLayerKernel.cpp | 394 +++++ src/core/GLES_COMPUTE/kernels/GCDropoutKernel.cpp | 110 ++ .../GLES_COMPUTE/kernels/GCFillBorderKernel.cpp | 169 ++ .../kernels/GCGEMMInterleave4x4Kernel.cpp | 129 ++ .../kernels/GCGEMMMatrixAccumulateBiasesKernel.cpp | 123 ++ .../kernels/GCGEMMMatrixAdditionKernel.cpp | 104 ++ .../kernels/GCGEMMMatrixMultiplyKernel.cpp | 210 +++ .../kernels/GCGEMMTranspose1xWKernel.cpp | 128 ++ src/core/GLES_COMPUTE/kernels/GCIm2ColKernel.cpp | 230 +++ .../kernels/GCNormalizationLayerKernel.cpp | 124 ++ .../kernels/GCPixelWiseMultiplicationKernel.cpp | 127 ++ .../GLES_COMPUTE/kernels/GCPoolingLayerKernel.cpp | 254 +++ .../GLES_COMPUTE/kernels/GCSoftmaxLayerKernel.cpp | 353 ++++ .../GLES_COMPUTE/kernels/GCTransposeKernel.cpp | 116 ++ src/core/Helpers.cpp | 7 + src/core/Utils.cpp | 3 +- src/runtime/CL/functions/CLNormalizationLayer.cpp | 2 +- src/runtime/GLES_COMPUTE/GCScheduler.cpp | 61 + src/runtime/GLES_COMPUTE/GCTensor.cpp | 77 + src/runtime/GLES_COMPUTE/GCTensorAllocator.cpp | 94 + src/runtime/GLES_COMPUTE/IGCSimpleFunction.cpp | 45 + .../functions/GCAbsoluteDifference.cpp | 40 + .../GLES_COMPUTE/functions/GCActivationLayer.cpp | 37 + .../functions/GCBatchNormalizationLayer.cpp | 48 + .../GLES_COMPUTE/functions/GCDepthConcatenate.cpp | 69 + .../functions/GCDirectConvolutionLayer.cpp | 64 + 
.../GLES_COMPUTE/functions/GCDropoutLayer.cpp | 50 + .../GLES_COMPUTE/functions/GCFillBorder.cpp | 40 + .../functions/GCFullyConnectedLayer.cpp | 177 ++ src/runtime/GLES_COMPUTE/functions/GCGEMM.cpp | 133 ++ .../GLES_COMPUTE/functions/GCGEMMInterleave4x4.cpp | 36 + .../GLES_COMPUTE/functions/GCGEMMTranspose1xW.cpp | 38 + .../functions/GCNormalizationLayer.cpp | 61 + .../functions/GCPixelWiseMultiplication.cpp | 38 + .../GLES_COMPUTE/functions/GCPoolingLayer.cpp | 42 + .../GLES_COMPUTE/functions/GCSoftmaxLayer.cpp | 66 + src/runtime/GLES_COMPUTE/functions/GCTranspose.cpp | 38 + .../NEON/functions/NENormalizationLayer.cpp | 2 +- tests/GLES_COMPUTE/GCAccessor.h | 144 ++ tests/GLES_COMPUTE/Helper.h | 115 ++ tests/SConscript | 29 + .../GLES_COMPUTE/DirectConvolutionLayer.cpp | 107 ++ .../benchmark/GLES_COMPUTE/FullyConnectedLayer.cpp | 106 ++ tests/benchmark/GLES_COMPUTE/GEMM.cpp | 56 + tests/benchmark/GLES_COMPUTE/PoolingLayer.cpp | 128 ++ tests/benchmark/fixtures/ConvolutionLayerFixture.h | 15 + tests/datasets/FullyConnectedLayerDataset.h | 1 + tests/framework/Framework.cpp | 11 + tests/framework/SConscript | 5 + tests/main.cpp | 7 + tests/validation/GLES_COMPUTE/ActivationLayer.cpp | 198 +++ .../GLES_COMPUTE/BatchNormalizationLayer.cpp | 103 ++ .../GLES_COMPUTE/DepthConcatenateLayer.cpp | 86 + .../GLES_COMPUTE/DirectConvolutionLayer.cpp | 103 ++ tests/validation/GLES_COMPUTE/DropoutLayer.cpp | 82 + .../GLES_COMPUTE/FullyConnectedLayer.cpp | 143 ++ tests/validation/GLES_COMPUTE/GEMM.cpp | 105 ++ .../validation/GLES_COMPUTE/GlobalPoolingLayer.cpp | 83 + .../validation/GLES_COMPUTE/NormalizationLayer.cpp | 85 + tests/validation/GLES_COMPUTE/PoolingLayer.cpp | 105 ++ tests/validation/GLES_COMPUTE/SoftmaxLayer.cpp | 122 ++ tests/validation/fixtures/DropoutLayerFixture.h | 106 ++ utils/Utils.h | 38 +- 183 files changed, 28869 insertions(+), 64 deletions(-) create mode 100644 arm_compute/core/GLES_COMPUTE/GCHelpers.h create mode 100644 
arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h create mode 100644 arm_compute/core/GLES_COMPUTE/GCKernels.h create mode 100644 arm_compute/core/GLES_COMPUTE/IGCKernel.h create mode 100644 arm_compute/core/GLES_COMPUTE/IGCSimple2DKernel.h create mode 100644 arm_compute/core/GLES_COMPUTE/IGCSimple3DKernel.h create mode 100644 arm_compute/core/GLES_COMPUTE/IGCSimpleKernel.h create mode 100644 arm_compute/core/GLES_COMPUTE/IGCTensor.h create mode 100644 arm_compute/core/GLES_COMPUTE/OpenGLES.h create mode 100644 arm_compute/core/GLES_COMPUTE/kernels/GCAbsoluteDifferenceKernel.h create mode 100644 arm_compute/core/GLES_COMPUTE/kernels/GCActivationLayerKernel.h create mode 100644 arm_compute/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.h create mode 100644 arm_compute/core/GLES_COMPUTE/kernels/GCCol2ImKernel.h create mode 100644 arm_compute/core/GLES_COMPUTE/kernels/GCDepthConcatenateKernel.h create mode 100644 arm_compute/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.h create mode 100644 arm_compute/core/GLES_COMPUTE/kernels/GCDropoutKernel.h create mode 100644 arm_compute/core/GLES_COMPUTE/kernels/GCFillBorderKernel.h create mode 100644 arm_compute/core/GLES_COMPUTE/kernels/GCGEMMInterleave4x4Kernel.h create mode 100644 arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixAccumulateBiasesKernel.h create mode 100644 arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixAdditionKernel.h create mode 100644 arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixMultiplyKernel.h create mode 100644 arm_compute/core/GLES_COMPUTE/kernels/GCGEMMTranspose1xWKernel.h create mode 100644 arm_compute/core/GLES_COMPUTE/kernels/GCIm2ColKernel.h create mode 100644 arm_compute/core/GLES_COMPUTE/kernels/GCNormalizationLayerKernel.h create mode 100644 arm_compute/core/GLES_COMPUTE/kernels/GCPixelWiseMultiplicationKernel.h create mode 100644 arm_compute/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.h create mode 100644 
arm_compute/core/GLES_COMPUTE/kernels/GCSoftmaxLayerKernel.h create mode 100644 arm_compute/core/GLES_COMPUTE/kernels/GCTransposeKernel.h create mode 100644 arm_compute/runtime/GLES_COMPUTE/GCFunctions.h create mode 100644 arm_compute/runtime/GLES_COMPUTE/GCScheduler.h create mode 100644 arm_compute/runtime/GLES_COMPUTE/GCTensor.h create mode 100644 arm_compute/runtime/GLES_COMPUTE/GCTensorAllocator.h create mode 100644 arm_compute/runtime/GLES_COMPUTE/IGCSimpleFunction.h create mode 100644 arm_compute/runtime/GLES_COMPUTE/functions/GCAbsoluteDifference.h create mode 100644 arm_compute/runtime/GLES_COMPUTE/functions/GCActivationLayer.h create mode 100644 arm_compute/runtime/GLES_COMPUTE/functions/GCBatchNormalizationLayer.h create mode 100644 arm_compute/runtime/GLES_COMPUTE/functions/GCDepthConcatenate.h create mode 100644 arm_compute/runtime/GLES_COMPUTE/functions/GCDirectConvolutionLayer.h create mode 100644 arm_compute/runtime/GLES_COMPUTE/functions/GCDropoutLayer.h create mode 100644 arm_compute/runtime/GLES_COMPUTE/functions/GCFillBorder.h create mode 100644 arm_compute/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.h create mode 100644 arm_compute/runtime/GLES_COMPUTE/functions/GCGEMM.h create mode 100644 arm_compute/runtime/GLES_COMPUTE/functions/GCGEMMInterleave4x4.h create mode 100644 arm_compute/runtime/GLES_COMPUTE/functions/GCGEMMTranspose1xW.h create mode 100644 arm_compute/runtime/GLES_COMPUTE/functions/GCNormalizationLayer.h create mode 100644 arm_compute/runtime/GLES_COMPUTE/functions/GCPixelWiseMultiplication.h create mode 100644 arm_compute/runtime/GLES_COMPUTE/functions/GCPoolingLayer.h create mode 100644 arm_compute/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.h create mode 100644 arm_compute/runtime/GLES_COMPUTE/functions/GCTranspose.h create mode 100644 examples/gc_absdiff.cpp create mode 100644 opengles-3.1/include/EGL/egl.h create mode 100644 opengles-3.1/include/EGL/eglext.h create mode 100644 opengles-3.1/include/EGL/eglplatform.h 
create mode 100644 opengles-3.1/include/GLES/gl.h create mode 100644 opengles-3.1/include/GLES/glext.h create mode 100644 opengles-3.1/include/GLES/glplatform.h create mode 100644 opengles-3.1/include/GLES2/gl2.h create mode 100644 opengles-3.1/include/GLES2/gl2ext.h create mode 100644 opengles-3.1/include/GLES2/gl2platform.h create mode 100644 opengles-3.1/include/GLES3/gl3.h create mode 100644 opengles-3.1/include/GLES3/gl31.h create mode 100644 opengles-3.1/include/GLES3/gl3ext.h create mode 100644 opengles-3.1/include/GLES3/gl3platform.h create mode 100644 opengles-3.1/include/KHR/khrplatform.h create mode 100644 opengles-3.1/mali_include/EGL/fbdev_window.h create mode 100644 opengles-3.1/stubs/EGL.c create mode 100644 opengles-3.1/stubs/GLESv2.c create mode 100644 opengles-3.1/stubs/Readme.txt create mode 100644 opengles-3.1/stubs/SConscript create mode 100644 src/core/GLES_COMPUTE/GCKernelLibrary.cpp create mode 100644 src/core/GLES_COMPUTE/IGCKernel.cpp create mode 100644 src/core/GLES_COMPUTE/IGCSimple2DKernel.cpp create mode 100644 src/core/GLES_COMPUTE/IGCSimple3DKernel.cpp create mode 100644 src/core/GLES_COMPUTE/IGCSimpleKernel.cpp create mode 100644 src/core/GLES_COMPUTE/IGCTensor.cpp create mode 100644 src/core/GLES_COMPUTE/OpenGLES.cpp create mode 100644 src/core/GLES_COMPUTE/cs_shaders/absdiff.cs create mode 100644 src/core/GLES_COMPUTE/cs_shaders/activation_layer.cs create mode 100644 src/core/GLES_COMPUTE/cs_shaders/batchnormalization_layer.cs create mode 100644 src/core/GLES_COMPUTE/cs_shaders/concatenate.cs create mode 100644 src/core/GLES_COMPUTE/cs_shaders/convolution_layer.cs create mode 100644 src/core/GLES_COMPUTE/cs_shaders/direct_convolution1x1.cs create mode 100644 src/core/GLES_COMPUTE/cs_shaders/direct_convolution3x3.cs create mode 100644 src/core/GLES_COMPUTE/cs_shaders/direct_convolution5x5.cs create mode 100644 src/core/GLES_COMPUTE/cs_shaders/dropout.cs create mode 100644 src/core/GLES_COMPUTE/cs_shaders/fill_border.cs create mode 
100755 src/core/GLES_COMPUTE/cs_shaders/gemm.cs create mode 100644 src/core/GLES_COMPUTE/cs_shaders/helpers.h create mode 100755 src/core/GLES_COMPUTE/cs_shaders/normalization_layer.cs create mode 100644 src/core/GLES_COMPUTE/cs_shaders/pixelwise_mul_float.cs create mode 100644 src/core/GLES_COMPUTE/cs_shaders/pooling_layer.cs create mode 100644 src/core/GLES_COMPUTE/cs_shaders/softmax_layer.cs create mode 100755 src/core/GLES_COMPUTE/cs_shaders/transpose.cs create mode 100644 src/core/GLES_COMPUTE/egl_entries.in create mode 100644 src/core/GLES_COMPUTE/gl_entries.in create mode 100644 src/core/GLES_COMPUTE/kernels/GCAbsoluteDifferenceKernel.cpp create mode 100644 src/core/GLES_COMPUTE/kernels/GCActivationLayerKernel.cpp create mode 100644 src/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.cpp create mode 100644 src/core/GLES_COMPUTE/kernels/GCCol2ImKernel.cpp create mode 100644 src/core/GLES_COMPUTE/kernels/GCDepthConcatenateKernel.cpp create mode 100644 src/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.cpp create mode 100644 src/core/GLES_COMPUTE/kernels/GCDropoutKernel.cpp create mode 100644 src/core/GLES_COMPUTE/kernels/GCFillBorderKernel.cpp create mode 100644 src/core/GLES_COMPUTE/kernels/GCGEMMInterleave4x4Kernel.cpp create mode 100644 src/core/GLES_COMPUTE/kernels/GCGEMMMatrixAccumulateBiasesKernel.cpp create mode 100644 src/core/GLES_COMPUTE/kernels/GCGEMMMatrixAdditionKernel.cpp create mode 100644 src/core/GLES_COMPUTE/kernels/GCGEMMMatrixMultiplyKernel.cpp create mode 100644 src/core/GLES_COMPUTE/kernels/GCGEMMTranspose1xWKernel.cpp create mode 100644 src/core/GLES_COMPUTE/kernels/GCIm2ColKernel.cpp create mode 100644 src/core/GLES_COMPUTE/kernels/GCNormalizationLayerKernel.cpp create mode 100644 src/core/GLES_COMPUTE/kernels/GCPixelWiseMultiplicationKernel.cpp create mode 100644 src/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.cpp create mode 100644 src/core/GLES_COMPUTE/kernels/GCSoftmaxLayerKernel.cpp create mode 100644 
src/core/GLES_COMPUTE/kernels/GCTransposeKernel.cpp create mode 100644 src/runtime/GLES_COMPUTE/GCScheduler.cpp create mode 100644 src/runtime/GLES_COMPUTE/GCTensor.cpp create mode 100644 src/runtime/GLES_COMPUTE/GCTensorAllocator.cpp create mode 100644 src/runtime/GLES_COMPUTE/IGCSimpleFunction.cpp create mode 100644 src/runtime/GLES_COMPUTE/functions/GCAbsoluteDifference.cpp create mode 100644 src/runtime/GLES_COMPUTE/functions/GCActivationLayer.cpp create mode 100755 src/runtime/GLES_COMPUTE/functions/GCBatchNormalizationLayer.cpp create mode 100755 src/runtime/GLES_COMPUTE/functions/GCDepthConcatenate.cpp create mode 100644 src/runtime/GLES_COMPUTE/functions/GCDirectConvolutionLayer.cpp create mode 100644 src/runtime/GLES_COMPUTE/functions/GCDropoutLayer.cpp create mode 100644 src/runtime/GLES_COMPUTE/functions/GCFillBorder.cpp create mode 100644 src/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.cpp create mode 100644 src/runtime/GLES_COMPUTE/functions/GCGEMM.cpp create mode 100644 src/runtime/GLES_COMPUTE/functions/GCGEMMInterleave4x4.cpp create mode 100644 src/runtime/GLES_COMPUTE/functions/GCGEMMTranspose1xW.cpp create mode 100644 src/runtime/GLES_COMPUTE/functions/GCNormalizationLayer.cpp create mode 100755 src/runtime/GLES_COMPUTE/functions/GCPixelWiseMultiplication.cpp create mode 100644 src/runtime/GLES_COMPUTE/functions/GCPoolingLayer.cpp create mode 100644 src/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.cpp create mode 100644 src/runtime/GLES_COMPUTE/functions/GCTranspose.cpp create mode 100644 tests/GLES_COMPUTE/GCAccessor.h create mode 100644 tests/GLES_COMPUTE/Helper.h create mode 100644 tests/benchmark/GLES_COMPUTE/DirectConvolutionLayer.cpp create mode 100644 tests/benchmark/GLES_COMPUTE/FullyConnectedLayer.cpp create mode 100644 tests/benchmark/GLES_COMPUTE/GEMM.cpp create mode 100644 tests/benchmark/GLES_COMPUTE/PoolingLayer.cpp create mode 100644 tests/validation/GLES_COMPUTE/ActivationLayer.cpp create mode 100644 
tests/validation/GLES_COMPUTE/BatchNormalizationLayer.cpp create mode 100644 tests/validation/GLES_COMPUTE/DepthConcatenateLayer.cpp create mode 100644 tests/validation/GLES_COMPUTE/DirectConvolutionLayer.cpp create mode 100644 tests/validation/GLES_COMPUTE/DropoutLayer.cpp create mode 100644 tests/validation/GLES_COMPUTE/FullyConnectedLayer.cpp create mode 100644 tests/validation/GLES_COMPUTE/GEMM.cpp create mode 100644 tests/validation/GLES_COMPUTE/GlobalPoolingLayer.cpp create mode 100644 tests/validation/GLES_COMPUTE/NormalizationLayer.cpp create mode 100644 tests/validation/GLES_COMPUTE/PoolingLayer.cpp create mode 100644 tests/validation/GLES_COMPUTE/SoftmaxLayer.cpp create mode 100644 tests/validation/fixtures/DropoutLayerFixture.h diff --git a/SConscript b/SConscript index acaa9a593d..0679c85070 100644 --- a/SConscript +++ b/SConscript @@ -179,6 +179,26 @@ if env['neon']: runtime_files += Glob('src/runtime/NEON/*.cpp') runtime_files += Glob('src/runtime/NEON/functions/*.cpp') +if env['gles_compute']: + if env['os'] != 'android': + arm_compute_env.Append(CPPPATH = ["#opengles-3.1/include", "#opengles-3.1/mali_include"]) + + core_files += Glob('src/core/GLES_COMPUTE/*.cpp') + core_files += Glob('src/core/GLES_COMPUTE/kernels/*.cpp') + + runtime_files += Glob('src/runtime/GLES_COMPUTE/*.cpp') + runtime_files += Glob('src/runtime/GLES_COMPUTE/functions/*.cpp') + + # Generate embed files + if env['embed_kernels']: + cs_files = Glob('src/core/GLES_COMPUTE/cs_shaders/*.cs') + cs_files += Glob('src/core/GLES_COMPUTE/cs_shaders/*.h') + + embed_files = [ f.get_path()+"embed" for f in cs_files ] + arm_compute_env.Append(CPPPATH =[Dir("./src/core/GLES_COMPUTE/").path] ) + + generate_embed.append(arm_compute_env.Command(embed_files, cs_files, action=resolve_includes)) + static_core_objects = [arm_compute_env.StaticObject(f) for f in core_files] shared_core_objects = [arm_compute_env.SharedObject(f) for f in core_files] diff --git a/SConstruct b/SConstruct index 
904a85fe43..264ed9cbe0 100644 --- a/SConstruct +++ b/SConstruct @@ -48,7 +48,8 @@ vars.AddVariables( BoolVariable("standalone", "Builds the tests as standalone executables, links statically with libgcc, libstdc++ and libarm_compute", False), BoolVariable("opencl", "Enable OpenCL support", True), BoolVariable("neon", "Enable Neon support", False), - BoolVariable("embed_kernels", "Embed OpenCL kernels in library binary", False), + BoolVariable("gles_compute", "Enable OpenGL ES Compute Shader support", False), + BoolVariable("embed_kernels", "Embed OpenCL kernels and OpenGL ES compute shaders in library binary", False), BoolVariable("set_soname", "Set the library's soname and shlibversion (requires SCons 2.4 or above)", False), BoolVariable("openmp", "Enable OpenMP backend", False), BoolVariable("cppthreads", "Enable C++11 threads backend", True), @@ -199,6 +200,7 @@ if env['opencl']: print("Cannot link OpenCL statically, which is required on bare metal") Exit(1) +if env['opencl'] or env['gles_compute']: if env['embed_kernels']: env.Append(CPPDEFINES = ['EMBEDDED_KERNELS']) @@ -229,6 +231,9 @@ SConscript('./SConscript', variant_dir='#build/%s' % env['build_dir'], duplicate if env['opencl']: SConscript("./opencl-1.2-stubs/SConscript", variant_dir="build/%s/opencl-1.2-stubs" % env['build_dir'], duplicate=0) +if env['gles_compute'] and env['os'] != 'android': + SConscript("./opengles-3.1/stubs/SConscript", variant_dir="build/%s/opengles-3.1/stubs" % env['build_dir'], duplicate=0) + if env['examples'] and env['os'] != 'bare_metal': SConscript('./examples/SConscript', variant_dir='#build/%s/examples' % env['build_dir'], duplicate=0) diff --git a/arm_compute/core/CL/CLHelpers.h b/arm_compute/core/CL/CLHelpers.h index b93bae8d82..365ecb06c4 100644 --- a/arm_compute/core/CL/CLHelpers.h +++ b/arm_compute/core/CL/CLHelpers.h @@ -43,7 +43,7 @@ struct enable_bitwise_ops }; /** Max vector width of an OpenCL vector */ -static constexpr const unsigned int max_cl_vector_width = 16; 
+static constexpr unsigned int max_cl_vector_width = 16; /** Translates a tensor data type to the appropriate OpenCL type. * diff --git a/arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h b/arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h index a24432145a..a5559bf8aa 100644 --- a/arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h +++ b/arm_compute/core/CL/kernels/CLBatchNormalizationLayerKernel.h @@ -55,24 +55,24 @@ public: * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result. * 3 lower dimensions represent a single input with dimensions [width, height, FM]. * The rest are optional and used for representing batches. Data types supported: QS8/QS16/F16/F32. + * @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data type supported: same as @p input * @param[in] mean Mean values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input * @param[in] var Variance values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input - * @param[in] gamma Gamma values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input * @param[in] beta Beta values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] gamma Gamma values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input * @param[in] epsilon Small value to avoid division with zero. - * @param[out] output Destination tensor. Output will have the same number of dimensions as input. 
Data type supported: same as @p input */ void configure(ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *var, const ICLTensor *beta, const ICLTensor *gamma, float epsilon); /** Static function to check if given info will lead to a valid configuration of @ref CLBatchNormalizationLayerKernel * * @param[in] input Source tensor info. In case of @p output tensor info = nullptr, this tensor will store the result. * 3 lower dimensions represent a single input with dimensions [width, height, FM]. - * @param[in] output Destination tensor info. Output will have the same number of dimensions as input. Data type supported: same as @p input * The rest are optional and used for representing batches. Data types supported: QS8/QS16/F16/F32. + * @param[in] output Destination tensor info. Output will have the same number of dimensions as input. Data type supported: same as @p input * @param[in] mean Mean values tensor info. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input * @param[in] var Variance values tensor info. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input - * @param[in] gamma Gamma values tensor info. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input * @param[in] beta Beta values tensor info. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] gamma Gamma values tensor info. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input * @param[in] epsilon Small value to avoid division with zero. 
* * @return an error status diff --git a/arm_compute/core/Error.h b/arm_compute/core/Error.h index fa3f9c0615..277db9d64d 100644 --- a/arm_compute/core/Error.h +++ b/arm_compute/core/Error.h @@ -24,6 +24,7 @@ #ifndef __ARM_COMPUTE_ERROR_H__ #define __ARM_COMPUTE_ERROR_H__ +#include #include #include @@ -106,6 +107,16 @@ private: std::string _description; }; +/** Creates an error containing the error message from variable argument list + * + * @param[in] error_code Error code + * @param[in] function Function in which the error occurred. + * @param[in] file Name of the file where the error occurred. + * @param[in] line Line on which the error occurred. + * @param[in] msg Message to display before aborting. + * @param[in] args Variable argument list of the message. + */ +Error create_error_va_list(ErrorCode error_code, const char *function, const char *file, const int line, const char *msg, va_list args); /** Creates an error containing the error message * * @param[in] error_code Error code @@ -241,7 +252,7 @@ Error create_error(ErrorCode error_code, const char *function, const char *file, * @param[in] error Error value to check. */ #define ARM_COMPUTE_ERROR_THROW_ON(error) \ - error.throw_if_error(); + error.throw_if_error() /** If the condition is true, the given message is printed and an exception is thrown * diff --git a/arm_compute/core/GLES_COMPUTE/GCHelpers.h b/arm_compute/core/GLES_COMPUTE/GCHelpers.h new file mode 100644 index 0000000000..475554f2be --- /dev/null +++ b/arm_compute/core/GLES_COMPUTE/GCHelpers.h @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_GCHELPERS_H__ +#define __ARM_COMPUTE_GCHELPERS_H__ + +#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h" +#include "arm_compute/core/Helpers.h" +#include "support/ToolchainSupport.h" + +#include + +namespace arm_compute +{ +/** Helper function to create and return a unique_ptr pointed to a GLES kernel object + * It also calls the kernel's configuration. + * + * @param[in] args All the arguments that need pass to kernel's configuration. + * + * @return A unique pointer pointed to a GLES kernel object + */ +template +std::unique_ptr create_configure_kernel(T &&... 
args) +{ + std::unique_ptr k = arm_compute::support::cpp14::make_unique(); + k->configure(std::forward(args)...); + return k; +} + +/** Helper function to create and return a unique_ptr pointed to a GLES kernel object + * + * @return A unique pointer pointed to a GLES kernel object + */ +template +std::unique_ptr create_kernel() +{ + std::unique_ptr k = arm_compute::support::cpp14::make_unique(); + return k; +} + +/** Max vector width of an GLES vector */ +static constexpr unsigned int max_gc_vector_width = 16; +} +#endif /* __ARM_COMPUTE_GCHELPERS_H__ */ diff --git a/arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h b/arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h new file mode 100644 index 0000000000..e601b529ed --- /dev/null +++ b/arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h @@ -0,0 +1,306 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_GCKERNELLIBRARY_H__ +#define __ARM_COMPUTE_GCKERNELLIBRARY_H__ + +#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h" +#include "arm_compute/core/Utils.h" + +#include +#include +#include +#include +#include + +namespace arm_compute +{ +/** GCProgram class */ +class GCProgram +{ +public: + /** Default constructor. */ + GCProgram(); + /** Construct program from source file. + * + * @param[in] name Program name. + * @param[in] source Program source. + */ + GCProgram(std::string name, std::string source); + /** Default Copy Constructor. */ + GCProgram(const GCProgram &) = default; + /** Default Move Constructor. */ + GCProgram(GCProgram &&) = default; + /** Default copy assignment operator. */ + GCProgram &operator=(const GCProgram &) = default; + /** Default move assignment operator. */ + GCProgram &operator=(GCProgram &&) = default; + /** Returns program name. + * + * @return Program's name. + */ + std::string name() const + { + return _name; + } + /** Link program. + * + * @param[in] shader Shader used to link program. + * + * @return linked program id . + */ + GLuint link_program(GLuint shader); + /** Compile shader. + * + * @param[in] build_options Shader build options. + * + * @return GLES shader object. + */ + GLuint compile_shader(const std::string &build_options); + +private: + std::string _name; /**< Program name. */ + std::string _source; /**< Source code for the program. */ +}; + +/** GCKernel class */ +class GCKernel +{ +public: + /** Default Constructor. */ + GCKernel(); + /** Default Copy Constructor. */ + GCKernel(const GCKernel &) = default; + /** Default Move Constructor. */ + GCKernel(GCKernel &&) = default; + /** Default copy assignment operator. */ + GCKernel &operator=(const GCKernel &) = default; + /** Default move assignment operator. */ + GCKernel &operator=(GCKernel &&) = default; + /** Constructor. + * + * @param[in] name Kernel name. + * @param[in] program Built program. 
+ */ + GCKernel(std::string name, GLuint program); + /** Returns kernel name. + * + * @return Kernel's name. + */ + std::string name() const + { + return _name; + } + /** Get program id. + * + * @return program id. + */ + GLuint get_program() const + { + return _program; + } + /** Use current program. + * + * @return program id. + */ + void use(); + /** Unuse current program. + * + * @return program id. + */ + void unuse(); + /** Set value at uniform idx. + * + * @param[in] idx Index in vector. + * @param[in] value Set value. + */ + template + void set_params(unsigned int idx, T value) + { + if(idx >= _params.size()) + { + _params.resize(idx + 1, 0); + } + + unsigned int *p = reinterpret_cast(&value); + _params[idx] = *p; + } + /** Clear params. + * + */ + void clear_params() + { + _params.clear(); + } + /** Set shader params binding point. + * + * @param[in] binding Shader params binding point. + */ + void set_shader_params_binding_point(unsigned int binding) + { + _shader_params_binding_point = binding; + } + /** Update shader params. + * + */ + void update_shader_params(); + /** Clean up program and ubo. + * + */ + void cleanup(); + +private: + std::string _name; /**< Kernel name */ + GLuint _program; /**< Linked program id */ + std::vector _params; /**< Store all the values of the shader parameters */ + GLuint _shader_params; /**< Uniform buffer object name for shader parameters */ + GLuint _shader_params_binding_point; /**< The binding point of the uniform block for shader parameters */ + GLuint _shader_params_index; /**< The index of the uniform block */ + GLint _shader_params_size; /**< The uniform block data size in the shader */ + static constexpr const char *_shader_params_name = "shader_params"; /**< The uniform block name in the shader */ +}; + +/** GCKernelLibrary class */ +class GCKernelLibrary +{ + using StringSet = std::set; + +private: + /** Default Constructor. 
*/ + GCKernelLibrary(); + +public: + /** Prevent instances of this class from being copied. */ + GCKernelLibrary(const GCKernelLibrary &) = delete; + /** Prevent instances of this class from being copied. */ + const GCKernelLibrary &operator=(const GCKernelLibrary &) = delete; + /** Default Destructor. */ + ~GCKernelLibrary(); + + static GCKernelLibrary &get(); + /** Initialises the kernel library. + * + * @param[in] shader_path (Optional) Path of the directory from which shader sources are loaded. + * @param[in] dpy (Optional) EGLdisplay set by external application. + * @param[in] ctx (Optional) EGLContext set by external application. + */ + void init(std::string shader_path = "./", EGLDisplay dpy = EGL_NO_DISPLAY, EGLContext ctx = EGL_NO_CONTEXT) + { + //TODO: deal with old display and context. + _shader_path = std::move(shader_path); + + _display = dpy; + _context = ctx; + + if(_display == EGL_NO_DISPLAY || _context == EGL_NO_CONTEXT) + { + setup_context(); + + _own_context = true; + } + + eglMakeCurrent(_display, EGL_NO_SURFACE, EGL_NO_SURFACE, _context); + setup_dummy_fbo(); + } + + /** Sets the path that the shaders reside in. + * + * @param[in] shader_path Path of the shader. + */ + void set_shader_path(const std::string &shader_path) + { + _shader_path = shader_path; + }; + /** Sets display and context to create kernel. + * + * @param[in] dpy EGLdisplay set by external application. + * @param[in] ctx EGLContext set by external application. + */ + void set_context(EGLDisplay dpy, EGLContext ctx) + { + //TODO: deal with old display and context. + _display = dpy; + _context = ctx; + + eglMakeCurrent(dpy, EGL_NO_SURFACE, EGL_NO_SURFACE, ctx); + setup_dummy_fbo(); + }; + /** Creates a kernel from the kernel library. + * + * @param[in] shader_name Shader name. + * @param[in] build_options_set Shader build options as a set. + * + * @return The created kernel. 
+ */ + GCKernel create_kernel(const std::string &shader_name, const StringSet &build_options_set = {}) const; + /** Serializes and saves programs to a binary. + * + */ + void save_binary(); + /** Load serialized binary with all the programs. + * + */ + void load_binary(); + /** Setup a dummy fbo to workaround an issue on Galaxy S8. + * + */ + void setup_dummy_fbo(); + +private: + /** Preprocess GLES shader + * + * @param[in] shader_source Source code of the shader to preprocess. + * + * @return Preprocessed GLES shader object. + */ + const std::string preprocess_shader(const std::string &shader_source) const; + /** Load program and its dependencies. + * + * @param[in] program_name Name of the program to load. + */ + const GCProgram &load_program(const std::string &program_name) const; + /** Concatenates contents of a set into a single string. + * + * @param[in] s Input set to concatenate. + * + * @return Concatenated string. + */ + std::string stringify_set(const StringSet &s) const; + /** Set up EGL context. + */ + void setup_context(); + + EGLDisplay _display; /**< Underlying EGL Display. */ + EGLContext _context; /**< Underlying EGL Context. */ + GLuint _frame_buffer; /**< Dummy fbo */ + GLuint _tex_rt; /**< Dummy texture for render target */ + bool _own_context; /**< Self created context or not. */ + std::string _shader_path; /**< Path to the shaders folder. */ + mutable std::map _programs_map; /**< Map with all already loaded program data. */ + mutable std::map _built_programs_map; /**< Map with all already built program data. */ + static const std::map _shader_program_map; /**< Map that associates kernel names with programs. */ + static const std::map _program_source_map; /**< Contains sources for all programs. + Used for compile-time shader inclusion. 
*/ +}; +} +#endif /* __ARM_COMPUTE_GCKERNELLIBRARY_H__ */ diff --git a/arm_compute/core/GLES_COMPUTE/GCKernels.h b/arm_compute/core/GLES_COMPUTE/GCKernels.h new file mode 100644 index 0000000000..57d11d5f18 --- /dev/null +++ b/arm_compute/core/GLES_COMPUTE/GCKernels.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_GCKERNELS_H__ +#define __ARM_COMPUTE_GCKERNELS_H__ + +/* Header regrouping all the GLES compute kernels */ +#include "arm_compute/core/GLES_COMPUTE/kernels/GCAbsoluteDifferenceKernel.h" +#include "arm_compute/core/GLES_COMPUTE/kernels/GCActivationLayerKernel.h" +#include "arm_compute/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.h" +#include "arm_compute/core/GLES_COMPUTE/kernels/GCCol2ImKernel.h" +#include "arm_compute/core/GLES_COMPUTE/kernels/GCDepthConcatenateKernel.h" +#include "arm_compute/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.h" +#include "arm_compute/core/GLES_COMPUTE/kernels/GCDropoutKernel.h" +#include "arm_compute/core/GLES_COMPUTE/kernels/GCFillBorderKernel.h" +#include "arm_compute/core/GLES_COMPUTE/kernels/GCGEMMInterleave4x4Kernel.h" +#include "arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixAccumulateBiasesKernel.h" +#include "arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixAdditionKernel.h" +#include "arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixMultiplyKernel.h" +#include "arm_compute/core/GLES_COMPUTE/kernels/GCGEMMTranspose1xWKernel.h" +#include "arm_compute/core/GLES_COMPUTE/kernels/GCIm2ColKernel.h" +#include "arm_compute/core/GLES_COMPUTE/kernels/GCNormalizationLayerKernel.h" +#include "arm_compute/core/GLES_COMPUTE/kernels/GCPixelWiseMultiplicationKernel.h" +#include "arm_compute/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.h" +#include "arm_compute/core/GLES_COMPUTE/kernels/GCSoftmaxLayerKernel.h" +#include "arm_compute/core/GLES_COMPUTE/kernels/GCTransposeKernel.h" + +#endif /* __ARM_COMPUTE_GCKERNELS_H__ */ diff --git a/arm_compute/core/GLES_COMPUTE/IGCKernel.h b/arm_compute/core/GLES_COMPUTE/IGCKernel.h new file mode 100644 index 0000000000..0d3bfb30fd --- /dev/null +++ b/arm_compute/core/GLES_COMPUTE/IGCKernel.h @@ -0,0 +1,179 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_IGCKERNEL_H__ +#define __ARM_COMPUTE_IGCKERNEL_H__ + +#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h" +#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h" + +#include "arm_compute/core/IKernel.h" + +namespace arm_compute +{ +class IGCTensor; +class Window; + +/** Common interface for all the GLES kernels */ +class IGCKernel : public IKernel +{ +public: + /** Constructor */ + IGCKernel(); + /** Returns a reference to the GLES kernel of this object. + * + * @return A reference to the GLES kernel of this object. + */ + GCKernel &kernel(); + + class BufferParam + { + public: + /** Tensor's binding point in this kernel. 
*/ + unsigned int binding_point = 0; + /** The base 2 logarithm of SSBO buffer data type size (Number of bits to be shift for offset calculation) */ + unsigned int buffer_data_type_shift = 0; + + /** Constructor + * + * @param[in] binding Tensor's binding point. + * @param[in] shift Number of bits to be shift for offset calculation + */ + BufferParam(const unsigned int binding, const unsigned int shift) + : binding_point(binding), buffer_data_type_shift(shift) + { + } + }; + + /** Add the passed 1D tensor's parameters to the object's kernel's arguments starting from the index idx. + * + * @param[in] idx Index at which to start adding the tensor's arguments.Input and output tensor will have sperated index, multiple indices start from 1, single index have to be set to 0. + * @param[in] tensor Tensor to set as an argument of the object's kernel. + * @param[in] binding_point Tensor's binding point in this kernel. + * @param[in] window Window the kernel will be executed on. + */ + void add_1D_tensor_argument(unsigned int &idx, const IGCTensor *tensor, const unsigned int binding_point, const Window &window); + + /** Add the passed 1D tensor's parameters to the object's kernel's arguments starting from the index idx. + * + * @param[in] idx Index at which to start adding the tensor's arguments.Input and output tensor will have sperated index, multiple indices start from 1, single index have to be set to 0. + * @param[in] tensor Tensor to set as an argument of the object's kernel. + * @param[in] param Additional parameter for GLES SSBO buffer. + * @param[in] window Window the kernel will be executed on. + */ + void add_1D_tensor_argument(unsigned int &idx, const IGCTensor *tensor, const BufferParam ¶m, const Window &window); + + /** Add the passed 2D tensor's parameters to the object's kernel's arguments starting from the index idx. 
+ * + * @param[in] idx Index at which to start adding the tensor's arguments.Input and output tensor will have sperated index, multiple indices start from 1, single index have to be set to 0. + * @param[in] tensor Tensor to set as an argument of the object's kernel. + * @param[in] binding_point Tensor's binding point in this kernel. + * @param[in] window Window the kernel will be executed on. + */ + void add_2D_tensor_argument(unsigned int &idx, const IGCTensor *tensor, const unsigned int binding_point, const Window &window); + + /** Add the passed 2D tensor's parameters to the object's kernel's arguments starting from the index idx. + * + * @param[in] idx Index at which to start adding the tensor's arguments.Input and output tensor will have sperated index, multiple indices start from 1, single index have to be set to 0. + * @param[in] tensor Tensor to set as an argument of the object's kernel. + * @param[in] param Additional parameter for GLES SSBO buffer. + * @param[in] window Window the kernel will be executed on. + */ + void add_2D_tensor_argument(unsigned int &idx, const IGCTensor *tensor, const BufferParam ¶m, const Window &window); + + /** Add the passed 3D tensor's parameters to the object's kernel's arguments starting from the index idx. + * + * @param[in] idx Index at which to start adding the tensor's arguments.Input and output tensor will have sperated index, multiple indices start from 1, single index have to be set to 0. + * @param[in] tensor Tensor to set as an argument of the object's kernel. + * @param[in] binding_point Tensor's binding point in this kernel. + * @param[in] window Window the kernel will be executed on. + */ + void add_3D_tensor_argument(unsigned int &idx, const IGCTensor *tensor, const unsigned int binding_point, const Window &window); + + /** Add the passed 3D tensor's parameters to the object's kernel's arguments starting from the index idx. 
+ * + * @param[in] idx Index at which to start adding the tensor's arguments.Input and output tensor will have sperated index, multiple indices start from 1, single index have to be set to 0. + * @param[in] tensor Tensor to set as an argument of the object's kernel. + * @param[in] param Additional parameter for GLES SSBO buffer. + * @param[in] window Window the kernel will be executed on. + */ + void add_3D_tensor_argument(unsigned int &idx, const IGCTensor *tensor, const BufferParam ¶m, const Window &window); + + /** Returns the number of arguments enqueued per 1D tensor object. + * + * @return The number of arguments enqueues per 1D tensor object. + */ + unsigned int num_arguments_per_1D_tensor() const; + /** Returns the number of arguments enqueued per 2D tensor object. + * + * @return The number of arguments enqueues per 2D tensor object. + */ + unsigned int num_arguments_per_2D_tensor() const; + /** Returns the number of arguments enqueued per 3D tensor object. + * + * @return The number of arguments enqueues per 3D tensor object. + */ + unsigned int num_arguments_per_3D_tensor() const; + /** Enqueue the OpenGL ES shader to process the given window + * + * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). + */ + virtual void run(const Window &window) = 0; + +private: + /** Add the passed tensor's parameters to the object's kernel's arguments starting from the index idx. + * + * @param[in] idx Index at which to start adding the tensor's arguments.Input and output tensor will have sperated index, multiple indices start from 1, single index have to be set to 0. + * @param[in] tensor Tensor to set as an argument of the object's kernel. + * @param[in] param Additional parameter for GLES SSBO buffer. + * @param[in] window Window the kernel will be executed on. 
+ */ + template + void add_tensor_argument(unsigned int &idx, const IGCTensor *tensor, const BufferParam ¶m, const Window &window); + + /** Returns the number of arguments enqueued per tensor object. + * + * @return The number of arguments enqueued per tensor object. + */ + template + unsigned int num_arguments_per_tensor() const; + +protected: + GCKernel _kernel; /**< GLES kernel to run */ +}; + +/** Add the kernel to the command queue with the given window. + * + * @note Depending on the size of the window, this might translate into several jobs being enqueued. + * + * @note If kernel->kernel() is empty then the function will return without adding anything to the queue. + * + * @param[in] kernel Kernel to enqueue + * @param[in] window Window the kernel has to process. + * @param[in] lws Local workgroup size requested, by default (1, 1, 1) + * + * @note If any dimension of the lws is greater than the global workgroup size then no lws will be passed. + */ +void enqueue(IGCKernel &kernel, const Window &window, const gles::NDRange &lws = gles::NDRange(1U, 1U, 1U)); +} +#endif /*__ARM_COMPUTE_IGCKERNEL_H__ */ diff --git a/arm_compute/core/GLES_COMPUTE/IGCSimple2DKernel.h b/arm_compute/core/GLES_COMPUTE/IGCSimple2DKernel.h new file mode 100644 index 0000000000..413e86a2b7 --- /dev/null +++ b/arm_compute/core/GLES_COMPUTE/IGCSimple2DKernel.h @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_IGCSIMPLE2DKERNEL_H__ +#define __ARM_COMPUTE_IGCSIMPLE2DKERNEL_H__ + +#include "arm_compute/core/GLES_COMPUTE/IGCSimpleKernel.h" + +namespace arm_compute +{ +class IGCTensor; + +/** Interface for simple OpenGL ES kernels having 1 tensor input and 1 tensor output. This interface can be used when the work-item processes a 2D tile */ +class IGCSimple2DKernel : public IGCSimpleKernel +{ +public: + // Inherited methods overridden: + void run(const Window &window) override; +}; +} +#endif /*__ARM_COMPUTE_IGCSIMPLE2DKERNEL_H__ */ diff --git a/arm_compute/core/GLES_COMPUTE/IGCSimple3DKernel.h b/arm_compute/core/GLES_COMPUTE/IGCSimple3DKernel.h new file mode 100644 index 0000000000..622e53c38b --- /dev/null +++ b/arm_compute/core/GLES_COMPUTE/IGCSimple3DKernel.h @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_IGCSIMPLE3DKERNEL_H__ +#define __ARM_COMPUTE_IGCSIMPLE3DKERNEL_H__ + +#include "arm_compute/core/GLES_COMPUTE/IGCSimple2DKernel.h" + +namespace arm_compute +{ +class IGCTensor; + +/** Interface for simple GLES kernels having 1 tensor input and 1 tensor output. + * Both input tensor and output tensor must have at least 3 dimensions. + */ +class IGCSimple3DKernel : public IGCSimple2DKernel +{ +public: + // Inherited methods overridden: + void run(const Window &window) override; +}; +} +#endif /*__ARM_COMPUTE_IGCSIMPLE3DKERNEL_H__ */ diff --git a/arm_compute/core/GLES_COMPUTE/IGCSimpleKernel.h b/arm_compute/core/GLES_COMPUTE/IGCSimpleKernel.h new file mode 100644 index 0000000000..a23c4e774e --- /dev/null +++ b/arm_compute/core/GLES_COMPUTE/IGCSimpleKernel.h @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2016, 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_IGCSIMPLEKERNEL_H__ +#define __ARM_COMPUTE_IGCSIMPLEKERNEL_H__ + +#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h" +#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h" +#include "arm_compute/core/Helpers.h" + +namespace arm_compute +{ +/** Interface for simple OpenGL ES kernels having 1 tensor input and 1 tensor output */ +class IGCSimpleKernel : public IGCKernel +{ +public: + /** Constructor. */ + IGCSimpleKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers). */ + IGCSimpleKernel(const IGCSimpleKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers). */ + IGCSimpleKernel &operator=(const IGCSimpleKernel &) = delete; + /** Allow instances of this class to be moved. 
*/ + IGCSimpleKernel(IGCSimpleKernel &&) = default; + /** Allow instances of this class to be moved. */ + IGCSimpleKernel &operator=(IGCSimpleKernel &&) = default; + /** Default destructor */ + ~IGCSimpleKernel() = default; + + /** Configure the kernel + * + * @param[in] input Source tensor. + * @param[out] output Destination tensor. + * @param[in] num_elems_processed_per_iteration Number of processed elements per iteration. + * @param[in] border_undefined (Optional) True if the border mode is undefined. False if it's replicate or constant. + * @param[in] border_size (Optional) Size of the border. + */ + void configure(const IGCTensor *input, IGCTensor *output, unsigned int num_elems_processed_per_iteration, bool border_undefined = false, const BorderSize &border_size = BorderSize()); + +protected: + const IGCTensor *_input; + IGCTensor *_output; +}; +} + +#endif /*__ARM_COMPUTE_IGCSIMPLEKERNEL_H__ */ diff --git a/arm_compute/core/GLES_COMPUTE/IGCTensor.h b/arm_compute/core/GLES_COMPUTE/IGCTensor.h new file mode 100644 index 0000000000..ab4e57e0ce --- /dev/null +++ b/arm_compute/core/GLES_COMPUTE/IGCTensor.h @@ -0,0 +1,99 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_IGCTENSOR_H__ +#define __ARM_COMPUTE_IGCTENSOR_H__ + +#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h" +#include "arm_compute/core/ITensor.h" + +#include + +namespace arm_compute +{ +/** Interface for GLES Compute tensor */ +class IGCTensor : public ITensor +{ +public: + /** Default constructor. */ + IGCTensor(); + + /** Prevent instances of this class from being copied (As this class contains pointers). */ + IGCTensor(const IGCTensor &) = delete; + + /** Prevent instances of this class from being copy assigned (As this class contains pointers). */ + IGCTensor &operator=(const IGCTensor &) = delete; + + /** Allow instances of this class to be moved */ + IGCTensor(IGCTensor &&) = default; + + /** Allow instances of this class to be moved */ + IGCTensor &operator=(IGCTensor &&) = default; + + /** Virtual destructor */ + virtual ~IGCTensor() = default; + + /** Map on an allocated buffer. + * + * @param[in] blocking (Optional) If true, then the mapping will be ready to use by the time + * this method returns, else it is the caller's responsibility + * to flush the queue and wait for the mapping operation to have completed before using the returned mapping pointer. + */ + void map(bool blocking = true); + /** Unmap an allocated and mapped buffer. + */ + void unmap(); + /** Clear the contents of the tensor synchronously. 
+ */ + void clear(); + + // Inherited methods overridden: + uint8_t *buffer() const override; + /** Interface to be implemented by the child class to return the tensor's gles compute buffer id. + * + * @return A SSBO buffer id. + */ + virtual GLuint gc_buffer() const = 0; + +protected: + /** Method to be implemented by the child class to map the SSBO. + * + * @param[in] blocking If true, then the mapping will be ready to use by the time + * this method returns, else it is the caller's responsibility + * to flush the queue and wait for the mapping operation to have completed before using the returned mapping pointer. + */ + virtual uint8_t *do_map(bool blocking) = 0; + /** Method to be implemented by the child class to unmap the SSBO. + * + * @note This method simply enqueues the unmap operation, it is the caller's responsibility to flush the queue and make sure the unmap is finished before + * the memory is accessed by the device. + */ + virtual void do_unmap() = 0; + +private: + uint8_t *_mapping; +}; + +using IGCImage = IGCTensor; +} +#endif /*__ARM_COMPUTE_IGCTENSOR_H__ */ diff --git a/arm_compute/core/GLES_COMPUTE/OpenGLES.h b/arm_compute/core/GLES_COMPUTE/OpenGLES.h new file mode 100644 index 0000000000..e123982945 --- /dev/null +++ b/arm_compute/core/GLES_COMPUTE/OpenGLES.h @@ -0,0 +1,165 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_OPENGLES_H__ +#define __ARM_COMPUTE_OPENGLES_H__ + +#include "arm_compute/core/Log.h" + +#include +#include +#include +#include +#include +#include + +#ifdef ARM_COMPUTE_DEBUG_ENABLED +#define ARM_COMPUTE_GL_CHECK(x) \ + x; \ + { \ + GLenum error = glGetError(); \ + if(error != GL_NO_ERROR) \ + ARM_COMPUTE_LOG_INFO_MSG_WITH_FORMAT_CORE("glGetError() = %i (0x%.8x)\n", error, error); \ + } +#else /* ARM_COMPUTE_DEBUG_ENABLED */ +#define ARM_COMPUTE_GL_CHECK(x) x +#endif /* ARM_COMPUTE_DEBUG_ENABLED */ + +namespace arm_compute +{ +namespace gles +{ +/** Class interface for specifying NDRange values. */ +class NDRange +{ +private: + size_t _sizes[3]; + size_t _dimensions; + +public: + /** Default constructor - resulting range has zero dimensions. */ + NDRange() + : _dimensions(0) + { + _sizes[0] = 0; + _sizes[1] = 0; + _sizes[2] = 0; + } + + /** Constructs one-dimensional range. + * + * @param[in] size0 Size of the first dimension. + */ + NDRange(size_t size0) + : _dimensions(1) + { + _sizes[0] = size0; + _sizes[1] = 1; + _sizes[2] = 1; + } + + /** Constructs two-dimensional range. + * + * @param[in] size0 Size of the first dimension. + * @param[in] size1 Size of the second dimension. + */ + NDRange(size_t size0, size_t size1) + : _dimensions(2) + { + _sizes[0] = size0; + _sizes[1] = size1; + _sizes[2] = 1; + } + + /** Constructs three-dimensional range. + * + * @param[in] size0 Size of the first dimension. + * @param[in] size1 Size of the second dimension. 
+ * @param[in] size2 Size of the third dimension. + */ + NDRange(size_t size0, size_t size1, size_t size2) + : _dimensions(3) + { + _sizes[0] = size0; + _sizes[1] = size1; + _sizes[2] = size2; + } + + /** Conversion operator to const size_t *. + * + * @returns A pointer to the size of the first dimension. + */ + operator const size_t *() const + { + return _sizes; + } + + /** Queries the number of dimensions in the range. + * + * @returns The number of dimensions. + */ + size_t dimensions() const + { + return _dimensions; + } + + /** Returns the size of the object in bytes based on the runtime number of dimensions + * + * @returns The size of the object in bytes. + */ + size_t size() const + { + return _dimensions * sizeof(size_t); + } + + /** Returns the sizes array for each dimensions. + * + * @returns The sizes array + */ + size_t *get() + { + return _sizes; + } + + /** Returns the sizes array for each dimensions. + * + * @returns The sizes array + */ + const size_t *get() const + { + return _sizes; + } +}; + +static const NDRange NullRange; +static const NDRange Range_128_1 = NDRange(128, 1); +} // namespace gles + +/** Check if the OpenGL ES 3.1 API is available at runtime. + * + * @returns true if the OpenGL ES 3.1 API is available. + */ +bool opengles31_is_available(); +} // namespace arm_compute + +#endif /* __ARM_COMPUTE_OPENGLES_H__ */ diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCAbsoluteDifferenceKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCAbsoluteDifferenceKernel.h new file mode 100644 index 0000000000..71f7b37700 --- /dev/null +++ b/arm_compute/core/GLES_COMPUTE/kernels/GCAbsoluteDifferenceKernel.h @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_GCABSOLUTEDIFFERENCEKERNEL_H__ +#define __ARM_COMPUTE_GCABSOLUTEDIFFERENCEKERNEL_H__ + +#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h" + +namespace arm_compute +{ +class IGCTensor; + +/** Interface for the absolute difference kernel. + * + * Absolute difference is computed by: + * @f[ output(x,y) = | input1(x,y) - input2(x,y) | @f] + */ +class GCAbsoluteDifferenceKernel : public IGCKernel +{ +public: + /** Default constructor. */ + GCAbsoluteDifferenceKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers). */ + GCAbsoluteDifferenceKernel(const GCAbsoluteDifferenceKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers). 
*/ + GCAbsoluteDifferenceKernel &operator=(const GCAbsoluteDifferenceKernel &) = delete; + /** Allow instances of this class to be moved. */ + GCAbsoluteDifferenceKernel(GCAbsoluteDifferenceKernel &&) = default; + /** Allow instances of this class to be moved. */ + GCAbsoluteDifferenceKernel &operator=(GCAbsoluteDifferenceKernel &&) = default; + /** Default destructor */ + ~GCAbsoluteDifferenceKernel() = default; + + /** Set the inputs and output images. + * + * @param[in] input1 Source tensor. Data types supported: U8 + * @param[in] input2 Source tensor. Data types supported: U8 + * @param[out] output Destination tensor. Data types supported: U8 + */ + void configure(const IGCTensor *input1, const IGCTensor *input2, IGCTensor *output); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + const IGCTensor *_input1; /**< Source tensor 1. */ + const IGCTensor *_input2; /**< Source tensor 2. */ + IGCTensor *_output; /**< Destination tensor. */ +}; +} +#endif /* __ARM_COMPUTE_GCABSOLUTEDIFFERENCEKERNEL_H__ */ diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCActivationLayerKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCActivationLayerKernel.h new file mode 100644 index 0000000000..fc1d52f455 --- /dev/null +++ b/arm_compute/core/GLES_COMPUTE/kernels/GCActivationLayerKernel.h @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_GCACTIVATIONLAYERKERNEL_H__ +#define __ARM_COMPUTE_GCACTIVATIONLAYERKERNEL_H__ + +#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h" + +namespace arm_compute +{ +class IGCTensor; + +/** Interface for the activation layer kernel. 
*/ +class GCActivationLayerKernel : public IGCKernel +{ +public: + /** Default constructor */ + GCActivationLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GCActivationLayerKernel(const GCActivationLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GCActivationLayerKernel &operator=(const GCActivationLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + GCActivationLayerKernel(GCActivationLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + GCActivationLayerKernel &operator=(GCActivationLayerKernel &&) = default; + /** Default destructor */ + ~GCActivationLayerKernel() = default; + /** Set the input and output tensor. + * + * @note If the output tensor is a nullptr, the activation function will be performed in-place + * + * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result + * of the activation function. Data types supported: F16/F32. + * @param[out] output Destination tensor. Data type should match the input data type. + * @param[in] act_info Activation layer information. + */ + void configure(IGCTensor *input, IGCTensor *output, ActivationLayerInfo act_info); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + IGCTensor *_input; + IGCTensor *_output; +}; +} +#endif /*__ARM_COMPUTE_GCACTIVATIONLAYERKERNEL_H__ */ diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.h new file mode 100644 index 0000000000..2bbd6a83fe --- /dev/null +++ b/arm_compute/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_GCBATCHNORMALIZATIONLAYERKERNEL_H__ +#define __ARM_COMPUTE_GCBATCHNORMALIZATIONLAYERKERNEL_H__ + +#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h" + +namespace arm_compute +{ +class IGCTensor; + +/** Interface for the BatchNormalization layer kernel. + */ +class GCBatchNormalizationLayerKernel : public IGCKernel +{ +public: + /** Constructor */ + GCBatchNormalizationLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GCBatchNormalizationLayerKernel(const GCBatchNormalizationLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GCBatchNormalizationLayerKernel &operator=(const GCBatchNormalizationLayerKernel &) = delete; + /** Default Move Constructor. 
*/ + GCBatchNormalizationLayerKernel(GCBatchNormalizationLayerKernel &&) = default; + /** Default move assignment operator. */ + GCBatchNormalizationLayerKernel &operator=(GCBatchNormalizationLayerKernel &&) = default; + /** Default destructor */ + ~GCBatchNormalizationLayerKernel() = default; + + /** Set the input and output tensors. + * + * @param[in] input Source tensor. 3 lower dimensions represent a single input with dimensions [width, height, FM]. + * The rest are optional and used for representing batches. Data types supported: F16/F32. + * @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data type supported: same as @p input + * @param[in] mean Mean values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] var Variance values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] beta Beta values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] gamma Gamma values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] epsilon Small value to avoid division with zero. 
+ */ + void configure(const IGCTensor *input, IGCTensor *output, const IGCTensor *mean, const IGCTensor *var, const IGCTensor *beta, const IGCTensor *gamma, float epsilon); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + const IGCTensor *_input; + IGCTensor *_output; + const IGCTensor *_mean; + const IGCTensor *_var; + const IGCTensor *_beta; + const IGCTensor *_gamma; + float _epsilon; +}; +} +#endif /*__ARM_COMPUTE_GCBATCHNORMALIZATIONLAYERKERNEL_H__ */ diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCCol2ImKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCCol2ImKernel.h new file mode 100644 index 0000000000..257ab0eca0 --- /dev/null +++ b/arm_compute/core/GLES_COMPUTE/kernels/GCCol2ImKernel.h @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef __ARM_COMPUTE_GCCOL2IMKERNEL_H__ +#define __ARM_COMPUTE_GCCOL2IMKERNEL_H__ + +#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h" + +namespace arm_compute +{ +class IGCTensor; + +/** Interface for the col2im reshaping kernel. + * + * Rearranges each matrix column into image blocks. It's the inverse operation of @ref GCIm2ColKernel. + * + * For example, a vector of 9 elements can be reshaped to a block(image) of 3x3: + * + * @f[ + * \left( \begin{array}{ccccccccc} + * a0 & a1 & a2 & a3 & a4 & a5 & a6 & a7 & a8 \\ + * \end{array} \right) + * \rightarrow + * \left( \begin{array}{ccc} + * a0 & a1 & a2 \\ + * a3 & a4 & a5 \\ + * a6 & a7 & a8 \\ + * \end{array} \right) + * @f] + */ +class GCCol2ImKernel : public IGCKernel +{ +public: + /** Default constructor */ + GCCol2ImKernel(); + + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GCCol2ImKernel(const GCCol2ImKernel &) = delete; + + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GCCol2ImKernel &operator=(const GCCol2ImKernel &) = delete; + + /** Allow instances of this class to be moved */ + GCCol2ImKernel(GCCol2ImKernel &&) = default; + + /** Allow instances of this class to be moved */ + GCCol2ImKernel &operator=(GCCol2ImKernel &&) = default; + + /** Default destructor */ + ~GCCol2ImKernel() = default; + + /** Set the input and output of the kernel. + * + * @param[in] input The input tensor to convert. Data types supported: F32 + * @param[out] output The output tensor. 3 lower dimensions represent a single output [width, height, OFM], + * while the rest represent batch of outputs. Data types supported: Same as @p input + * @param[in] convolved_dims Output convolved dimensions. 
+ */ + void configure(const IGCTensor *input, IGCTensor *output, std::pair convolved_dims); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + const IGCTensor *_input; + IGCTensor *_output; + std::pair _convolved_dims; +}; +} + +#endif /*__ARM_COMPUTE_GCCOL2IMKERNEL_H__ */ diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCDepthConcatenateKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCDepthConcatenateKernel.h new file mode 100644 index 0000000000..9a34a9a9c5 --- /dev/null +++ b/arm_compute/core/GLES_COMPUTE/kernels/GCDepthConcatenateKernel.h @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef __ARM_COMPUTE_GCDEPTHCONCATENATEKERNEL_H__ +#define __ARM_COMPUTE_GCDEPTHCONCATENATEKERNEL_H__ + +#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h" +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +class IGCTensor; + +/** Interface for the depth concatenate kernel. + * The input tensor will be concatenated into the output tensor. + */ +class GCDepthConcatenateKernel : public IGCKernel +{ +public: + /** Default constructor */ + GCDepthConcatenateKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GCDepthConcatenateKernel(const GCDepthConcatenateKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GCDepthConcatenateKernel &operator=(const GCDepthConcatenateKernel &) = delete; + /** Allow instances of this class to be moved */ + GCDepthConcatenateKernel(GCDepthConcatenateKernel &&) = default; + /** Allow instances of this class to be moved */ + GCDepthConcatenateKernel &operator=(GCDepthConcatenateKernel &&) = default; + /** Default destructor */ + ~GCDepthConcatenateKernel() = default; + /** Initialise the kernel's inputs and output + * + * @param[in] input Input tensor. Data types supported: F16/F32. + * @param[in] depth_offset The offset on the Z axis. + * @param[in,out] output Output tensor. Data types supported: Same as @p input. + * + * @note: The output tensor's low two dimensions can't be smaller than the input one's. + * @note: The gaps between the two lowest dimensions of input and output need to be divisible by 2. 
+ * + */ + void configure(const IGCTensor *input, unsigned int depth_offset, IGCTensor *output); + + // Inherited methods overridden: + void run(const Window &window) override; + BorderSize border_size() const override; + +private: + const IGCTensor *_input; + IGCTensor *_output; + int _top_bottom; + int _left_right; +}; +} +#endif /* __ARM_COMPUTE_GCDEPTHCONCATENATEKERNEL_H__ */ diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.h new file mode 100644 index 0000000000..415b781bc6 --- /dev/null +++ b/arm_compute/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.h @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_GCDIRECTCONVOLUTIONLAYERKERNEL_H__ +#define __ARM_COMPUTE_GCDIRECTCONVOLUTIONLAYERKERNEL_H__ + +#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h" +#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h" + +namespace arm_compute +{ +class IGCTensor; + +/** Interface for the direct convolution kernel. + */ +template +class GCDirectConvolutionLayerKernel : public IGCKernel +{ +public: + /** Default constructor */ + GCDirectConvolutionLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GCDirectConvolutionLayerKernel(const GCDirectConvolutionLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GCDirectConvolutionLayerKernel &operator=(const GCDirectConvolutionLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + GCDirectConvolutionLayerKernel(GCDirectConvolutionLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + GCDirectConvolutionLayerKernel &operator=(GCDirectConvolutionLayerKernel &&) = default; + /** Default destructor */ + ~GCDirectConvolutionLayerKernel() = default; + /** Set the input and output of the kernel. + * + * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM], + * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: F16/F32 + * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported:Same as @p input. + * @param[in] bias Biases tensor. Shared bias supported. Biases are 1D tensor with dimensions [OFM]. Data type supported:Same as @p input. + * @param[out] output The output tensor. First 2 lower dimensions represent a transform of each 3D input, + * while every dimension above represents a batch. 
Data types supported: Same as @p input + * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. + */ + void configure(const IGCTensor *input, const IGCTensor *weights, const IGCTensor *bias, IGCTensor *output, const PadStrideInfo &conv_info); + + // Inherited methods overridden: + BorderSize border_size() const override; + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + const IGCTensor *_input; + const IGCTensor *_bias; + const IGCTensor *_weights; + IGCTensor *_output; + BorderSize _border_size; + int _conv_stride_x; + int _conv_stride_y; + int _conv_pad_x; + int _conv_pad_y; + gles::NDRange _lws; +}; + +using GCDirectConvolutionLayer1x1Kernel = GCDirectConvolutionLayerKernel<1>; +using GCDirectConvolutionLayer3x3Kernel = GCDirectConvolutionLayerKernel<3>; +using GCDirectConvolutionLayer5x5Kernel = GCDirectConvolutionLayerKernel<5>; +} +#endif /*__ARM_COMPUTE_GCDIRECTCONVOLUTIONLAYERKERNEL_H__ */ diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCDropoutKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCDropoutKernel.h new file mode 100644 index 0000000000..6159a7af26 --- /dev/null +++ b/arm_compute/core/GLES_COMPUTE/kernels/GCDropoutKernel.h @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __ARM_COMPUTE_GCDROPOUTKERNEL_H__ +#define __ARM_COMPUTE_GCDROPOUTKERNEL_H__ + +#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h" + +namespace arm_compute +{ +class IGCTensor; + +/** Interface for the dropout kernel. + * + * Dropout is used to improve over-fit on neural networks. + * + */ +class GCDropoutKernel : public IGCKernel +{ +public: + /** Default constructor */ + GCDropoutKernel(); + + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GCDropoutKernel(const GCDropoutKernel &) = delete; + + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GCDropoutKernel &operator=(const GCDropoutKernel &) = delete; + + /** Allow instances of this class to be moved */ + GCDropoutKernel(GCDropoutKernel &&) = default; + + /** Allow instances of this class to be moved */ + GCDropoutKernel &operator=(GCDropoutKernel &&) = default; + + /** Set the input and output of the kernel. + * + * @param[in] input The input tensor for this op. Data types supported: F16/F32 + * @param[out] mask The mask tensor. Data types supported: Same as @p input + * @param[out] output The output tensor. 
Data types supported: Same as @p input + * @param[in] ratio Dropout ratio + * @param[in] forward Forward or backward propagation + * + */ + void configure(const IGCTensor *input, IGCTensor *mask, IGCTensor *output, float ratio, bool forward); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + const IGCTensor *_input; + IGCTensor *_mask; + IGCTensor *_output; + unsigned int _num_elems_processed_per_iteration; +}; +} + +#endif /*__ARM_COMPUTE_GCDROPOUTKERNEL_H__ */ diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCFillBorderKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCFillBorderKernel.h new file mode 100644 index 0000000000..acb8aa67d3 --- /dev/null +++ b/arm_compute/core/GLES_COMPUTE/kernels/GCFillBorderKernel.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_GCFILLBORDERKERNEL_H__ +#define __ARM_COMPUTE_GCFILLBORDERKERNEL_H__ + +#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h" +#include "arm_compute/core/PixelValue.h" +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +class IGCTensor; + +/** Interface for filling the border of a kernel */ +class GCFillBorderKernel : public IGCKernel +{ +public: + /** Default constructor */ + GCFillBorderKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GCFillBorderKernel(const GCFillBorderKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GCFillBorderKernel &operator=(const GCFillBorderKernel &) = delete; + /** Allow instances of this class to be moved */ + GCFillBorderKernel(GCFillBorderKernel &&) = default; + /** Allow instances of this class to be moved */ + GCFillBorderKernel &operator=(GCFillBorderKernel &&) = default; + /** Default destructor */ + ~GCFillBorderKernel() = default; + + /** Initialise the kernel's input, output and border mode. + * + * @param[in,out] tensor Tensor to process Data types supported: F16/F32. + * @param[in] border_size Size of the border to fill in elements. + * @param[in] border_mode Border mode to use for the convolution. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + */ + void configure(const IGCTensor *tensor, BorderSize border_size, BorderMode border_mode, const PixelValue &constant_border_value = PixelValue()); + + /** Function to set the constant value on fill border kernel depending on type. + * + * @param[in] idx Index of the kernel argument to set. + * @param[in] constant_border_value Constant value to use for borders if border_mode is set to CONSTANT. 
+ */ + template + void set_constant_border(unsigned int idx, const PixelValue &constant_border_value); + + // Inherited methods overridden: + void run(const Window &window) override; + bool is_parallelisable() const override; + +private: + const IGCTensor *_tensor; +}; +} +#endif /*__ARM_COMPUTE_GCFILLBORDERKERNEL_H__ */ diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMInterleave4x4Kernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMInterleave4x4Kernel.h new file mode 100644 index 0000000000..b2369a6ad1 --- /dev/null +++ b/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMInterleave4x4Kernel.h @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_GCGEMMINTERLEAVE4X4KERNEL_H__ +#define __ARM_COMPUTE_GCGEMMINTERLEAVE4X4KERNEL_H__ + +#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h" + +namespace arm_compute +{ +class IGCTensor; + +/** OpenGL ES kernel which interleaves the elements of a matrix A in chunk of 4x4 + * + * This function puts the values in a 4x4 block of Matrix A on the same row (Interleaved values) + * + * @f[ + * \left( \begin{array}{cccc} + * a00 & a01 & a02 & a03 \\ + * a10 & a11 & a12 & a13 \\ + * a20 & a21 & a22 & a23 \\ + * a30 & a31 & a32 & a33 \\ + * \end{array} \right) + * \rightarrow + * \left( \begin{array}{ccccccccccccccccc} + * a00 & a10 & a20 & a30 & a01 & a11 & a21 & a31 & a02 & a12 & a22 & a32 & a03 & a13 & a23 & a33 \\ + * \end{array} \right) + * @f] + * + * After this operation, the output matrix will have the following shape: [ height * 4, ceil(width / 4.0f) ] + */ +class GCGEMMInterleave4x4Kernel : public IGCKernel +{ +public: + /** Default constructor */ + GCGEMMInterleave4x4Kernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GCGEMMInterleave4x4Kernel(const GCGEMMInterleave4x4Kernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GCGEMMInterleave4x4Kernel &operator=(const GCGEMMInterleave4x4Kernel &) = delete; + /** Allow instances of this class to be moved */ + GCGEMMInterleave4x4Kernel(GCGEMMInterleave4x4Kernel &&) = default; + /** Allow instances of this class to be moved */ + GCGEMMInterleave4x4Kernel &operator=(GCGEMMInterleave4x4Kernel &&) = default; + /** Initialise the kernel's input and output. + * + * @param[in] input Input tensor. Data types supported: F32 + * @param[out] output Output tensor. 
Data type supported: same as @p input + */ + void configure(const IGCTensor *input, IGCTensor *output); + + // Inherited methods overridden + void run(const Window &window) override; + +private: + const IGCTensor *_input; + IGCTensor *_output; +}; +} +#endif /* __ARM_COMPUTE_GCGEMMINTERLEAVE4X4KERNEL_H__ */ diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixAccumulateBiasesKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixAccumulateBiasesKernel.h new file mode 100644 index 0000000000..20f28cbb65 --- /dev/null +++ b/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixAccumulateBiasesKernel.h @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
 + */ +#ifndef __ARM_COMPUTE_GCGEMMMATRIXACCUMULATEBIASESKERNEL_H__ +#define __ARM_COMPUTE_GCGEMMMATRIXACCUMULATEBIASESKERNEL_H__ + +#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h" + +namespace arm_compute +{ +/** Interface to add a bias to each row of the input tensor + * + */ +class GCGEMMMatrixAccumulateBiasesKernel : public IGCKernel +{ +public: + /** Default constructor */ + GCGEMMMatrixAccumulateBiasesKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GCGEMMMatrixAccumulateBiasesKernel(const GCGEMMMatrixAccumulateBiasesKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GCGEMMMatrixAccumulateBiasesKernel &operator=(const GCGEMMMatrixAccumulateBiasesKernel &) = delete; + /** Allow instances of this class to be moved */ + GCGEMMMatrixAccumulateBiasesKernel(GCGEMMMatrixAccumulateBiasesKernel &&) = default; + /** Allow instances of this class to be moved */ + GCGEMMMatrixAccumulateBiasesKernel &operator=(GCGEMMMatrixAccumulateBiasesKernel &&) = default; + /** Set the accumulate buffer and the biases of the kernel. + * + * @param[in, out] accum The accumulate tensor to convert. Data types supported: F16/F32 + * @param[in] biases The shared biases tensor to append. It must be 1D tensor. Data types supported: Same as @p accum + */ + void configure(IGCTensor *accum, const IGCTensor *biases); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + IGCTensor *_accum; + const IGCTensor *_biases; +}; +} + +#endif /*__ARM_COMPUTE_GCGEMMMATRIXACCUMULATEBIASESKERNEL_H__ */ diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixAdditionKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixAdditionKernel.h new file mode 100644 index 0000000000..02abb8da76 --- /dev/null +++ b/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixAdditionKernel.h @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_GCGEMMMATRIXADDITIONKERNEL_H__ +#define __ARM_COMPUTE_GCGEMMMATRIXADDITIONKERNEL_H__ + +#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h" + +namespace arm_compute +{ +class IGCTensor; + +/** OpenGL ES kernel to perform the in-place matrix addition between 2 matrices, taking into account that the second matrix might be weighted by a scalar value beta. + * The matrices must have the same dimensions + * + * @note This kernel is computed if and only if beta != 0.0. 
+ */ +class GCGEMMMatrixAdditionKernel : public IGCKernel +{ +public: + /** Default constructor */ + GCGEMMMatrixAdditionKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GCGEMMMatrixAdditionKernel(const GCGEMMMatrixAdditionKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GCGEMMMatrixAdditionKernel &operator=(const GCGEMMMatrixAdditionKernel &) = delete; + /** Allow instances of this class to be moved */ + GCGEMMMatrixAdditionKernel(GCGEMMMatrixAdditionKernel &&) = default; + /** Allow instances of this class to be moved */ + GCGEMMMatrixAdditionKernel &operator=(GCGEMMMatrixAdditionKernel &&) = default; + /** Initialise the kernel's input, output and beta value + * + * @note The input and output tensors must have the same dimensions + * + * @param[in] input Input tensor (Matrix C). Data types supported: F32 + * @param[in, out] output Output tensor. If this kernel is used to finalize the GEMM result (alpha * AB + beta * C), output must contain the result obtained by @ref GCGEMMMatrixMultiplyKernel. Data type supported: same as @p input + * @param[in] beta Weight of matrix C + */ + void configure(const IGCTensor *input, IGCTensor *output, float beta); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + const IGCTensor *_input; + IGCTensor *_output; +}; +} + +#endif /* __ARM_COMPUTE_GCGEMMMATRIXADDITIONKERNEL_H__ */ diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixMultiplyKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixMultiplyKernel.h new file mode 100644 index 0000000000..3a0b22f148 --- /dev/null +++ b/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixMultiplyKernel.h @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_GCGEMMMATRIXMULTIPLYKERNEL_H__ +#define __ARM_COMPUTE_GCGEMMMATRIXMULTIPLYKERNEL_H__ + +#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h" + +namespace arm_compute +{ +class IGCTensor; + +/** GLES Compute kernel to multiply two input matrices "A" and "B" or to multiply a vector "A" by a matrix "B". All elements of the output matrix/vector will be multiplied by alpha + * + * @note If the output tensor is a matrix, the implementation assumes that the input tensors @p input0 and @p input1 are both matrices and reshaped respectively with @ref GCGEMMInterleave4x4Kernel" and @ref GCGEMMTranspose1xWKernel + * @note If the output tensor is a vector and the data type is F32, the implementation assumes that the first input tensor @p input0 is a vector and the second input tensor @p input1 a matrix. 
The implementation also assumes that both tensors have not been reshaped + * + * @attention The second input tensor must have at least 2 dimensions (matrix) + * + */ +class GCGEMMMatrixMultiplyKernel : public IGCKernel +{ +public: + /** Default constructor */ + GCGEMMMatrixMultiplyKernel(); + + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GCGEMMMatrixMultiplyKernel(const GCGEMMMatrixMultiplyKernel &) = delete; + + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GCGEMMMatrixMultiplyKernel &operator=(const GCGEMMMatrixMultiplyKernel &) = delete; + + /** Allow instances of this class to be moved */ + GCGEMMMatrixMultiplyKernel(GCGEMMMatrixMultiplyKernel &&) = default; + + /** Allow instances of this class to be moved */ + GCGEMMMatrixMultiplyKernel &operator=(GCGEMMMatrixMultiplyKernel &&) = default; + + /** Initialise the kernel's input, output and alpha + * + * @param[in] input0 Input tensor containing the interleaved Matrix A or the vector A. Data types supported: F16/F32 + * @param[in] input1 Input tensor containing the transposed Matrix B if the first input tensor A is not a vector. + * If the output tensor is a vector, input1 must contain the matrix B not reshaped. Data type supported: same as @p input0 + * @param[out] output Output tensor to store the result of matrix multiplication. 
Data type supported: same as @p input0 + * @param[in] alpha Weight of the matrix product + * @param[in] is_interleaved_transposed (Optional) True if input0 and input1 have been reshaped respectively using @ref GCGEMMInterleave4x4Kernel and @ref GCGEMMTranspose1xWKernel + */ + void configure(const IGCTensor *input0, const IGCTensor *input1, IGCTensor *output, float alpha, bool is_interleaved_transposed = true); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + const IGCTensor *_input0; + const IGCTensor *_input1; + IGCTensor *_output; +}; +} +#endif /* __ARM_COMPUTE_GCGEMMMATRIXMULTIPLYKERNEL_H__ */ diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMTranspose1xWKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMTranspose1xWKernel.h new file mode 100644 index 0000000000..4223556ac4 --- /dev/null +++ b/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMTranspose1xWKernel.h @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_GCGEMMTRANSPOSE1XWKERNEL_H__ +#define __ARM_COMPUTE_GCGEMMTRANSPOSE1XWKERNEL_H__ + +#include "arm_compute/core/GLES_COMPUTE/IGCSimple2DKernel.h" + +namespace arm_compute +{ +class IGCTensor; + +/** OpenGLES kernel which transposes the elements of a matrix in chunks of 1xW, where W is equal to (16 / element size of the tensor) + * + * Following an example of how the transposition1xW works when the input data type is F32 + * + * @f[ + * \left( \begin{array}{cccc} + * a00 & a01 & a02 & a03 \\ + * a10 & a11 & a12 & a13 \\ + * a20 & a21 & a22 & a23 \\ + * a30 & a31 & a32 & a33 \\ + * \end{array} \right) + * \rightarrow + * \left( \begin{array}{ccccccccccccccccc} + * a00 & a01 & a02 & a03 & a10 & a11 & a12 & a13 & a20 & a21 & a22 & a23 & a30 & a31 & a32 & a33 \\ + * \end{array} \right) + * @f] + * + * @note The output matrix will have the following shape: [ height * W, ceil(width / W) ], where W = (16 / element size of the tensor) + * + */ +class GCGEMMTranspose1xWKernel : public IGCSimple2DKernel +{ +public: + /** Initialise the kernel's input and output. + * + * @param[in] input Input tensor. Data types supported: F32 + * @param[out] output Output tensor. 
Data type supported: same as @p input + */ + void configure(const IGCTensor *input, IGCTensor *output); + + // Inherited methods overridden: + void run(const Window &window) override; +}; +} +#endif /* __ARM_COMPUTE_GCGEMMTRANSPOSE1XWKERNEL_H__ */ diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCIm2ColKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCIm2ColKernel.h new file mode 100644 index 0000000000..e1b35607ff --- /dev/null +++ b/arm_compute/core/GLES_COMPUTE/kernels/GCIm2ColKernel.h @@ -0,0 +1,109 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_GCIM2COLKERNEL_H__ +#define __ARM_COMPUTE_GCIM2COLKERNEL_H__ + +#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h" + +namespace arm_compute +{ +class IGCTensor; + +/** Interface for the im2col reshape kernel. + * + * Rearranges image blocks into columns. 
It is used to strip out each convolution block to a single column. + * It is used to transform a convolution to a plain matrix multiplication. + * + * For example taking into account the image below and assuming 3x3 image blocks with stride of 1 we have: + * @f[ + * \left( \begin{array}{cccc} + * a00 & a01 & a02 & a03 \\ + * a10 & a11 & a12 & a13 \\ + * a20 & a21 & a22 & a23 \\ + * a30 & a31 & a32 & a33 \\ + * \end{array} \right) + * = + * \left( \begin{array}{ccccccccc} + * a00 & a01 & a02 & a10 & a11 & a12 & a20 & a21 & a22 \\ + * a01 & a02 & a03 & a11 & a12 & a13 & a21 & a22 & a23 \\ + * a10 & a11 & a12 & a20 & a21 & a22 & a30 & a31 & a32 \\ + * a11 & a12 & a13 & a21 & a22 & a23 & a31 & a32 & a33 \\ + * \end{array} \right) + * @f] + */ +class GCIm2ColKernel : public IGCKernel +{ +public: + /** Default constructor */ + GCIm2ColKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GCIm2ColKernel(const GCIm2ColKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GCIm2ColKernel &operator=(const GCIm2ColKernel &) = delete; + /** Allow instances of this class to be moved */ + GCIm2ColKernel(GCIm2ColKernel &&) = default; + /** Allow instances of this class to be moved */ + GCIm2ColKernel &operator=(GCIm2ColKernel &&) = default; + /** Set the input and output of the kernel. + * + * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM], + * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: F32 + * @param[out] output The output tensor. First 2 lower dimensions represent a transform of each 3D input, + * while every dimension above represents a batch. Data types supported: Same as @p input + * @param[in] kernel_dims The kernel dimensions (width and height). + * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. 
 + * @param[in] has_bias In case biases are provided expands the matrix with 1. + */ + void configure(const IGCTensor *input, IGCTensor *output, std::pair<unsigned int, unsigned int> kernel_dims, const PadStrideInfo &conv_info, bool has_bias); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + /** Run the reshape kernel optimised for the special case (stride is 1, padding is 0 and kernel's low 3 dimensions are same as input) + * + * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). + * + */ + void run_reduced(const Window &window); + /** Run the generic convolution layer input reshape kernel + * + * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). + * + */ + void run_generic(const Window &window); + + /** Common signature for the kernel to run */ + using Im2ColFunction = void (GCIm2ColKernel::*)(const Window &); + +private: + const IGCTensor *_input; + IGCTensor *_output; + std::pair<unsigned int, unsigned int> _convolved_dims; + unsigned int _num_elems_processed_per_iteration; + Im2ColFunction _run_func; +}; +} + +#endif /*__ARM_COMPUTE_GCIM2COLKERNEL_H__ */ diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCNormalizationLayerKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCNormalizationLayerKernel.h new file mode 100644 index 0000000000..e8bc7ad2b2 --- /dev/null +++ b/arm_compute/core/GLES_COMPUTE/kernels/GCNormalizationLayerKernel.h @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_GCNORMALIZATIONLAYERKERNEL_H__ +#define __ARM_COMPUTE_GCNORMALIZATIONLAYERKERNEL_H__ + +#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h" + +namespace arm_compute +{ +class IGCTensor; + +/** Interface for the normalization layer kernel. + */ +class GCNormalizationLayerKernel : public IGCKernel +{ +public: + /** Constructor */ + GCNormalizationLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GCNormalizationLayerKernel(const GCNormalizationLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GCNormalizationLayerKernel &operator=(const GCNormalizationLayerKernel &) = delete; + /** Default Move Constructor. 
 */ + GCNormalizationLayerKernel(GCNormalizationLayerKernel &&) = default; + /** Default move assignment operator. */ + GCNormalizationLayerKernel &operator=(GCNormalizationLayerKernel &&) = default; + /** Default destructor */ + ~GCNormalizationLayerKernel() = default; + /** Set the input and output tensors. + * + * @param[in] input Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM], + * and an optional 4th dimension for batch of inputs. Data types supported: F32. + * @param[in] squared_input Source in which each element has been squared. 3 lower dims represent a single input with dimensions [width, height, IFM], + * Data types should match the input type. + * @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data types should match the input type. + * @param[in] norm_info Normalization layer information like the normalization type, normalization size and other parameters. + */ + void configure(const IGCTensor *input, const IGCTensor *squared_input, IGCTensor *output, NormalizationLayerInfo norm_info); + + // Inherited methods overridden: + void run(const Window &window) override; + BorderSize border_size() const override; + +private: + const IGCTensor *_input; + const IGCTensor *_squared_input; + IGCTensor *_output; + BorderSize _border_size; +}; +} +#endif /*__ARM_COMPUTE_GCNORMALIZATIONLAYERKERNEL_H__ */ diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCPixelWiseMultiplicationKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCPixelWiseMultiplicationKernel.h new file mode 100644 index 0000000000..3b01b4ad4d --- /dev/null +++ b/arm_compute/core/GLES_COMPUTE/kernels/GCPixelWiseMultiplicationKernel.h @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_GCPIXELWISEMULTIPLICATIONKERNEL_H__ +#define __ARM_COMPUTE_GCPIXELWISEMULTIPLICATIONKERNEL_H__ + +#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h" +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +class IGCTensor; + +/** Interface for the pixelwise multiplication kernel. + * + */ +class GCPixelWiseMultiplicationKernel : public IGCKernel +{ +public: + /** Default constructor.*/ + GCPixelWiseMultiplicationKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers). */ + GCPixelWiseMultiplicationKernel(const GCPixelWiseMultiplicationKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers). 
*/ + GCPixelWiseMultiplicationKernel &operator=(const GCPixelWiseMultiplicationKernel &) = delete; + /** Allow instances of this class to be moved */ + GCPixelWiseMultiplicationKernel(GCPixelWiseMultiplicationKernel &&) = default; + /** Allow instances of this class to be moved */ + GCPixelWiseMultiplicationKernel &operator=(GCPixelWiseMultiplicationKernel &&) = default; + /** Initialise the kernel's input, output and border mode. + * + * @param[in] input1 An input tensor. Data types supported: F32. + * @param[in] input2 An input tensor. Data types supported: same as @p input1. + * @param[out] output The output tensor, Data types supported: same as @p input1. + * @param[in] scale Scale to apply after multiplication. + * Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15. + */ + void configure(const IGCTensor *input1, const IGCTensor *input2, IGCTensor *output, float scale); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + const IGCTensor *_input1; + const IGCTensor *_input2; + IGCTensor *_output; +}; +} + +#endif /*__ARM_COMPUTE_GCPIXELWISEMULTIPLICATIONKERNEL_H__ */ diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.h new file mode 100644 index 0000000000..d4921c2092 --- /dev/null +++ b/arm_compute/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.h @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_GCPOOLINGLAYERKERNEL_H__ +#define __ARM_COMPUTE_GCPOOLINGLAYERKERNEL_H__ + +#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h" + +namespace arm_compute +{ +class IGCTensor; + +/** Interface for the pooling layer kernel */ +class GCPoolingLayerKernel : public IGCKernel +{ +public: + /** Default constructor */ + GCPoolingLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GCPoolingLayerKernel(const GCPoolingLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GCPoolingLayerKernel &operator=(const GCPoolingLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + GCPoolingLayerKernel(GCPoolingLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + GCPoolingLayerKernel &operator=(GCPoolingLayerKernel &&) = default; + /** Default destructor */ + ~GCPoolingLayerKernel() = default; + + /** Set the input and output tensors. + * + * @param[in] input Source tensor. Data types supported: F16/F32. + * @param[out] output Destination tensor. Data types supported: Same as @p input. + * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo. 
+ */ + void configure(const IGCTensor *input, IGCTensor *output, const PoolingLayerInfo &pool_info); + + // Inherited methods overridden: + void run(const Window &window) override; + BorderSize border_size() const override; + +private: + const IGCTensor *_input; + IGCTensor *_output; + PoolingLayerInfo _pool_info; + BorderSize _border_size; + unsigned int _num_elems_processed_per_iteration; +}; +} // namespace arm_compute +#endif /*__ARM_COMPUTE_GCPOOLINGLAYERKERNEL_H__ */ diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCSoftmaxLayerKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCSoftmaxLayerKernel.h new file mode 100644 index 0000000000..b9eb305bab --- /dev/null +++ b/arm_compute/core/GLES_COMPUTE/kernels/GCSoftmaxLayerKernel.h @@ -0,0 +1,112 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_GCSOFTMAXLAYERKERNEL_H__ +#define __ARM_COMPUTE_GCSOFTMAXLAYERKERNEL_H__ + +#include "arm_compute/core/GLES_COMPUTE/IGCSimple3DKernel.h" + +namespace arm_compute +{ +class IGCTensor; + +/** Interface for the identifying the max value of 1D Logits */ +class GCLogits1DMaxKernel : public IGCSimple3DKernel +{ +public: + /** Set the input and output tensors. + * + * @param[in] input Source tensor. Data types supported: F16/F32 + * @param[out] output Destination tensor. Data types supported: same as @p input + */ + void configure(const IGCTensor *input, IGCTensor *output); + + // Inherited methods overridden: + void run(const Window &window) override; +}; + +/** Interface for shifting the logits values around the max value and exponentiating the result */ +class GCLogits1DShiftExpSumKernel : public IGCKernel +{ +public: + /** Default constructor */ + GCLogits1DShiftExpSumKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GCLogits1DShiftExpSumKernel(const GCLogits1DShiftExpSumKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GCLogits1DShiftExpSumKernel &operator=(const GCLogits1DShiftExpSumKernel &) = delete; + /** Allow instances of this class to be moved */ + GCLogits1DShiftExpSumKernel(GCLogits1DShiftExpSumKernel &&) = default; + /** Allow instances of this class to be moved */ + GCLogits1DShiftExpSumKernel &operator=(GCLogits1DShiftExpSumKernel &&) = default; + /** Set the input and output tensors. + * + * @param[in] input Source tensor. Data types supported: F16/F32 + * @param[in] max Max values tensor. Data types supported: same as @p input + * @param[out] output Destination tensor. Data types supported: same as @p input + * @param[out] sum Sum of 1D logits tensor. 
Data types supported: same as @p input + */ + void configure(const IGCTensor *input, const IGCTensor *max, IGCTensor *output, IGCTensor *sum); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + const IGCTensor *_input; + const IGCTensor *_max; + IGCTensor *_output; + IGCTensor *_sum; +}; + +/** Interface for calculating the final step of the Softmax Layer where each logit value is multiplied by the inverse of the sum of the logits. */ +class GCLogits1DNormKernel : public IGCKernel +{ +public: + /** Default constructor */ + GCLogits1DNormKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GCLogits1DNormKernel(const GCLogits1DNormKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GCLogits1DNormKernel &operator=(const GCLogits1DNormKernel &) = delete; + /** Allow instances of this class to be moved */ + GCLogits1DNormKernel(GCLogits1DNormKernel &&) = default; + /** Allow instances of this class to be moved */ + GCLogits1DNormKernel &operator=(GCLogits1DNormKernel &&) = default; + /** Set the input and output tensors. + * + * @param[in] input Source tensor. Data types supported: F16/F32 + * @param[in] sum Sum tensor. Dimensions should be dim(input)-1. Data types supported: same as @p input + * @param[out] output Destination tensor. 
Data types supported: same as @p input + */ + void configure(const IGCTensor *input, const IGCTensor *sum, IGCTensor *output); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + const IGCTensor *_input; + const IGCTensor *_sum; + IGCTensor *_output; +}; +} // namespace arm_compute +#endif /*__ARM_COMPUTE_GCSOFTMAXLAYERKERNEL_H__ */ diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCTransposeKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCTransposeKernel.h new file mode 100644 index 0000000000..c628a00585 --- /dev/null +++ b/arm_compute/core/GLES_COMPUTE/kernels/GCTransposeKernel.h @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_GCTRANSPOSEKERNEL_H__ +#define __ARM_COMPUTE_GCTRANSPOSEKERNEL_H__ + +#include "arm_compute/core/GLES_COMPUTE/IGCSimple2DKernel.h" + +namespace arm_compute +{ +class IGCTensor; + +/** OpenGL ES kernel which transposes the elements of a matrix. + * + * [width, height, batch] -> [height, width, batch] + * + */ +class GCTransposeKernel : public IGCSimple2DKernel +{ +public: + /** Initialise the kernel's input and output. + * + * @param[in] input Input tensor. Data types supported: F16/F32 + * @param[out] output Output tensor. Data type supported: Same as @p input + */ + void configure(const IGCTensor *input, IGCTensor *output); + + // Inherited methods overridden: + void run(const Window &window) override; +}; +} +#endif /* __ARM_COMPUTE_GCTRANSPOSEKERNEL_H__ */ diff --git a/arm_compute/core/Log.h b/arm_compute/core/Log.h index 488c8e6190..70e7c51110 100644 --- a/arm_compute/core/Log.h +++ b/arm_compute/core/Log.h @@ -44,9 +44,12 @@ * @param[in] log_level Logging level * @param[in] msg Message to log */ -#define ARM_COMPUTE_LOG_MSG_CORE(log_level, msg) \ - ARM_COMPUTE_CREATE_DEFAULT_CORE_LOGGER(); \ - ARM_COMPUTE_LOG_MSG("CORE", log_level, msg) +#define ARM_COMPUTE_LOG_MSG_CORE(log_level, msg) \ + do \ + { \ + ARM_COMPUTE_CREATE_DEFAULT_CORE_LOGGER(); \ + ARM_COMPUTE_LOG_MSG("CORE", log_level, msg); \ + } while(false) /** Log a message with format to the core system logger * @@ -54,42 +57,57 @@ * @param[in] fmt String format (printf style) * @param[in] ... Message arguments */ -#define ARM_COMPUTE_LOG_MSG_WITH_FORMAT_CORE(log_level, fmt, ...) \ - ARM_COMPUTE_CREATE_DEFAULT_CORE_LOGGER(); \ - ARM_COMPUTE_LOG_MSG_WITH_FORMAT("CORE", log_level, fmt, __VA_ARGS__) +#define ARM_COMPUTE_LOG_MSG_WITH_FORMAT_CORE(log_level, fmt, ...) 
\ + do \ + { \ + ARM_COMPUTE_CREATE_DEFAULT_CORE_LOGGER(); \ + ARM_COMPUTE_LOG_MSG_WITH_FORMAT("CORE", log_level, fmt, __VA_ARGS__); \ + } while(false) /** Log a stream to the core system logger * * @param[in] log_level Logging level * @param[in] ss Stream to log */ -#define ARM_COMPUTE_LOG_STREAM_CORE(log_level, ss) \ - ARM_COMPUTE_CREATE_DEFAULT_CORE_LOGGER(); \ - ARM_COMPUTE_LOG_STREAM("CORE", log_level, ss) +#define ARM_COMPUTE_LOG_STREAM_CORE(log_level, ss) \ + do \ + { \ + ARM_COMPUTE_CREATE_DEFAULT_CORE_LOGGER(); \ + ARM_COMPUTE_LOG_STREAM("CORE", log_level, ss); \ + } while(false) /** Log information level message to the core system logger * * @param[in] msg Stream to log */ -#define ARM_COMPUTE_LOG_INFO_MSG_CORE(msg) \ - ARM_COMPUTE_CREATE_DEFAULT_CORE_LOGGER(); \ - ARM_COMPUTE_LOG_MSG_CORE(arm_compute::logging::LogLevel::INFO, msg) +#define ARM_COMPUTE_LOG_INFO_MSG_CORE(msg) \ + do \ + { \ + ARM_COMPUTE_CREATE_DEFAULT_CORE_LOGGER(); \ + ARM_COMPUTE_LOG_MSG_CORE(arm_compute::logging::LogLevel::INFO, msg); \ + } while(false) /** Log information level formatted message to the core system logger * * @param[in] fmt String format (printf style) * @param[in] ... Message arguments */ -#define ARM_COMPUTE_LOG_INFO_MSG_WITH_FORMAT_CORE(fmt, ...) \ - ARM_COMPUTE_CREATE_DEFAULT_CORE_LOGGER(); \ - ARM_COMPUTE_LOG_MSG_WITH_FORMAT_CORE(arm_compute::logging::LogLevel::INFO, msg) +#define ARM_COMPUTE_LOG_INFO_MSG_WITH_FORMAT_CORE(fmt, ...) 
\ + do \ + { \ + ARM_COMPUTE_CREATE_DEFAULT_CORE_LOGGER(); \ + ARM_COMPUTE_LOG_MSG_WITH_FORMAT_CORE(arm_compute::logging::LogLevel::INFO, fmt, __VA_ARGS__); \ + } while(false) /** Log information level stream to the core system logger * * @param[in] ss Message to log */ -#define ARM_COMPUTE_LOG_INFO_STREAM_CORE(ss) \ - ARM_COMPUTE_CREATE_DEFAULT_CORE_LOGGER(); \ - ARM_COMPUTE_LOG_STREAM_CORE(arm_compute::logging::LogLevel::INFO, ss) +#define ARM_COMPUTE_LOG_INFO_STREAM_CORE(ss) \ + do \ + { \ + ARM_COMPUTE_CREATE_DEFAULT_CORE_LOGGER(); \ + ARM_COMPUTE_LOG_STREAM_CORE(arm_compute::logging::LogLevel::INFO, ss); \ + } while(false) #endif /* __ARM_COMPUTE_LOGGING_MACROS_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEBatchNormalizationLayerKernel.h b/arm_compute/core/NEON/kernels/NEBatchNormalizationLayerKernel.h index 04c4c9ebba..1dfe075310 100644 --- a/arm_compute/core/NEON/kernels/NEBatchNormalizationLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEBatchNormalizationLayerKernel.h @@ -54,12 +54,12 @@ public: * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result. * 3 lower dimensions represent a single input with dimensions [width, height, FM]. * The rest are optional and used for representing batches. Data types supported: QS8/QS16/F16/F32. + * @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data type supported: same as @p input * @param[in] mean Mean values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input * @param[in] var Variance values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input - * @param[in] gamma Gamma values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input * @param[in] beta Beta values tensor. 1 dimension with size equal to the feature maps [FM]. 
Data types supported: Same as @p input + * @param[in] gamma Gamma values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input * @param[in] epsilon Small value to avoid division with zero. - * @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data type supported: same as @p input */ void configure(ITensor *input, ITensor *output, const ITensor *mean, const ITensor *var, const ITensor *beta, const ITensor *gamma, float epsilon); diff --git a/arm_compute/core/Utils.h b/arm_compute/core/Utils.h index 96e99e6874..b8c5b34e5a 100644 --- a/arm_compute/core/Utils.h +++ b/arm_compute/core/Utils.h @@ -837,7 +837,16 @@ void print_consecutive_elements_impl(std::ostream &s, const T *ptr, unsigned int { s.width(stream_width); } - s << std::right << static_cast<print_type>(ptr[i]) << element_delim; + + if(std::is_same<typename std::decay<T>::type, half>::value) + { + // We use T instead of print_type here because std::is_floating_point returns false and then the print_type becomes int. + s << std::right << static_cast<T>(ptr[i]) << element_delim; + } + else + { + s << std::right << static_cast<print_type>(ptr[i]) << element_delim; + } } } @@ -859,7 +868,17 @@ int max_consecutive_elements_display_width_impl(std::ostream &s, const T *ptr, u { std::stringstream ss; ss.copyfmt(s); - ss << static_cast<print_type>(ptr[i]); + + if(std::is_same<typename std::decay<T>::type, half>::value) + { + // We use T instead of print_type here because std::is_floating_point returns false and then the print_type becomes int.
+ ss << static_cast<T>(ptr[i]); + } + else + { + ss << static_cast<print_type>(ptr[i]); + } + max_width = std::max(max_width, ss.str().size()); } return max_width; diff --git a/arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h b/arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h index 70a201a1f8..d84ba69da2 100644 --- a/arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h +++ b/arm_compute/runtime/CL/functions/CLBatchNormalizationLayer.h @@ -54,8 +54,8 @@ public: * @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data type supported: same as @p input * @param[in] mean Mean values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input * @param[in] var Variance values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input - * @param[in] gamma Gamma values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input * @param[in] beta Beta values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] gamma Gamma values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input * @param[in] epsilon Small value to avoid division with zero. */ void configure(ICLTensor *input, ICLTensor *output, const ICLTensor *mean, const ICLTensor *var, const ICLTensor *beta, const ICLTensor *gamma, float epsilon); @@ -63,12 +63,12 @@ public: * * @param[in] input Source tensor info. In case of @p output tensor info = nullptr, this tensor will store the result. * 3 lower dimensions represent a single input with dimensions [width, height, FM]. - * @param[in] output Destination tensor info. Output will have the same number of dimensions as input. Data type supported: same as @p input * The rest are optional and used for representing batches. Data types supported: QS8/QS16/F16/F32.
+ * @param[in] output Destination tensor info. Output will have the same number of dimensions as input. Data type supported: same as @p input * @param[in] mean Mean values tensor info. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input * @param[in] var Variance values tensor info. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input - * @param[in] gamma Gamma values tensor info. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input * @param[in] beta Beta values tensor info. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] gamma Gamma values tensor info. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input * @param[in] epsilon Small value to avoid division with zero. * * @return an error status diff --git a/arm_compute/runtime/CL/functions/CLNormalizationLayer.h b/arm_compute/runtime/CL/functions/CLNormalizationLayer.h index 0818cec2e5..1e0b27ae43 100644 --- a/arm_compute/runtime/CL/functions/CLNormalizationLayer.h +++ b/arm_compute/runtime/CL/functions/CLNormalizationLayer.h @@ -37,7 +37,7 @@ namespace arm_compute { class ICLTensor; -/** Basic function to simulate a normalization layer. This function calls the following CL kernels: +/** Basic function to compute a normalization layer. This function calls the following CL kernels: * * -# @ref CLFillBorderKernel * -# @ref CLNormalizationLayerKernel @@ -55,7 +55,7 @@ public: * @param[out] output Destination tensor. Dimensions, data type and number of channels must match the input ones. * @param[in] norm_info Normalization layer information like the normalization type, normalization size and other parameters. 
*/ - void configure(ICLTensor *input, ICLTensor *output, NormalizationLayerInfo norm_info); + void configure(ICLTensor *input, ICLTensor *output, const NormalizationLayerInfo &norm_info); // Inherited methods overridden: void run() override; diff --git a/arm_compute/runtime/GLES_COMPUTE/GCFunctions.h b/arm_compute/runtime/GLES_COMPUTE/GCFunctions.h new file mode 100644 index 0000000000..8a345c5fab --- /dev/null +++ b/arm_compute/runtime/GLES_COMPUTE/GCFunctions.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_GCFUNCTIONS_H__ +#define __ARM_COMPUTE_GCFUNCTIONS_H__ + +/* Header regrouping all the GLES compute functions */ +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCAbsoluteDifference.h" +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCActivationLayer.h" +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCBatchNormalizationLayer.h" +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCDepthConcatenate.h" +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCDirectConvolutionLayer.h" +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCDropoutLayer.h" +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCFillBorder.h" +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.h" +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCGEMM.h" +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCGEMMInterleave4x4.h" +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCGEMMTranspose1xW.h" +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCNormalizationLayer.h" +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCPixelWiseMultiplication.h" +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCPoolingLayer.h" +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.h" +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCTranspose.h" + +#endif /* __ARM_COMPUTE_GCFUNCTIONS_H__ */ diff --git a/arm_compute/runtime/GLES_COMPUTE/GCScheduler.h b/arm_compute/runtime/GLES_COMPUTE/GCScheduler.h new file mode 100644 index 0000000000..817f8b54b1 --- /dev/null +++ b/arm_compute/runtime/GLES_COMPUTE/GCScheduler.h @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __ARM_COMPUTE_GCSCHEDULER_H__ +#define __ARM_COMPUTE_GCSCHEDULER_H__ + +#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h" +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +class IGCKernel; + +/** Provides global access to a OpenGL ES context and command queue. */ +class GCScheduler +{ +private: + /** Constructor */ + GCScheduler(); + +public: + /** Access the scheduler singleton. + * + * @return The scheduler + */ + static GCScheduler &get(); + + /** Initialises the context and command queue used by the scheduler to default values + * and sets a default device and kernel path for the @ref GCKernelLibrary. + */ + void default_init(); + + /** Schedule the execution of the passed kernel if possible. + * + * @param[in] kernel Kernel to execute. 
+ * @param[in] flush (Optional) Specifies if the command queue will be flushed after running the kernel. + */ + void enqueue(IGCKernel &kernel, bool flush = true); + + /** Initialises the display and context to be used by the scheduler. + * + * @param[in] dpy The EGL display connection + * @param[in] ctx The EGL rendering context + */ + void init(EGLDisplay dpy, EGLContext ctx); + + /** Blocks until all commands in the associated command queue have finished. */ + void sync(); +}; +} + +#endif /* __ARM_COMPUTE_GCSCHEDULER_H__ */ diff --git a/arm_compute/runtime/GLES_COMPUTE/GCTensor.h b/arm_compute/runtime/GLES_COMPUTE/GCTensor.h new file mode 100644 index 0000000000..3e51f9908f --- /dev/null +++ b/arm_compute/runtime/GLES_COMPUTE/GCTensor.h @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef __ARM_COMPUTE_GCTENSOR_H__ +#define __ARM_COMPUTE_GCTENSOR_H__ + +#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCTensorAllocator.h" + +namespace arm_compute +{ +class ITensorAllocator; +class ITensorInfo; + +/** Interface for OpenGL ES tensor */ +class GCTensor : public IGCTensor +{ +public: + /** Default constructor */ + GCTensor(); + + /** Prevent instances of this class from being copied (As this class contains pointers). */ + GCTensor(const GCTensor &) = delete; + + /** Prevent instances of this class from being copy assigned (As this class contains pointers). */ + GCTensor &operator=(const GCTensor &) = delete; + + /** Allow instances of this class to be moved */ + GCTensor(GCTensor &&) = default; + + /** Allow instances of this class to be moved */ + GCTensor &operator=(GCTensor &&) = default; + + /** Virtual destructor */ + virtual ~GCTensor() = default; + + /** Return a pointer to the tensor's allocator + * + * @return A pointer to the tensor's allocator + */ + ITensorAllocator *allocator(); + + /** Enqueue a map operation of the allocated buffer on the given queue. + * + * @param[in] blocking (Optional) If true, then the mapping will be ready to use by the time + * this method returns, else it is the caller's responsibility + * to flush the queue and wait for the mapping operation to have completed before accessing the mapped memory. + * + * @note The mapped address can afterwards be retrieved through buffer(). + */ + void map(bool blocking = true); + + /** Enqueue an unmap operation of the allocated and mapped buffer on the given queue. + * + * @note This method simply enqueues the unmap operation, it is the caller's responsibility to flush the queue and make sure the unmap is finished before + * the memory is accessed by the device.
+ * + */ + void unmap(); + + // Inherited methods overridden: + TensorInfo *info() const override; + TensorInfo *info() override; + uint8_t *buffer() const override; + GLuint gc_buffer() const override; + +protected: + // Inherited methods overridden: + uint8_t *do_map(bool blocking) override; + void do_unmap() override; + +private: + mutable GCTensorAllocator _allocator; +}; + +using GCImage = GCTensor; +} + +#endif /*__ARM_COMPUTE_GCTENSOR_H__ */ diff --git a/arm_compute/runtime/GLES_COMPUTE/GCTensorAllocator.h b/arm_compute/runtime/GLES_COMPUTE/GCTensorAllocator.h new file mode 100644 index 0000000000..ce52cbbbdc --- /dev/null +++ b/arm_compute/runtime/GLES_COMPUTE/GCTensorAllocator.h @@ -0,0 +1,128 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef __ARM_COMPUTE_GCTENSORALLOCATOR_H__ +#define __ARM_COMPUTE_GCTENSORALLOCATOR_H__ + +#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h" +#include "arm_compute/runtime/ITensorAllocator.h" + +#include <memory> + +namespace arm_compute +{ +/** Basic implementation of a GLES memory tensor allocator. */ +class GCTensorAllocator : public ITensorAllocator +{ +public: + /** Default constructor. */ + GCTensorAllocator(); + + /** Prevent instances of this class from being copied (As this class contains pointers). */ + GCTensorAllocator(const GCTensorAllocator &) = delete; + + /** Prevent instances of this class from being copy assigned (As this class contains pointers). */ + GCTensorAllocator &operator=(const GCTensorAllocator &) = delete; + + /** Allow instances of this class to be moved */ + GCTensorAllocator(GCTensorAllocator &&) = default; + + /** Allow instances of this class to be moved */ + GCTensorAllocator &operator=(GCTensorAllocator &&) = default; + + /** Default destructor */ + ~GCTensorAllocator() = default; + + /** Interface to be implemented by the child class to return the pointer to the mapped data. */ + uint8_t *data(); + + /** Get the OpenGL ES buffer object name + * + * @return The buffer object name + */ + GLuint get_gl_ssbo_name() const; + + /** Enqueue a map operation of the allocated buffer on the given queue. + * + * @param[in] blocking If true, then the mapping will be ready to use by the time + * this method returns, else it is the caller's responsibility + * to flush the queue and wait for the mapping operation to have completed before using the returned mapping pointer. + * + * @return The mapping address. + */ + uint8_t *map(bool blocking); + + /** Enqueue an unmap operation of the allocated buffer on the given queue. + * + * @note This method simply enqueues the unmap operation, it is the caller's responsibility to flush the queue and make sure the unmap is finished before + * the memory is accessed by the device.
+ * + */ + void unmap(); + + /** Allocate size specified by TensorInfo of GLES memory. + * + * @note The tensor must not already be allocated when calling this function. + * + */ + void allocate() override; + + /** Free allocated GLES memory. + * + * @note The tensor must have been allocated when calling this function. + * + */ + void free() override; + +protected: + /** Call map() on the SSBO. + * + * @return A pointer to the beginning of the tensor's allocation. + */ + uint8_t *lock() override; + + /** Call unmap() on the SSBO. */ + void unlock() override; + +private: + class GLBufferWrapper + { + public: + GLBufferWrapper() + : _ssbo_name(0) + { + ARM_COMPUTE_GL_CHECK(glGenBuffers(1, &_ssbo_name)); + } + ~GLBufferWrapper() + { + ARM_COMPUTE_GL_CHECK(glDeleteBuffers(1, &_ssbo_name)); + } + GLuint _ssbo_name; + }; + std::unique_ptr<GLBufferWrapper> _gl_buffer; + uint8_t *_mapping; +}; +} + +#endif /* __ARM_COMPUTE_GCTENSORALLOCATOR_H__ */ diff --git a/arm_compute/runtime/GLES_COMPUTE/IGCSimpleFunction.h b/arm_compute/runtime/GLES_COMPUTE/IGCSimpleFunction.h new file mode 100644 index 0000000000..15bbfffe95 --- /dev/null +++ b/arm_compute/runtime/GLES_COMPUTE/IGCSimpleFunction.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_IGCSIMPLEFUNCTION_H__ +#define __ARM_COMPUTE_IGCSIMPLEFUNCTION_H__ + +#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h" +#include "arm_compute/core/GLES_COMPUTE/kernels/GCFillBorderKernel.h" +#include "arm_compute/runtime/IFunction.h" + +#include <memory> + +namespace arm_compute +{ +/** Basic interface for functions which have a single OpenGL ES kernel */ +class IGCSimpleFunction : public IFunction +{ +public: + /** Default constructor */ + IGCSimpleFunction(); + + // Inherited methods overridden: + void run() override final; + +protected: + std::unique_ptr<IGCKernel> _kernel; /**< Kernel to run */ + GCFillBorderKernel _border_handler; /**< Kernel to handle borders */ +}; +} +#endif /*__ARM_COMPUTE_IGCSIMPLEFUNCTION_H__ */ diff --git a/arm_compute/runtime/GLES_COMPUTE/functions/GCAbsoluteDifference.h b/arm_compute/runtime/GLES_COMPUTE/functions/GCAbsoluteDifference.h new file mode 100644 index 0000000000..0d4a354e26 --- /dev/null +++ b/arm_compute/runtime/GLES_COMPUTE/functions/GCAbsoluteDifference.h @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2017 ARM Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __ARM_COMPUTE_GCABSOLUTEDIFFERENCE_H__ +#define __ARM_COMPUTE_GCABSOLUTEDIFFERENCE_H__ + +#include "arm_compute/runtime/GLES_COMPUTE/IGCSimpleFunction.h" + +namespace arm_compute +{ +class ITensor; + +/** Basic function to run @ref GCAbsoluteDifferenceKernel + * + * @note The tensor data types for the inputs must be U8. + * @note The function calculates the absolute difference also when the 2 inputs have different tensor data types. + */ +class GCAbsoluteDifference : public IGCSimpleFunction +{ +public: + /** Initialize the function + * + * @param[in] input1 First input tensor. Data types supported: U8 + * @param[in] input2 Second input tensor. Data types supported: U8 + * @param[out] output Output tensor. 
Data types supported: U8 + */ + void configure(const IGCTensor *input1, const IGCTensor *input2, IGCTensor *output); +}; +} + +#endif /* __ARM_COMPUTE_GCABSOLUTEDIFFERENCE_H__ */ diff --git a/arm_compute/runtime/GLES_COMPUTE/functions/GCActivationLayer.h b/arm_compute/runtime/GLES_COMPUTE/functions/GCActivationLayer.h new file mode 100644 index 0000000000..b43456b2cd --- /dev/null +++ b/arm_compute/runtime/GLES_COMPUTE/functions/GCActivationLayer.h @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_GCACTIVATIONLAYER_H__ +#define __ARM_COMPUTE_GCACTIVATIONLAYER_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/GLES_COMPUTE/IGCSimpleFunction.h" + +namespace arm_compute +{ +class IGCTensor; + +/** Basic function to run @ref GCActivationLayerKernel + * + * @note The function simulates an activation layer with the specified activation function. + */ +class GCActivationLayer : public IGCSimpleFunction +{ +public: + /** Set the input and output tensor. + * + * @note If the output tensor is a nullptr, the activation function will be performed in-place + * + * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result + * of the activation function. Data types supported: F16/F32. + * @param[out] output Destination tensor. Data type supported: same as @p input + * @param[in] act_info Activation layer parameters. + */ + void configure(IGCTensor *input, IGCTensor *output, ActivationLayerInfo act_info); +}; +} +#endif /* __ARM_COMPUTE_GCACTIVATIONLAYER_H__ */ diff --git a/arm_compute/runtime/GLES_COMPUTE/functions/GCBatchNormalizationLayer.h b/arm_compute/runtime/GLES_COMPUTE/functions/GCBatchNormalizationLayer.h new file mode 100644 index 0000000000..9d81b9a7f7 --- /dev/null +++ b/arm_compute/runtime/GLES_COMPUTE/functions/GCBatchNormalizationLayer.h @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_GCBATCHNORMALIZATIONLAYER_H__ +#define __ARM_COMPUTE_GCBATCHNORMALIZATIONLAYER_H__ + +#include "arm_compute/runtime/IFunction.h" + +#include "arm_compute/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.h" +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +class IGCTensor; + +/** Basic function to run @ref GCBatchNormalizationLayerKernel and simulate a batch normalization layer. + * + * Batch normalization is calculated by: + * @f[ out_i = \gamma * (\frac{in_i - \mu_{B}}{\sqrt{\sigma^2_{B} + \epsilon}}) + \beta \equiv BN_{\gamma,\beta}(in_i) @f] + * + */ +class GCBatchNormalizationLayer : public IFunction +{ +public: + /** Default constructor */ + GCBatchNormalizationLayer(); + /** Set the input and output tensors. + * + * @param[in] input Source tensor. 
3 lower dimensions represent a single input with dimensions [width, height, FM]. + * The rest are optional and used for representing batches. Data types supported: F16/F32. + * @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data type supported: same as @p input + * @param[in] mean Mean values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] var Variance values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] beta Beta values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] gamma Gamma values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] epsilon Small value to avoid division with zero. + */ + void configure(const IGCTensor *input, IGCTensor *output, const IGCTensor *mean, const IGCTensor *var, const IGCTensor *beta, const IGCTensor *gamma, float epsilon); + + // Inherited methods overridden: + void run() override; + +private: + GCBatchNormalizationLayerKernel _norm_kernel; /**< BatchNormalization layer kernel to run */ +}; +} +#endif /* __ARM_COMPUTE_GCBATCHNORMALIZATIONLAYER_H__ */ diff --git a/arm_compute/runtime/GLES_COMPUTE/functions/GCDepthConcatenate.h b/arm_compute/runtime/GLES_COMPUTE/functions/GCDepthConcatenate.h new file mode 100644 index 0000000000..801dc0e111 --- /dev/null +++ b/arm_compute/runtime/GLES_COMPUTE/functions/GCDepthConcatenate.h @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_GCDEPTHCONCATENATE_H__ +#define __ARM_COMPUTE_GCDEPTHCONCATENATE_H__ + +#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h" +#include "arm_compute/core/GLES_COMPUTE/kernels/GCDepthConcatenateKernel.h" +#include "arm_compute/core/GLES_COMPUTE/kernels/GCFillBorderKernel.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/IFunction.h" + +#include <memory> +#include <vector> + +namespace arm_compute +{ +class IGCTensor; + +/** Basic function to execute concatenate tensors along z axis. 
This function calls the following kernels: + * + * -# @ref GCFillBorderKernel (executed if input's lowest two dimensions are smaller than respective output's dimensions) + * -# @ref GCDepthConcatenateKernel + * + */ +class GCDepthConcatenate : public IFunction +{ +public: + /** Default constructor */ + GCDepthConcatenate(); + /** Initialise the kernel's inputs vector and output. + * + * @param[in,out] inputs_vector The vectors containing all the tensors to concatenate. Data types supported: F16/F32. + * @param[out] output Output tensor. Data types supported: Same as @p input. + */ + void configure(std::vector<IGCTensor *> inputs_vector, IGCTensor *output); + + // Inherited methods overridden: + void run() override; + +private: + std::unique_ptr<GCDepthConcatenateKernel[]> _concat_kernels_vector; + std::unique_ptr<GCFillBorderKernel[]> _border_handlers_vector; + unsigned int _num_inputs; +}; +} +#endif /* __ARM_COMPUTE_GCDEPTHCONCATENATE_H__ */ diff --git a/arm_compute/runtime/GLES_COMPUTE/functions/GCDirectConvolutionLayer.h b/arm_compute/runtime/GLES_COMPUTE/functions/GCDirectConvolutionLayer.h new file mode 100644 index 0000000000..5472bdb9ea --- /dev/null +++ b/arm_compute/runtime/GLES_COMPUTE/functions/GCDirectConvolutionLayer.h @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_GCDIRECTCONVOLUTIONLAYER_H__ +#define __ARM_COMPUTE_GCDIRECTCONVOLUTIONLAYER_H__ + +#include "arm_compute/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/GLES_COMPUTE/IGCSimpleFunction.h" + +#include <memory> + +namespace arm_compute +{ +class IGCTensor; + +/** Basic function to execute direct convolution function: + * + * @note Supported kernel size: 1x1, 3x3, and 5x5 + * @note This OpenGL ES implementation works with stride_x = 1 and 2 + */ +class GCDirectConvolutionLayer : public IGCSimpleFunction +{ +public: + /** Set the input and output tensors. + * + * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM], + * while every optional dimension from 4 and above represent a batch of inputs. + * Data types supported: F16/F32. + * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported:Same as @p input. + * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. Data type supported:Same as @p input. + * @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs. + * Data types supported: Same as @p input. + * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. 
+ */ + void configure(const IGCTensor *input, const IGCTensor *weights, const IGCTensor *biases, IGCTensor *output, const PadStrideInfo &conv_info); +}; +} +#endif /* __ARM_COMPUTE_GCDIRECTCONVOLUTIONLAYER_H__ */ diff --git a/arm_compute/runtime/GLES_COMPUTE/functions/GCDropoutLayer.h b/arm_compute/runtime/GLES_COMPUTE/functions/GCDropoutLayer.h new file mode 100644 index 0000000000..6a08d96676 --- /dev/null +++ b/arm_compute/runtime/GLES_COMPUTE/functions/GCDropoutLayer.h @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __ARM_COMPUTE_GCDROPOUTLAYER_H__ +#define __ARM_COMPUTE_GCDROPOUTLAYER_H__ + +#include "arm_compute/core/GLES_COMPUTE/kernels/GCDropoutKernel.h" +#include "arm_compute/runtime/IFunction.h" + +namespace arm_compute +{ +class IGCTensor; +/** Basic function to do dropout op. 
This function calls the following kernels: + * + * -# @ref GCDropoutKernel + */ +class GCDropoutLayer : public IFunction +{ +public: + /** Constructor */ + GCDropoutLayer(); + + /** Set the input and output tensors. + * + * @param[in] input Source tensor. Data type supported: F16/F32. + * @param[out] mask Destination tensor. Data type supported: Same as @p input. + * @param[out] output Destination tensor. Data type supported: Same as @p input. + * @param[in] ratio Dropout ratio + * @param[in] forward Forward or backward propagation + * + */ + void configure(const IGCTensor *input, IGCTensor *mask, IGCTensor *output, float ratio, bool forward); + + //Inherited methods override + void run() override; + +private: + GCDropoutKernel _dropout_kernel; +}; +} + +#endif /* __ARM_COMPUTE_GCDROPOUTLAYER_H__ */ diff --git a/arm_compute/runtime/GLES_COMPUTE/functions/GCFillBorder.h b/arm_compute/runtime/GLES_COMPUTE/functions/GCFillBorder.h new file mode 100644 index 0000000000..a04e4002ff --- /dev/null +++ b/arm_compute/runtime/GLES_COMPUTE/functions/GCFillBorder.h @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __ARM_COMPUTE_GCFILLBORDER_H__ +#define __ARM_COMPUTE_GCFILLBORDER_H__ + +#include "arm_compute/core/PixelValue.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/GLES_COMPUTE/IGCSimpleFunction.h" + +namespace arm_compute +{ +class ITensor; + +/** Basic function to run @ref GCFillBorderKernel */ +class GCFillBorder : public IGCSimpleFunction +{ +public: + /** Initialize the function + * + * @param[in,out] tensor Source tensor. Data types supported: F16/F32 + * @param[in] border_width The border width + * @param[in] border_mode Strategy to use for borders. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + */ + void configure(IGCTensor *tensor, unsigned int border_width, BorderMode border_mode, + const PixelValue &constant_border_value = PixelValue()); +}; +} + +#endif /*__ARM_COMPUTE_FILLBORDER_H__ */ diff --git a/arm_compute/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.h b/arm_compute/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.h new file mode 100644 index 0000000000..1ae5837de0 --- /dev/null +++ b/arm_compute/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.h @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_GCFULLYCONNECTEDLAYER_H__ +#define __ARM_COMPUTE_GCFULLYCONNECTEDLAYER_H__ + +#include "arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixAccumulateBiasesKernel.h" +#include "arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixMultiplyKernel.h" +#include "arm_compute/core/GLES_COMPUTE/kernels/GCIm2ColKernel.h" +#include "arm_compute/core/GLES_COMPUTE/kernels/GCTransposeKernel.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h" +#include "arm_compute/runtime/GLES_COMPUTE/IGCSimpleFunction.h" + +namespace arm_compute +{ +/** Basic function to reshape the weights of Fully Connected layer with OpenGL ES. This function calls the following kernels: + * + * -# @ref GCTransposeKernel + * + * @note The fully connected layer accepts "weights" tensors only with 2 dimensions. 
+ */ +class GCFullyConnectedLayerReshapeWeights : public IGCSimpleFunction +{ +public: + /** Set the input and output tensors. + * + * @param[in] input Weights tensor. The weights must be 2 dimensional. Data types supported: F16/F32. + * @param[out] output Destination tensor which stores the transposed input tensor. Data type supported: Same as @p input. + */ + void configure(const IGCTensor *input, IGCTensor *output); +}; + +/** Basic function to compute a Fully Connected layer on OpenGL ES. This function calls the following OpenGL ES kernels: + * + * -# @ref GCIm2ColKernel (called when the input comes from a convolutional layer) + * -# @ref GCFullyConnectedLayerReshapeWeights (if @p are_weights_reshaped is set to false and transpose_weights is set to true ) (called once) + * -# @ref GCGEMMMatrixMultiplyKernel + * -# @ref GCGEMMMatrixAccumulateBiasesKernel (if @p biases is not equal to nullptr) + * + * @note The fully connected layer accepts "weights" tensors only with 2 dimensions. + */ +class GCFullyConnectedLayer : public IFunction +{ +public: + /** Constructor */ + GCFullyConnectedLayer(); + /** Set the input and output tensors. + * + * @param[in] input Source tensor. Data type supported: F16/F32. + * @param[in] weights Weights tensor. The weights must be 2 dimensional. Data type supported: Same as @p input + * @param[in] biases Bias tensor. It can be nullptr. Data type supported:Same as @p input. + * @param[out] output Destination tensor. Data type supported: Same as @p input. + * @param[in] transpose_weights (Optional) Transpose weights if true. Defaults to true. + * @param[in] are_weights_reshaped (Optional) Reshape the weights tensor if false. Defaults to false. 
+ */ + void configure(const IGCTensor *input, const IGCTensor *weights, const IGCTensor *biases, IGCTensor *output, bool transpose_weights = true, bool are_weights_reshaped = false); + + //Inherited methods override + void run() override; + +private: + void configure_fc_fc(const IGCTensor *input, const IGCTensor *weights, IGCTensor *output); + void configure_conv_fc(const IGCTensor *input, const IGCTensor *weights, IGCTensor *output); + + GCIm2ColKernel _im2col_kernel; + GCFullyConnectedLayerReshapeWeights _reshape_weights_kernel; + GCGEMMMatrixMultiplyKernel _mm_kernel; + GCGEMMMatrixAccumulateBiasesKernel _accumulate_biases_kernel; + GCTensor _im2col_output; + GCTensor _reshape_weights_output; + bool _are_weights_reshaped; + bool _is_fc_after_conv; + bool _accumulate_biases; +}; +} +#endif /* __ARM_COMPUTE_GCFULLYCONNECTEDLAYER_H__ */ diff --git a/arm_compute/runtime/GLES_COMPUTE/functions/GCGEMM.h b/arm_compute/runtime/GLES_COMPUTE/functions/GCGEMM.h new file mode 100644 index 0000000000..f2484cd801 --- /dev/null +++ b/arm_compute/runtime/GLES_COMPUTE/functions/GCGEMM.h @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __ARM_COMPUTE_GCGEMM_H__ +#define __ARM_COMPUTE_GCGEMM_H__ + +#include "arm_compute/core/GLES_COMPUTE/kernels/GCGEMMInterleave4x4Kernel.h" +#include "arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixAdditionKernel.h" +#include "arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixMultiplyKernel.h" +#include "arm_compute/core/GLES_COMPUTE/kernels/GCGEMMTranspose1xWKernel.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h" +#include "arm_compute/runtime/IFunction.h" + +namespace arm_compute +{ +class IGCTensor; + +/** Basic function to execute GEMM on OpenGLES Compute. This function calls the following kernels: + * + * -# @ref GCGEMMInterleave4x4Kernel (if the output tensor is a matrix) + * -# @ref GCGEMMTranspose1xWKernel (if the output tensor is a matrix) + * -# @ref GCGEMMMatrixMultiplyKernel + * -# @ref GCGEMMMatrixAdditionKernel (if c != nullptr and beta != 0.0) + * + */ +class GCGEMM : public IFunction +{ +public: + /** Default constructor. */ + GCGEMM(); + + /** Initialise the kernel's inputs and output + * + * @note GEMM: General Matrix Multiply - [alpha * A * B + beta * C]. + * + * @note All tensors must have the same data type. + * + * @note Whilst the first input tensor can be a vector, the second input tensor must be at least a matrix + * + * @param[in] a First input tensor (Matrix or Vector A). Data types supported: F32 + * @param[in] b Second input tensor (Matrix B). Data type supported: same as @p a. + * @param[in] c Third input tensor (Matrix C). 
It can be a nullptr if just the multiplication between @p a and @p b is needed. Data type supported: same as @p a. + * @param[out] output Output tensor. Data type supported: same as @p a + * @param[in] alpha Weight of the matrix product + * @param[in] beta Weight of matrix C + */ + void configure(const IGCTensor *a, const IGCTensor *b, const IGCTensor *c, IGCTensor *output, float alpha, float beta); + + // Inherited methods overridden: + void run() override; + +private: + GCGEMMInterleave4x4Kernel _interleave_kernel; + GCGEMMTranspose1xWKernel _transpose_kernel; + GCGEMMMatrixMultiplyKernel _mm_kernel; + GCGEMMMatrixAdditionKernel _ma_kernel; + GCTensor _tmp_a; + GCTensor _tmp_b; + bool _is_interleaved_transposed; + bool _run_addition; +}; +} + +#endif /* __ARM_COMPUTE_GCGEMM_H__ */ diff --git a/arm_compute/runtime/GLES_COMPUTE/functions/GCGEMMInterleave4x4.h b/arm_compute/runtime/GLES_COMPUTE/functions/GCGEMMInterleave4x4.h new file mode 100644 index 0000000000..48fa7ed504 --- /dev/null +++ b/arm_compute/runtime/GLES_COMPUTE/functions/GCGEMMInterleave4x4.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_GCGEMMINTERLEAVE4X4_H__ +#define __ARM_COMPUTE_GCGEMMINTERLEAVE4X4_H__ + +#include "arm_compute/runtime/GLES_COMPUTE/IGCSimpleFunction.h" + +namespace arm_compute +{ +class ITensor; + +/** Basic function to execute GCGEMMInterleave4x4Kernel. This function calls the following OpenGL ES kernel: + * + * -# @ref GCGEMMInterleave4x4Kernel + * + */ +class GCGEMMInterleave4x4 : public IGCSimpleFunction +{ +public: + /** Initialise the kernel's inputs, output + * + * @param[in] input First input tensor. Data types supported: F32 + * @param[out] output Output tensor. Data type supported: same as @p input + */ + void configure(const IGCTensor *input, IGCTensor *output); +}; +} + +#endif /* __ARM_COMPUTE_GCGEMMINTERLEAVE4X4_H__ */ diff --git a/arm_compute/runtime/GLES_COMPUTE/functions/GCGEMMTranspose1xW.h b/arm_compute/runtime/GLES_COMPUTE/functions/GCGEMMTranspose1xW.h new file mode 100644 index 0000000000..24af2193c3 --- /dev/null +++ b/arm_compute/runtime/GLES_COMPUTE/functions/GCGEMMTranspose1xW.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_GCGEMMTRANSPOSE1XW_H__ +#define __ARM_COMPUTE_GCGEMMTRANSPOSE1XW_H__ + +#include "arm_compute/runtime/GLES_COMPUTE/IGCSimpleFunction.h" + +namespace arm_compute +{ +/** Basic function to execute GCGEMMTranspose1xWKernel. This function calls the following OpenGLES kernels: + * + * -# @ref GCGEMMTranspose1xWKernel + * + */ +class GCGEMMTranspose1xW : public IGCSimpleFunction +{ +public: + /** Initialise the kernel's inputs, output + * + * @param[in] input First input tensor. Data type supported: F32 + * @param[out] output Output tensor. 
Data type supported: same as @p input + */ + void configure(const IGCTensor *input, IGCTensor *output); +}; +} +#endif /*__ARM_COMPUTE_GCGEMMTRANSPOSE1XW_H__ */ diff --git a/arm_compute/runtime/GLES_COMPUTE/functions/GCNormalizationLayer.h b/arm_compute/runtime/GLES_COMPUTE/functions/GCNormalizationLayer.h new file mode 100644 index 0000000000..d080a2f7b9 --- /dev/null +++ b/arm_compute/runtime/GLES_COMPUTE/functions/GCNormalizationLayer.h @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_GCNORMALIZATIONLAYER_H__ +#define __ARM_COMPUTE_GCNORMALIZATIONLAYER_H__ + +#include "arm_compute/runtime/IFunction.h" + +#include "arm_compute/core/GLES_COMPUTE/kernels/GCFillBorderKernel.h" +#include "arm_compute/core/GLES_COMPUTE/kernels/GCNormalizationLayerKernel.h" +#include "arm_compute/core/GLES_COMPUTE/kernels/GCPixelWiseMultiplicationKernel.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h" + +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +class IGCTensor; + +/** Basic function to compute a normalization layer. This function calls the following OpenGL ES kernels: + * + * -# @ref GCPixelWiseMultiplicationKernel + * -# @ref GCFillBorderKernel + * -# @ref GCNormalizationLayerKernel + * + */ +class GCNormalizationLayer : public IFunction +{ +public: + /** Default constructor */ + GCNormalizationLayer(); + /** Set the input and output tensors. + * + * @param[in] input Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM], + * and an optional 4th dimension for batch of inputs. Data types supported: F32. Number of channels must be 1. + * @param[out] output Destination tensor. Dimensions, data type and number of channels must match the input ones. + * @param[in] norm_info Normalization layer information like the normalization type, normalization size and other parameters. 
+ */ + void configure(const IGCTensor *input, IGCTensor *output, const NormalizationLayerInfo &norm_info); + + // Inherited methods overridden: + void run() override; + +private: + GCTensor _squared_input; /**< The intermediate buffer which stores results of squaring input*/ + GCNormalizationLayerKernel _norm_kernel; /**< Normalization layer kernel to run */ + GCPixelWiseMultiplicationKernel _multiply_kernel; /**< Pixel multiplication kernel to run */ + GCFillBorderKernel _border_handler; /**< Kernel to handle borders */ +}; +} +#endif /* __ARM_COMPUTE_GCNORMALIZATIONLAYER_H__ */ diff --git a/arm_compute/runtime/GLES_COMPUTE/functions/GCPixelWiseMultiplication.h b/arm_compute/runtime/GLES_COMPUTE/functions/GCPixelWiseMultiplication.h new file mode 100644 index 0000000000..e6239edc2f --- /dev/null +++ b/arm_compute/runtime/GLES_COMPUTE/functions/GCPixelWiseMultiplication.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_GCPIXELWISEMULTIPLICATION_H__ +#define __ARM_COMPUTE_GCPIXELWISEMULTIPLICATION_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/GLES_COMPUTE/IGCSimpleFunction.h" + +namespace arm_compute +{ +class IGCTensor; + +/** Basic function to run @ref GCPixelWiseMultiplicationKernel. */ +class GCPixelWiseMultiplication : public IGCSimpleFunction +{ +public: + /** Initialise the kernel's inputs, output and conversion policy. + * + * @param[in] input1 First tensor input. Data types supported: F32. + * @param[in] input2 Second tensor input. Data types supported: Same as @p input1. + * @param[out] output Output tensor. Data types supported: Same as @p input1. + * @param[in] scale Scale to apply after multiplication. Must be a positive value. + */ + void configure(const IGCTensor *input1, const IGCTensor *input2, IGCTensor *output, float scale); +}; +} +#endif /*__ARM_COMPUTE_GCPIXELWISEMULTIPLICATION_H__ */ diff --git a/arm_compute/runtime/GLES_COMPUTE/functions/GCPoolingLayer.h b/arm_compute/runtime/GLES_COMPUTE/functions/GCPoolingLayer.h new file mode 100644 index 0000000000..cce44d0c3c --- /dev/null +++ b/arm_compute/runtime/GLES_COMPUTE/functions/GCPoolingLayer.h @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_GCPOOLINGLAYER_H__ +#define __ARM_COMPUTE_GCPOOLINGLAYER_H__ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/GLES_COMPUTE/IGCSimpleFunction.h" + +#include + +namespace arm_compute +{ +class IGCTensor; + +/** Basic function to simulate a pooling layer with the specified pooling operation. This function calls the following OpenGL ES kernels: + * + * -# @ref GCFillBorderKernel (executed if padding size is different from zero) + * -# @ref GCPoolingLayerKernel + */ +class GCPoolingLayer : public IGCSimpleFunction +{ +public: + /** Set the input and output tensors. + * + * @param[in,out] input Source tensor. (Written to only when padding != 0) Data types supported: F16/F32. + * @param[out] output Destination tensor. Data types supported: Same as @p input. 
+ * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo. + */ + void configure(IGCTensor *input, IGCTensor *output, const PoolingLayerInfo &pool_info); +}; +} // namespace arm_compute +#endif /* __ARM_COMPUTE_GCPOOLINGLAYER_H__ */ diff --git a/arm_compute/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.h b/arm_compute/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.h new file mode 100644 index 0000000000..19bfb83eca --- /dev/null +++ b/arm_compute/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.h @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_GCSOFTMAXLAYER_H__ +#define __ARM_COMPUTE_GCSOFTMAXLAYER_H__ + +#include "arm_compute/core/GLES_COMPUTE/kernels/GCSoftmaxLayerKernel.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h" +#include "arm_compute/runtime/IFunction.h" + +namespace arm_compute +{ +class IGCTensor; + +/** Basic function to compute a SoftmaxLayer. + * + * Softmax is calculated by : + * @f[ out = exp(x - max(x)) / sum(exp(x - max(x))) @f] + * + * This function runs the following kernels: + * -# @ref GCLogits1DMaxKernel + * -# @ref GCLogits1DShiftExpSumKernel + * -# @ref GCLogits1DNormKernel + */ +class GCSoftmaxLayer : public IFunction +{ +public: + /** Constructor */ + GCSoftmaxLayer(); + /** Set the input and output tensors. + * + * @param[in] input Source tensor. Data types supported: F16/F32 + * @param[out] output Destination tensor. Data types supported: same as @p input + */ + void configure(const IGCTensor *input, IGCTensor *output); + + // Inherited methods overridden: + void run() override; + +private: + GCLogits1DMaxKernel _max_kernel; + GCLogits1DShiftExpSumKernel _shift_exp_sum_kernel; + GCLogits1DNormKernel _norm_kernel; + GCTensor _max; + GCTensor _sum; + GCTensor _tmp; +}; +} +#endif /* __ARM_COMPUTE_GCSOFTMAXLAYER_H__ */ diff --git a/arm_compute/runtime/GLES_COMPUTE/functions/GCTranspose.h b/arm_compute/runtime/GLES_COMPUTE/functions/GCTranspose.h new file mode 100644 index 0000000000..23324343f9 --- /dev/null +++ b/arm_compute/runtime/GLES_COMPUTE/functions/GCTranspose.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_GCTRANSPOSE_H__ +#define __ARM_COMPUTE_GCTRANSPOSE_H__ + +#include "arm_compute/runtime/GLES_COMPUTE/IGCSimpleFunction.h" + +namespace arm_compute +{ +class IGCTensor; + +/** Basic function to transpose a matrix on OpenGL ES. This function calls the following OpenGL ES kernel: + * + * -# @ref GCTransposeKernel + * + */ +class GCTranspose : public IGCSimpleFunction +{ +public: + /** Initialise the kernel's inputs and output + * + * @param[in] input Input tensor. Data types supported: F16/F32 + * @param[out] output Output tensor. 
Data type supported: Same as @p input + */ + void configure(const IGCTensor *input, IGCTensor *output); +}; +} + +#endif /* __ARM_COMPUTE_GCTRANSPOSE_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h b/arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h index 041b9e7290..b2de7162f1 100644 --- a/arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h +++ b/arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h @@ -50,12 +50,12 @@ public: * @param[in, out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result. * 3 lower dimensions represent a single input with dimensions [width, height, FM]. * The rest are optional and used for representing batches. Data types supported: QS8/QS16/F16/F32. + * @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data type supported: same as @p input * @param[in] mean Mean values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input * @param[in] var Variance values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input - * @param[in] gamma Gamma values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input * @param[in] beta Beta values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] gamma Gamma values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input * @param[in] epsilon Small value to avoid division with zero. - * @param[out] output Destination tensor. Output will have the same number of dimensions as input. 
Data type supported: same as @p input */ void configure(ITensor *input, ITensor *output, const ITensor *mean, const ITensor *var, const ITensor *beta, const ITensor *gamma, float epsilon); diff --git a/arm_compute/runtime/NEON/functions/NENormalizationLayer.h b/arm_compute/runtime/NEON/functions/NENormalizationLayer.h index 1c95c5bc4a..0d5656d602 100644 --- a/arm_compute/runtime/NEON/functions/NENormalizationLayer.h +++ b/arm_compute/runtime/NEON/functions/NENormalizationLayer.h @@ -40,7 +40,7 @@ namespace arm_compute { class ITensor; -/** Basic function to simulate a normalization layer. This function calls the following NEON kernels: +/** Basic function to compute a normalization layer. This function calls the following NEON kernels: * * -# @ref NEPixelWiseMultiplicationKernel * -# @ref NEFillBorderKernel @@ -59,7 +59,7 @@ public: * @param[out] output Destination with the same dimensions, data type and number of channels of @p input * @param[in] norm_info Normalization layer information like the normalization type, normalization size and other parameters. 
*/ - void configure(const ITensor *input, ITensor *output, NormalizationLayerInfo norm_info); + void configure(const ITensor *input, ITensor *output, const NormalizationLayerInfo &norm_info); // Inherited methods overridden: void run() override; diff --git a/examples/SConscript b/examples/SConscript index 52d2f26cfe..90b271d473 100644 --- a/examples/SConscript +++ b/examples/SConscript @@ -27,11 +27,18 @@ Import('env') if env['opencl']: Import('opencl') +if env['gles_compute'] and env['os'] != 'android': + Import('egl') + Import('glesv2') + examples_env = env.Clone() examples_env.Append(CPPPATH = ["#"]) examples_env.Append(LIBPATH = ["#build/%s/opencl-1.2-stubs" % env['build_dir']]) +if env['gles_compute'] and env['os'] != 'android': + examples_env.Append(LIBPATH = ["#build/%s/opengles-3.1/stubs" % env['build_dir']]) + # Build examples utils = examples_env.Object("../utils/Utils.cpp") @@ -86,3 +93,19 @@ if env['neon']: Depends(prog, arm_compute_dependency) alias = examples_env.Alias(example, prog) Default(alias) + +if env['gles_compute']: + for file in Glob("./gc_*.cpp"): + example = os.path.basename(os.path.splitext(str(file))[0]) + if env['os'] != 'android': + examples_env.Append(CPPPATH = ["#opengles-3.1/include", "#opengles-3.1/mali_include"]) + prog = examples_env.Program(example, ["{}.cpp".format(example), utils], CPPDEFINES=['ARM_COMPUTE_GC'], LIBS = [arm_compute_libs, "EGL", "GLESv2"]) + Depends(prog, [arm_compute_dependency, egl, glesv2]) + else: + if env['arch'] != 'armv7a': + prog = examples_env.Program(example, ["{}.cpp".format(example), utils], CPPDEFINES=['ARM_COMPUTE_GC'], LIBS = [arm_compute_libs, "EGL", "GLESv3"]) + else: + prog = examples_env.Program(example, ["{}.cpp".format(example), utils], CPPDEFINES=['ARM_COMPUTE_GC'], LIBS = [arm_compute_libs, "EGL", "GLESv2"]) + Depends(prog, [arm_compute_dependency]) + alias = examples_env.Alias(example, prog) + Default(alias) diff --git a/examples/gc_absdiff.cpp b/examples/gc_absdiff.cpp new file mode 
100644 index 0000000000..cd3e42989c --- /dev/null +++ b/examples/gc_absdiff.cpp @@ -0,0 +1,112 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef ARM_COMPUTE_GC /* Needed by Utils.cpp to handle OpenGL ES exceptions properly */ +#error "This example needs to be built with -DARM_COMPUTE_GC" +#endif /* ARM_COMPUTE_GC */ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCFunctions.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCScheduler.h" +#include "utils/Utils.h" + +using namespace arm_compute; +using namespace utils; + +void main_gc_absdiff(int argc, const char **argv) +{ + PPMLoader ppm1, ppm2; + GCImage src1, src2, dst; + GCScheduler::get().default_init(); + if(argc < 2) + { + // Print help + std::cout << "Usage: " << argv[0] << " [input0_image.ppm] [input1_image.ppm] \n\n"; + std::cout << "No input_image provided, creating two dummy 640x480 images\n"; + // Create two empty grayscale 640x480 images + src1.allocator()->init(TensorInfo(640, 480, Format::U8)); + src2.allocator()->init(TensorInfo(640, 480, Format::U8)); + } + else if(argc < 3) + { + // Print help + std::cout << "Usage: " << argv[0] << " [input0_image.ppm] [input1_image.ppm] \n\n"; + std::cout << "Only one input_image provided, creating a dummy 640x480 image\n"; + ppm1.open(argv[1]); + ppm1.init_image(src1, Format::U8); + // Create an empty grayscale 640x480 image + src2.allocator()->init(TensorInfo(640, 480, Format::U8)); + } + else + { + ppm1.open(argv[1]); + ppm1.init_image(src1, Format::U8); + ppm2.open(argv[2]); + ppm2.init_image(src2, Format::U8); + } + + // Configure the temporary and destination images + dst.allocator()->init(*src1.info()); + + GCAbsoluteDifference absdiff; + absdiff.configure(&src1, &src2, &dst); + + // Allocate all the images + src1.allocator()->allocate(); + src2.allocator()->allocate(); + dst.allocator()->allocate(); + + // Fill the input image with the content of the PPM image if a filename was provided: + if(ppm1.is_open()) + { + ppm1.fill_image(src1); + } + if(ppm2.is_open()) + { + ppm2.fill_image(src2); + } + + // Execute the functions: + absdiff.run(); + + // 
Make sure all the jobs are done executing: + GCScheduler::get().sync(); + + // Save the result to file: + if(ppm1.is_open()) + { + const std::string output_filename = std::string(argv[1]) + "_out.ppm"; + save_to_ppm(dst, output_filename); // save_to_ppm maps and unmaps the image to store as PPM + } +} + +/** Main program for absdiff test + * + * @param[in] argc Number of arguments + * @param[in] argv Arguments ( [optional] Path to the first PPM image to process, [optional] Path to the second PPM image to process ) + */ +int main(int argc, const char **argv) +{ + return utils::run_example(argc, argv, main_gc_absdiff); +} diff --git a/opengles-3.1/include/EGL/egl.h b/opengles-3.1/include/EGL/egl.h new file mode 100644 index 0000000000..13ccde3558 --- /dev/null +++ b/opengles-3.1/include/EGL/egl.h @@ -0,0 +1,303 @@ +#ifndef __egl_h_ +#define __egl_h_ 1 + +#ifdef __cplusplus +extern "C" { +#endif + +/* +** Copyright (c) 2013-2014 The Khronos Group Inc. +** +** Permission is hereby granted, free of charge, to any person obtaining a +** copy of this software and/or associated documentation files (the +** "Materials"), to deal in the Materials without restriction, including +** without limitation the rights to use, copy, modify, merge, publish, +** distribute, sublicense, and/or sell copies of the Materials, and to +** permit persons to whom the Materials are furnished to do so, subject to +** the following conditions: +** +** The above copyright notice and this permission notice shall be included +** in all copies or substantial portions of the Materials. +** +** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +** MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. +*/ +/* +** This header is generated from the Khronos OpenGL / OpenGL ES XML +** API Registry. The current version of the Registry, generator scripts +** used to make the header, and the header can be found at +** http://www.opengl.org/registry/ +** +** Khronos $Revision: 28371 $ on $Date: 2014-10-01 09:16:09 -0700 (Wed, 01 Oct 2014) $ +*/ + +#include + +/* Generated on date 20141001 */ + +/* Generated C header for: + * API: egl + * Versions considered: .* + * Versions emitted: .* + * Default extensions included: None + * Additional extensions included: _nomatch_^ + * Extensions removed: _nomatch_^ + */ + +#ifndef EGL_VERSION_1_0 +#define EGL_VERSION_1_0 1 +typedef unsigned int EGLBoolean; +typedef void *EGLDisplay; +#include +#include +typedef void *EGLConfig; +typedef void *EGLSurface; +typedef void *EGLContext; +typedef void (*__eglMustCastToProperFunctionPointerType)(void); +#define EGL_ALPHA_SIZE 0x3021 +#define EGL_BAD_ACCESS 0x3002 +#define EGL_BAD_ALLOC 0x3003 +#define EGL_BAD_ATTRIBUTE 0x3004 +#define EGL_BAD_CONFIG 0x3005 +#define EGL_BAD_CONTEXT 0x3006 +#define EGL_BAD_CURRENT_SURFACE 0x3007 +#define EGL_BAD_DISPLAY 0x3008 +#define EGL_BAD_MATCH 0x3009 +#define EGL_BAD_NATIVE_PIXMAP 0x300A +#define EGL_BAD_NATIVE_WINDOW 0x300B +#define EGL_BAD_PARAMETER 0x300C +#define EGL_BAD_SURFACE 0x300D +#define EGL_BLUE_SIZE 0x3022 +#define EGL_BUFFER_SIZE 0x3020 +#define EGL_CONFIG_CAVEAT 0x3027 +#define EGL_CONFIG_ID 0x3028 +#define EGL_CORE_NATIVE_ENGINE 0x305B +#define EGL_DEPTH_SIZE 0x3025 +#define EGL_DONT_CARE ((EGLint)-1) +#define EGL_DRAW 0x3059 +#define EGL_EXTENSIONS 0x3055 +#define EGL_FALSE 0 +#define EGL_GREEN_SIZE 0x3023 +#define EGL_HEIGHT 0x3056 +#define EGL_LARGEST_PBUFFER 
0x3058 +#define EGL_LEVEL 0x3029 +#define EGL_MAX_PBUFFER_HEIGHT 0x302A +#define EGL_MAX_PBUFFER_PIXELS 0x302B +#define EGL_MAX_PBUFFER_WIDTH 0x302C +#define EGL_NATIVE_RENDERABLE 0x302D +#define EGL_NATIVE_VISUAL_ID 0x302E +#define EGL_NATIVE_VISUAL_TYPE 0x302F +#define EGL_NONE 0x3038 +#define EGL_NON_CONFORMANT_CONFIG 0x3051 +#define EGL_NOT_INITIALIZED 0x3001 +#define EGL_NO_CONTEXT ((EGLContext)0) +#define EGL_NO_DISPLAY ((EGLDisplay)0) +#define EGL_NO_SURFACE ((EGLSurface)0) +#define EGL_PBUFFER_BIT 0x0001 +#define EGL_PIXMAP_BIT 0x0002 +#define EGL_READ 0x305A +#define EGL_RED_SIZE 0x3024 +#define EGL_SAMPLES 0x3031 +#define EGL_SAMPLE_BUFFERS 0x3032 +#define EGL_SLOW_CONFIG 0x3050 +#define EGL_STENCIL_SIZE 0x3026 +#define EGL_SUCCESS 0x3000 +#define EGL_SURFACE_TYPE 0x3033 +#define EGL_TRANSPARENT_BLUE_VALUE 0x3035 +#define EGL_TRANSPARENT_GREEN_VALUE 0x3036 +#define EGL_TRANSPARENT_RED_VALUE 0x3037 +#define EGL_TRANSPARENT_RGB 0x3052 +#define EGL_TRANSPARENT_TYPE 0x3034 +#define EGL_TRUE 1 +#define EGL_VENDOR 0x3053 +#define EGL_VERSION 0x3054 +#define EGL_WIDTH 0x3057 +#define EGL_WINDOW_BIT 0x0004 +EGLAPI EGLBoolean EGLAPIENTRY eglChooseConfig (EGLDisplay dpy, const EGLint *attrib_list, EGLConfig *configs, EGLint config_size, EGLint *num_config); +EGLAPI EGLBoolean EGLAPIENTRY eglCopyBuffers (EGLDisplay dpy, EGLSurface surface, EGLNativePixmapType target); +EGLAPI EGLContext EGLAPIENTRY eglCreateContext (EGLDisplay dpy, EGLConfig config, EGLContext share_context, const EGLint *attrib_list); +EGLAPI EGLSurface EGLAPIENTRY eglCreatePbufferSurface (EGLDisplay dpy, EGLConfig config, const EGLint *attrib_list); +EGLAPI EGLSurface EGLAPIENTRY eglCreatePixmapSurface (EGLDisplay dpy, EGLConfig config, EGLNativePixmapType pixmap, const EGLint *attrib_list); +EGLAPI EGLSurface EGLAPIENTRY eglCreateWindowSurface (EGLDisplay dpy, EGLConfig config, EGLNativeWindowType win, const EGLint *attrib_list); +EGLAPI EGLBoolean EGLAPIENTRY eglDestroyContext (EGLDisplay dpy, 
EGLContext ctx); +EGLAPI EGLBoolean EGLAPIENTRY eglDestroySurface (EGLDisplay dpy, EGLSurface surface); +EGLAPI EGLBoolean EGLAPIENTRY eglGetConfigAttrib (EGLDisplay dpy, EGLConfig config, EGLint attribute, EGLint *value); +EGLAPI EGLBoolean EGLAPIENTRY eglGetConfigs (EGLDisplay dpy, EGLConfig *configs, EGLint config_size, EGLint *num_config); +EGLAPI EGLDisplay EGLAPIENTRY eglGetCurrentDisplay (void); +EGLAPI EGLSurface EGLAPIENTRY eglGetCurrentSurface (EGLint readdraw); +EGLAPI EGLDisplay EGLAPIENTRY eglGetDisplay (EGLNativeDisplayType display_id); +EGLAPI EGLint EGLAPIENTRY eglGetError (void); +EGLAPI __eglMustCastToProperFunctionPointerType EGLAPIENTRY eglGetProcAddress (const char *procname); +EGLAPI EGLBoolean EGLAPIENTRY eglInitialize (EGLDisplay dpy, EGLint *major, EGLint *minor); +EGLAPI EGLBoolean EGLAPIENTRY eglMakeCurrent (EGLDisplay dpy, EGLSurface draw, EGLSurface read, EGLContext ctx); +EGLAPI EGLBoolean EGLAPIENTRY eglQueryContext (EGLDisplay dpy, EGLContext ctx, EGLint attribute, EGLint *value); +EGLAPI const char *EGLAPIENTRY eglQueryString (EGLDisplay dpy, EGLint name); +EGLAPI EGLBoolean EGLAPIENTRY eglQuerySurface (EGLDisplay dpy, EGLSurface surface, EGLint attribute, EGLint *value); +EGLAPI EGLBoolean EGLAPIENTRY eglSwapBuffers (EGLDisplay dpy, EGLSurface surface); +EGLAPI EGLBoolean EGLAPIENTRY eglTerminate (EGLDisplay dpy); +EGLAPI EGLBoolean EGLAPIENTRY eglWaitGL (void); +EGLAPI EGLBoolean EGLAPIENTRY eglWaitNative (EGLint engine); +#endif /* EGL_VERSION_1_0 */ + +#ifndef EGL_VERSION_1_1 +#define EGL_VERSION_1_1 1 +#define EGL_BACK_BUFFER 0x3084 +#define EGL_BIND_TO_TEXTURE_RGB 0x3039 +#define EGL_BIND_TO_TEXTURE_RGBA 0x303A +#define EGL_CONTEXT_LOST 0x300E +#define EGL_MIN_SWAP_INTERVAL 0x303B +#define EGL_MAX_SWAP_INTERVAL 0x303C +#define EGL_MIPMAP_TEXTURE 0x3082 +#define EGL_MIPMAP_LEVEL 0x3083 +#define EGL_NO_TEXTURE 0x305C +#define EGL_TEXTURE_2D 0x305F +#define EGL_TEXTURE_FORMAT 0x3080 +#define EGL_TEXTURE_RGB 0x305D +#define 
EGL_TEXTURE_RGBA 0x305E +#define EGL_TEXTURE_TARGET 0x3081 +EGLAPI EGLBoolean EGLAPIENTRY eglBindTexImage (EGLDisplay dpy, EGLSurface surface, EGLint buffer); +EGLAPI EGLBoolean EGLAPIENTRY eglReleaseTexImage (EGLDisplay dpy, EGLSurface surface, EGLint buffer); +EGLAPI EGLBoolean EGLAPIENTRY eglSurfaceAttrib (EGLDisplay dpy, EGLSurface surface, EGLint attribute, EGLint value); +EGLAPI EGLBoolean EGLAPIENTRY eglSwapInterval (EGLDisplay dpy, EGLint interval); +#endif /* EGL_VERSION_1_1 */ + +#ifndef EGL_VERSION_1_2 +#define EGL_VERSION_1_2 1 +typedef unsigned int EGLenum; +typedef void *EGLClientBuffer; +#define EGL_ALPHA_FORMAT 0x3088 +#define EGL_ALPHA_FORMAT_NONPRE 0x308B +#define EGL_ALPHA_FORMAT_PRE 0x308C +#define EGL_ALPHA_MASK_SIZE 0x303E +#define EGL_BUFFER_PRESERVED 0x3094 +#define EGL_BUFFER_DESTROYED 0x3095 +#define EGL_CLIENT_APIS 0x308D +#define EGL_COLORSPACE 0x3087 +#define EGL_COLORSPACE_sRGB 0x3089 +#define EGL_COLORSPACE_LINEAR 0x308A +#define EGL_COLOR_BUFFER_TYPE 0x303F +#define EGL_CONTEXT_CLIENT_TYPE 0x3097 +#define EGL_DISPLAY_SCALING 10000 +#define EGL_HORIZONTAL_RESOLUTION 0x3090 +#define EGL_LUMINANCE_BUFFER 0x308F +#define EGL_LUMINANCE_SIZE 0x303D +#define EGL_OPENGL_ES_BIT 0x0001 +#define EGL_OPENVG_BIT 0x0002 +#define EGL_OPENGL_ES_API 0x30A0 +#define EGL_OPENVG_API 0x30A1 +#define EGL_OPENVG_IMAGE 0x3096 +#define EGL_PIXEL_ASPECT_RATIO 0x3092 +#define EGL_RENDERABLE_TYPE 0x3040 +#define EGL_RENDER_BUFFER 0x3086 +#define EGL_RGB_BUFFER 0x308E +#define EGL_SINGLE_BUFFER 0x3085 +#define EGL_SWAP_BEHAVIOR 0x3093 +#define EGL_UNKNOWN ((EGLint)-1) +#define EGL_VERTICAL_RESOLUTION 0x3091 +EGLAPI EGLBoolean EGLAPIENTRY eglBindAPI (EGLenum api); +EGLAPI EGLenum EGLAPIENTRY eglQueryAPI (void); +EGLAPI EGLSurface EGLAPIENTRY eglCreatePbufferFromClientBuffer (EGLDisplay dpy, EGLenum buftype, EGLClientBuffer buffer, EGLConfig config, const EGLint *attrib_list); +EGLAPI EGLBoolean EGLAPIENTRY eglReleaseThread (void); +EGLAPI EGLBoolean EGLAPIENTRY 
eglWaitClient (void); +#endif /* EGL_VERSION_1_2 */ + +#ifndef EGL_VERSION_1_3 +#define EGL_VERSION_1_3 1 +#define EGL_CONFORMANT 0x3042 +#define EGL_CONTEXT_CLIENT_VERSION 0x3098 +#define EGL_MATCH_NATIVE_PIXMAP 0x3041 +#define EGL_OPENGL_ES2_BIT 0x0004 +#define EGL_VG_ALPHA_FORMAT 0x3088 +#define EGL_VG_ALPHA_FORMAT_NONPRE 0x308B +#define EGL_VG_ALPHA_FORMAT_PRE 0x308C +#define EGL_VG_ALPHA_FORMAT_PRE_BIT 0x0040 +#define EGL_VG_COLORSPACE 0x3087 +#define EGL_VG_COLORSPACE_sRGB 0x3089 +#define EGL_VG_COLORSPACE_LINEAR 0x308A +#define EGL_VG_COLORSPACE_LINEAR_BIT 0x0020 +#endif /* EGL_VERSION_1_3 */ + +#ifndef EGL_VERSION_1_4 +#define EGL_VERSION_1_4 1 +#define EGL_DEFAULT_DISPLAY ((EGLNativeDisplayType)0) +#define EGL_MULTISAMPLE_RESOLVE_BOX_BIT 0x0200 +#define EGL_MULTISAMPLE_RESOLVE 0x3099 +#define EGL_MULTISAMPLE_RESOLVE_DEFAULT 0x309A +#define EGL_MULTISAMPLE_RESOLVE_BOX 0x309B +#define EGL_OPENGL_API 0x30A2 +#define EGL_OPENGL_BIT 0x0008 +#define EGL_SWAP_BEHAVIOR_PRESERVED_BIT 0x0400 +EGLAPI EGLContext EGLAPIENTRY eglGetCurrentContext (void); +#endif /* EGL_VERSION_1_4 */ + +#ifndef EGL_VERSION_1_5 +#define EGL_VERSION_1_5 1 +typedef void *EGLSync; +typedef intptr_t EGLAttrib; +typedef khronos_utime_nanoseconds_t EGLTime; +typedef void *EGLImage; +#define EGL_CONTEXT_MAJOR_VERSION 0x3098 +#define EGL_CONTEXT_MINOR_VERSION 0x30FB +#define EGL_CONTEXT_OPENGL_PROFILE_MASK 0x30FD +#define EGL_CONTEXT_OPENGL_RESET_NOTIFICATION_STRATEGY 0x31BD +#define EGL_NO_RESET_NOTIFICATION 0x31BE +#define EGL_LOSE_CONTEXT_ON_RESET 0x31BF +#define EGL_CONTEXT_OPENGL_CORE_PROFILE_BIT 0x00000001 +#define EGL_CONTEXT_OPENGL_COMPATIBILITY_PROFILE_BIT 0x00000002 +#define EGL_CONTEXT_OPENGL_DEBUG 0x31B0 +#define EGL_CONTEXT_OPENGL_FORWARD_COMPATIBLE 0x31B1 +#define EGL_CONTEXT_OPENGL_ROBUST_ACCESS 0x31B2 +#define EGL_OPENGL_ES3_BIT 0x00000040 +#define EGL_CL_EVENT_HANDLE 0x309C +#define EGL_SYNC_CL_EVENT 0x30FE +#define EGL_SYNC_CL_EVENT_COMPLETE 0x30FF +#define 
EGL_SYNC_PRIOR_COMMANDS_COMPLETE 0x30F0 +#define EGL_SYNC_TYPE 0x30F7 +#define EGL_SYNC_STATUS 0x30F1 +#define EGL_SYNC_CONDITION 0x30F8 +#define EGL_SIGNALED 0x30F2 +#define EGL_UNSIGNALED 0x30F3 +#define EGL_SYNC_FLUSH_COMMANDS_BIT 0x0001 +#define EGL_FOREVER 0xFFFFFFFFFFFFFFFFull +#define EGL_TIMEOUT_EXPIRED 0x30F5 +#define EGL_CONDITION_SATISFIED 0x30F6 +#define EGL_NO_SYNC ((EGLSync)0) +#define EGL_SYNC_FENCE 0x30F9 +#define EGL_GL_COLORSPACE 0x309D +#define EGL_GL_COLORSPACE_SRGB 0x3089 +#define EGL_GL_COLORSPACE_LINEAR 0x308A +#define EGL_GL_RENDERBUFFER 0x30B9 +#define EGL_GL_TEXTURE_2D 0x30B1 +#define EGL_GL_TEXTURE_LEVEL 0x30BC +#define EGL_GL_TEXTURE_3D 0x30B2 +#define EGL_GL_TEXTURE_ZOFFSET 0x30BD +#define EGL_GL_TEXTURE_CUBE_MAP_POSITIVE_X 0x30B3 +#define EGL_GL_TEXTURE_CUBE_MAP_NEGATIVE_X 0x30B4 +#define EGL_GL_TEXTURE_CUBE_MAP_POSITIVE_Y 0x30B5 +#define EGL_GL_TEXTURE_CUBE_MAP_NEGATIVE_Y 0x30B6 +#define EGL_GL_TEXTURE_CUBE_MAP_POSITIVE_Z 0x30B7 +#define EGL_GL_TEXTURE_CUBE_MAP_NEGATIVE_Z 0x30B8 +#define EGL_IMAGE_PRESERVED 0x30D2 +#define EGL_NO_IMAGE ((EGLImage)0) +EGLAPI EGLSync EGLAPIENTRY eglCreateSync (EGLDisplay dpy, EGLenum type, const EGLAttrib *attrib_list); +EGLAPI EGLBoolean EGLAPIENTRY eglDestroySync (EGLDisplay dpy, EGLSync sync); +EGLAPI EGLint EGLAPIENTRY eglClientWaitSync (EGLDisplay dpy, EGLSync sync, EGLint flags, EGLTime timeout); +EGLAPI EGLBoolean EGLAPIENTRY eglGetSyncAttrib (EGLDisplay dpy, EGLSync sync, EGLint attribute, EGLAttrib *value); +EGLAPI EGLImage EGLAPIENTRY eglCreateImage (EGLDisplay dpy, EGLContext ctx, EGLenum target, EGLClientBuffer buffer, const EGLint *attrib_list); +EGLAPI EGLBoolean EGLAPIENTRY eglDestroyImage (EGLDisplay dpy, EGLImage image); +EGLAPI EGLDisplay EGLAPIENTRY eglGetPlatformDisplay (EGLenum platform, void *native_display, const EGLAttrib *attrib_list); +EGLAPI EGLSurface EGLAPIENTRY eglCreatePlatformWindowSurface (EGLDisplay dpy, EGLConfig config, void *native_window, const EGLAttrib 
*attrib_list); +EGLAPI EGLSurface EGLAPIENTRY eglCreatePlatformPixmapSurface (EGLDisplay dpy, EGLConfig config, void *native_pixmap, const EGLAttrib *attrib_list); +EGLAPI EGLBoolean EGLAPIENTRY eglWaitSync (EGLDisplay dpy, EGLSync sync, EGLint flags); +#endif /* EGL_VERSION_1_5 */ + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/opengles-3.1/include/EGL/eglext.h b/opengles-3.1/include/EGL/eglext.h new file mode 100644 index 0000000000..d8e5ab537e --- /dev/null +++ b/opengles-3.1/include/EGL/eglext.h @@ -0,0 +1,804 @@ +#ifndef __eglext_h_ +#define __eglext_h_ 1 + +#ifdef __cplusplus +extern "C" { +#endif + +/* +** Copyright (c) 2013-2014 The Khronos Group Inc. +** +** Permission is hereby granted, free of charge, to any person obtaining a +** copy of this software and/or associated documentation files (the +** "Materials"), to deal in the Materials without restriction, including +** without limitation the rights to use, copy, modify, merge, publish, +** distribute, sublicense, and/or sell copies of the Materials, and to +** permit persons to whom the Materials are furnished to do so, subject to +** the following conditions: +** +** The above copyright notice and this permission notice shall be included +** in all copies or substantial portions of the Materials. +** +** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +** MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. +*/ +/* +** This header is generated from the Khronos OpenGL / OpenGL ES XML +** API Registry. 
The current version of the Registry, generator scripts +** used to make the header, and the header can be found at +** http://www.opengl.org/registry/ +** +** Khronos $Revision: 28371 $ on $Date: 2014-10-01 09:16:09 -0700 (Wed, 01 Oct 2014) $ +*/ + +#include + +#define EGL_EGLEXT_VERSION 20141001 + +/* Generated C header for: + * API: egl + * Versions considered: .* + * Versions emitted: _nomatch_^ + * Default extensions included: egl + * Additional extensions included: _nomatch_^ + * Extensions removed: _nomatch_^ + */ + +#ifndef EGL_KHR_cl_event +#define EGL_KHR_cl_event 1 +#define EGL_CL_EVENT_HANDLE_KHR 0x309C +#define EGL_SYNC_CL_EVENT_KHR 0x30FE +#define EGL_SYNC_CL_EVENT_COMPLETE_KHR 0x30FF +#endif /* EGL_KHR_cl_event */ + +#ifndef EGL_KHR_cl_event2 +#define EGL_KHR_cl_event2 1 +typedef void *EGLSyncKHR; +typedef intptr_t EGLAttribKHR; +typedef EGLSyncKHR (EGLAPIENTRYP PFNEGLCREATESYNC64KHRPROC) (EGLDisplay dpy, EGLenum type, const EGLAttribKHR *attrib_list); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLSyncKHR EGLAPIENTRY eglCreateSync64KHR (EGLDisplay dpy, EGLenum type, const EGLAttribKHR *attrib_list); +#endif +#endif /* EGL_KHR_cl_event2 */ + +#ifndef EGL_KHR_client_get_all_proc_addresses +#define EGL_KHR_client_get_all_proc_addresses 1 +#endif /* EGL_KHR_client_get_all_proc_addresses */ + +#ifndef EGL_KHR_config_attribs +#define EGL_KHR_config_attribs 1 +#define EGL_CONFORMANT_KHR 0x3042 +#define EGL_VG_COLORSPACE_LINEAR_BIT_KHR 0x0020 +#define EGL_VG_ALPHA_FORMAT_PRE_BIT_KHR 0x0040 +#endif /* EGL_KHR_config_attribs */ + +#ifndef EGL_KHR_create_context +#define EGL_KHR_create_context 1 +#define EGL_CONTEXT_MAJOR_VERSION_KHR 0x3098 +#define EGL_CONTEXT_MINOR_VERSION_KHR 0x30FB +#define EGL_CONTEXT_FLAGS_KHR 0x30FC +#define EGL_CONTEXT_OPENGL_PROFILE_MASK_KHR 0x30FD +#define EGL_CONTEXT_OPENGL_RESET_NOTIFICATION_STRATEGY_KHR 0x31BD +#define EGL_NO_RESET_NOTIFICATION_KHR 0x31BE +#define EGL_LOSE_CONTEXT_ON_RESET_KHR 0x31BF +#define 
EGL_CONTEXT_OPENGL_DEBUG_BIT_KHR 0x00000001 +#define EGL_CONTEXT_OPENGL_FORWARD_COMPATIBLE_BIT_KHR 0x00000002 +#define EGL_CONTEXT_OPENGL_ROBUST_ACCESS_BIT_KHR 0x00000004 +#define EGL_CONTEXT_OPENGL_CORE_PROFILE_BIT_KHR 0x00000001 +#define EGL_CONTEXT_OPENGL_COMPATIBILITY_PROFILE_BIT_KHR 0x00000002 +#define EGL_OPENGL_ES3_BIT_KHR 0x00000040 +#endif /* EGL_KHR_create_context */ + +#ifndef EGL_KHR_fence_sync +#define EGL_KHR_fence_sync 1 +#ifdef KHRONOS_SUPPORT_INT64 +#define EGL_SYNC_PRIOR_COMMANDS_COMPLETE_KHR 0x30F0 +#define EGL_SYNC_CONDITION_KHR 0x30F8 +#define EGL_SYNC_FENCE_KHR 0x30F9 +#endif /* KHRONOS_SUPPORT_INT64 */ +#endif /* EGL_KHR_fence_sync */ + +#ifndef EGL_KHR_get_all_proc_addresses +#define EGL_KHR_get_all_proc_addresses 1 +#endif /* EGL_KHR_get_all_proc_addresses */ + +#ifndef EGL_KHR_gl_colorspace +#define EGL_KHR_gl_colorspace 1 +#define EGL_GL_COLORSPACE_KHR 0x309D +#define EGL_GL_COLORSPACE_SRGB_KHR 0x3089 +#define EGL_GL_COLORSPACE_LINEAR_KHR 0x308A +#endif /* EGL_KHR_gl_colorspace */ + +#ifndef EGL_KHR_gl_renderbuffer_image +#define EGL_KHR_gl_renderbuffer_image 1 +#define EGL_GL_RENDERBUFFER_KHR 0x30B9 +#endif /* EGL_KHR_gl_renderbuffer_image */ + +#ifndef EGL_KHR_gl_texture_2D_image +#define EGL_KHR_gl_texture_2D_image 1 +#define EGL_GL_TEXTURE_2D_KHR 0x30B1 +#define EGL_GL_TEXTURE_LEVEL_KHR 0x30BC +#endif /* EGL_KHR_gl_texture_2D_image */ + +#ifndef EGL_KHR_gl_texture_3D_image +#define EGL_KHR_gl_texture_3D_image 1 +#define EGL_GL_TEXTURE_3D_KHR 0x30B2 +#define EGL_GL_TEXTURE_ZOFFSET_KHR 0x30BD +#endif /* EGL_KHR_gl_texture_3D_image */ + +#ifndef EGL_KHR_gl_texture_cubemap_image +#define EGL_KHR_gl_texture_cubemap_image 1 +#define EGL_GL_TEXTURE_CUBE_MAP_POSITIVE_X_KHR 0x30B3 +#define EGL_GL_TEXTURE_CUBE_MAP_NEGATIVE_X_KHR 0x30B4 +#define EGL_GL_TEXTURE_CUBE_MAP_POSITIVE_Y_KHR 0x30B5 +#define EGL_GL_TEXTURE_CUBE_MAP_NEGATIVE_Y_KHR 0x30B6 +#define EGL_GL_TEXTURE_CUBE_MAP_POSITIVE_Z_KHR 0x30B7 +#define EGL_GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_KHR 
0x30B8 +#endif /* EGL_KHR_gl_texture_cubemap_image */ + +#ifndef EGL_KHR_image +#define EGL_KHR_image 1 +typedef void *EGLImageKHR; +#define EGL_NATIVE_PIXMAP_KHR 0x30B0 +#define EGL_NO_IMAGE_KHR ((EGLImageKHR)0) +typedef EGLImageKHR (EGLAPIENTRYP PFNEGLCREATEIMAGEKHRPROC) (EGLDisplay dpy, EGLContext ctx, EGLenum target, EGLClientBuffer buffer, const EGLint *attrib_list); +typedef EGLBoolean (EGLAPIENTRYP PFNEGLDESTROYIMAGEKHRPROC) (EGLDisplay dpy, EGLImageKHR image); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLImageKHR EGLAPIENTRY eglCreateImageKHR (EGLDisplay dpy, EGLContext ctx, EGLenum target, EGLClientBuffer buffer, const EGLint *attrib_list); +EGLAPI EGLBoolean EGLAPIENTRY eglDestroyImageKHR (EGLDisplay dpy, EGLImageKHR image); +#endif +#endif /* EGL_KHR_image */ + +#ifndef EGL_KHR_image_base +#define EGL_KHR_image_base 1 +#define EGL_IMAGE_PRESERVED_KHR 0x30D2 +#endif /* EGL_KHR_image_base */ + +#ifndef EGL_KHR_image_pixmap +#define EGL_KHR_image_pixmap 1 +#endif /* EGL_KHR_image_pixmap */ + +#ifndef EGL_KHR_lock_surface +#define EGL_KHR_lock_surface 1 +#define EGL_READ_SURFACE_BIT_KHR 0x0001 +#define EGL_WRITE_SURFACE_BIT_KHR 0x0002 +#define EGL_LOCK_SURFACE_BIT_KHR 0x0080 +#define EGL_OPTIMAL_FORMAT_BIT_KHR 0x0100 +#define EGL_MATCH_FORMAT_KHR 0x3043 +#define EGL_FORMAT_RGB_565_EXACT_KHR 0x30C0 +#define EGL_FORMAT_RGB_565_KHR 0x30C1 +#define EGL_FORMAT_RGBA_8888_EXACT_KHR 0x30C2 +#define EGL_FORMAT_RGBA_8888_KHR 0x30C3 +#define EGL_MAP_PRESERVE_PIXELS_KHR 0x30C4 +#define EGL_LOCK_USAGE_HINT_KHR 0x30C5 +#define EGL_BITMAP_POINTER_KHR 0x30C6 +#define EGL_BITMAP_PITCH_KHR 0x30C7 +#define EGL_BITMAP_ORIGIN_KHR 0x30C8 +#define EGL_BITMAP_PIXEL_RED_OFFSET_KHR 0x30C9 +#define EGL_BITMAP_PIXEL_GREEN_OFFSET_KHR 0x30CA +#define EGL_BITMAP_PIXEL_BLUE_OFFSET_KHR 0x30CB +#define EGL_BITMAP_PIXEL_ALPHA_OFFSET_KHR 0x30CC +#define EGL_BITMAP_PIXEL_LUMINANCE_OFFSET_KHR 0x30CD +#define EGL_LOWER_LEFT_KHR 0x30CE +#define EGL_UPPER_LEFT_KHR 0x30CF +typedef EGLBoolean 
(EGLAPIENTRYP PFNEGLLOCKSURFACEKHRPROC) (EGLDisplay dpy, EGLSurface surface, const EGLint *attrib_list); +typedef EGLBoolean (EGLAPIENTRYP PFNEGLUNLOCKSURFACEKHRPROC) (EGLDisplay dpy, EGLSurface surface); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLBoolean EGLAPIENTRY eglLockSurfaceKHR (EGLDisplay dpy, EGLSurface surface, const EGLint *attrib_list); +EGLAPI EGLBoolean EGLAPIENTRY eglUnlockSurfaceKHR (EGLDisplay dpy, EGLSurface surface); +#endif +#endif /* EGL_KHR_lock_surface */ + +#ifndef EGL_KHR_lock_surface2 +#define EGL_KHR_lock_surface2 1 +#define EGL_BITMAP_PIXEL_SIZE_KHR 0x3110 +#endif /* EGL_KHR_lock_surface2 */ + +#ifndef EGL_KHR_lock_surface3 +#define EGL_KHR_lock_surface3 1 +typedef EGLBoolean (EGLAPIENTRYP PFNEGLQUERYSURFACE64KHRPROC) (EGLDisplay dpy, EGLSurface surface, EGLint attribute, EGLAttribKHR *value); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLBoolean EGLAPIENTRY eglQuerySurface64KHR (EGLDisplay dpy, EGLSurface surface, EGLint attribute, EGLAttribKHR *value); +#endif +#endif /* EGL_KHR_lock_surface3 */ + +#ifndef EGL_KHR_platform_android +#define EGL_KHR_platform_android 1 +#define EGL_PLATFORM_ANDROID_KHR 0x3141 +#endif /* EGL_KHR_platform_android */ + +#ifndef EGL_KHR_platform_gbm +#define EGL_KHR_platform_gbm 1 +#define EGL_PLATFORM_GBM_KHR 0x31D7 +#endif /* EGL_KHR_platform_gbm */ + +#ifndef EGL_KHR_platform_wayland +#define EGL_KHR_platform_wayland 1 +#define EGL_PLATFORM_WAYLAND_KHR 0x31D8 +#endif /* EGL_KHR_platform_wayland */ + +#ifndef EGL_KHR_platform_x11 +#define EGL_KHR_platform_x11 1 +#define EGL_PLATFORM_X11_KHR 0x31D5 +#define EGL_PLATFORM_X11_SCREEN_KHR 0x31D6 +#endif /* EGL_KHR_platform_x11 */ + +#ifndef EGL_KHR_reusable_sync +#define EGL_KHR_reusable_sync 1 +typedef khronos_utime_nanoseconds_t EGLTimeKHR; +#ifdef KHRONOS_SUPPORT_INT64 +#define EGL_SYNC_STATUS_KHR 0x30F1 +#define EGL_SIGNALED_KHR 0x30F2 +#define EGL_UNSIGNALED_KHR 0x30F3 +#define EGL_TIMEOUT_EXPIRED_KHR 0x30F5 +#define EGL_CONDITION_SATISFIED_KHR 0x30F6 +#define 
EGL_SYNC_TYPE_KHR 0x30F7 +#define EGL_SYNC_REUSABLE_KHR 0x30FA +#define EGL_SYNC_FLUSH_COMMANDS_BIT_KHR 0x0001 +#define EGL_FOREVER_KHR 0xFFFFFFFFFFFFFFFFull +#define EGL_NO_SYNC_KHR ((EGLSyncKHR)0) +typedef EGLSyncKHR (EGLAPIENTRYP PFNEGLCREATESYNCKHRPROC) (EGLDisplay dpy, EGLenum type, const EGLint *attrib_list); +typedef EGLBoolean (EGLAPIENTRYP PFNEGLDESTROYSYNCKHRPROC) (EGLDisplay dpy, EGLSyncKHR sync); +typedef EGLint (EGLAPIENTRYP PFNEGLCLIENTWAITSYNCKHRPROC) (EGLDisplay dpy, EGLSyncKHR sync, EGLint flags, EGLTimeKHR timeout); +typedef EGLBoolean (EGLAPIENTRYP PFNEGLSIGNALSYNCKHRPROC) (EGLDisplay dpy, EGLSyncKHR sync, EGLenum mode); +typedef EGLBoolean (EGLAPIENTRYP PFNEGLGETSYNCATTRIBKHRPROC) (EGLDisplay dpy, EGLSyncKHR sync, EGLint attribute, EGLint *value); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLSyncKHR EGLAPIENTRY eglCreateSyncKHR (EGLDisplay dpy, EGLenum type, const EGLint *attrib_list); +EGLAPI EGLBoolean EGLAPIENTRY eglDestroySyncKHR (EGLDisplay dpy, EGLSyncKHR sync); +EGLAPI EGLint EGLAPIENTRY eglClientWaitSyncKHR (EGLDisplay dpy, EGLSyncKHR sync, EGLint flags, EGLTimeKHR timeout); +EGLAPI EGLBoolean EGLAPIENTRY eglSignalSyncKHR (EGLDisplay dpy, EGLSyncKHR sync, EGLenum mode); +EGLAPI EGLBoolean EGLAPIENTRY eglGetSyncAttribKHR (EGLDisplay dpy, EGLSyncKHR sync, EGLint attribute, EGLint *value); +#endif +#endif /* KHRONOS_SUPPORT_INT64 */ +#endif /* EGL_KHR_reusable_sync */ + +#ifndef EGL_KHR_stream +#define EGL_KHR_stream 1 +typedef void *EGLStreamKHR; +typedef khronos_uint64_t EGLuint64KHR; +#ifdef KHRONOS_SUPPORT_INT64 +#define EGL_NO_STREAM_KHR ((EGLStreamKHR)0) +#define EGL_CONSUMER_LATENCY_USEC_KHR 0x3210 +#define EGL_PRODUCER_FRAME_KHR 0x3212 +#define EGL_CONSUMER_FRAME_KHR 0x3213 +#define EGL_STREAM_STATE_KHR 0x3214 +#define EGL_STREAM_STATE_CREATED_KHR 0x3215 +#define EGL_STREAM_STATE_CONNECTING_KHR 0x3216 +#define EGL_STREAM_STATE_EMPTY_KHR 0x3217 +#define EGL_STREAM_STATE_NEW_FRAME_AVAILABLE_KHR 0x3218 +#define 
EGL_STREAM_STATE_OLD_FRAME_AVAILABLE_KHR 0x3219 +#define EGL_STREAM_STATE_DISCONNECTED_KHR 0x321A +#define EGL_BAD_STREAM_KHR 0x321B +#define EGL_BAD_STATE_KHR 0x321C +typedef EGLStreamKHR (EGLAPIENTRYP PFNEGLCREATESTREAMKHRPROC) (EGLDisplay dpy, const EGLint *attrib_list); +typedef EGLBoolean (EGLAPIENTRYP PFNEGLDESTROYSTREAMKHRPROC) (EGLDisplay dpy, EGLStreamKHR stream); +typedef EGLBoolean (EGLAPIENTRYP PFNEGLSTREAMATTRIBKHRPROC) (EGLDisplay dpy, EGLStreamKHR stream, EGLenum attribute, EGLint value); +typedef EGLBoolean (EGLAPIENTRYP PFNEGLQUERYSTREAMKHRPROC) (EGLDisplay dpy, EGLStreamKHR stream, EGLenum attribute, EGLint *value); +typedef EGLBoolean (EGLAPIENTRYP PFNEGLQUERYSTREAMU64KHRPROC) (EGLDisplay dpy, EGLStreamKHR stream, EGLenum attribute, EGLuint64KHR *value); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLStreamKHR EGLAPIENTRY eglCreateStreamKHR (EGLDisplay dpy, const EGLint *attrib_list); +EGLAPI EGLBoolean EGLAPIENTRY eglDestroyStreamKHR (EGLDisplay dpy, EGLStreamKHR stream); +EGLAPI EGLBoolean EGLAPIENTRY eglStreamAttribKHR (EGLDisplay dpy, EGLStreamKHR stream, EGLenum attribute, EGLint value); +EGLAPI EGLBoolean EGLAPIENTRY eglQueryStreamKHR (EGLDisplay dpy, EGLStreamKHR stream, EGLenum attribute, EGLint *value); +EGLAPI EGLBoolean EGLAPIENTRY eglQueryStreamu64KHR (EGLDisplay dpy, EGLStreamKHR stream, EGLenum attribute, EGLuint64KHR *value); +#endif +#endif /* KHRONOS_SUPPORT_INT64 */ +#endif /* EGL_KHR_stream */ + +#ifndef EGL_KHR_stream_consumer_gltexture +#define EGL_KHR_stream_consumer_gltexture 1 +#ifdef EGL_KHR_stream +#define EGL_CONSUMER_ACQUIRE_TIMEOUT_USEC_KHR 0x321E +typedef EGLBoolean (EGLAPIENTRYP PFNEGLSTREAMCONSUMERGLTEXTUREEXTERNALKHRPROC) (EGLDisplay dpy, EGLStreamKHR stream); +typedef EGLBoolean (EGLAPIENTRYP PFNEGLSTREAMCONSUMERACQUIREKHRPROC) (EGLDisplay dpy, EGLStreamKHR stream); +typedef EGLBoolean (EGLAPIENTRYP PFNEGLSTREAMCONSUMERRELEASEKHRPROC) (EGLDisplay dpy, EGLStreamKHR stream); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI 
EGLBoolean EGLAPIENTRY eglStreamConsumerGLTextureExternalKHR (EGLDisplay dpy, EGLStreamKHR stream); +EGLAPI EGLBoolean EGLAPIENTRY eglStreamConsumerAcquireKHR (EGLDisplay dpy, EGLStreamKHR stream); +EGLAPI EGLBoolean EGLAPIENTRY eglStreamConsumerReleaseKHR (EGLDisplay dpy, EGLStreamKHR stream); +#endif +#endif /* EGL_KHR_stream */ +#endif /* EGL_KHR_stream_consumer_gltexture */ + +#ifndef EGL_KHR_stream_cross_process_fd +#define EGL_KHR_stream_cross_process_fd 1 +typedef int EGLNativeFileDescriptorKHR; +#ifdef EGL_KHR_stream +#define EGL_NO_FILE_DESCRIPTOR_KHR ((EGLNativeFileDescriptorKHR)(-1)) +typedef EGLNativeFileDescriptorKHR (EGLAPIENTRYP PFNEGLGETSTREAMFILEDESCRIPTORKHRPROC) (EGLDisplay dpy, EGLStreamKHR stream); +typedef EGLStreamKHR (EGLAPIENTRYP PFNEGLCREATESTREAMFROMFILEDESCRIPTORKHRPROC) (EGLDisplay dpy, EGLNativeFileDescriptorKHR file_descriptor); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLNativeFileDescriptorKHR EGLAPIENTRY eglGetStreamFileDescriptorKHR (EGLDisplay dpy, EGLStreamKHR stream); +EGLAPI EGLStreamKHR EGLAPIENTRY eglCreateStreamFromFileDescriptorKHR (EGLDisplay dpy, EGLNativeFileDescriptorKHR file_descriptor); +#endif +#endif /* EGL_KHR_stream */ +#endif /* EGL_KHR_stream_cross_process_fd */ + +#ifndef EGL_KHR_stream_fifo +#define EGL_KHR_stream_fifo 1 +#ifdef EGL_KHR_stream +#define EGL_STREAM_FIFO_LENGTH_KHR 0x31FC +#define EGL_STREAM_TIME_NOW_KHR 0x31FD +#define EGL_STREAM_TIME_CONSUMER_KHR 0x31FE +#define EGL_STREAM_TIME_PRODUCER_KHR 0x31FF +typedef EGLBoolean (EGLAPIENTRYP PFNEGLQUERYSTREAMTIMEKHRPROC) (EGLDisplay dpy, EGLStreamKHR stream, EGLenum attribute, EGLTimeKHR *value); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLBoolean EGLAPIENTRY eglQueryStreamTimeKHR (EGLDisplay dpy, EGLStreamKHR stream, EGLenum attribute, EGLTimeKHR *value); +#endif +#endif /* EGL_KHR_stream */ +#endif /* EGL_KHR_stream_fifo */ + +#ifndef EGL_KHR_stream_producer_aldatalocator +#define EGL_KHR_stream_producer_aldatalocator 1 +#ifdef EGL_KHR_stream +#endif /* 
EGL_KHR_stream */ +#endif /* EGL_KHR_stream_producer_aldatalocator */ + +#ifndef EGL_KHR_stream_producer_eglsurface +#define EGL_KHR_stream_producer_eglsurface 1 +#ifdef EGL_KHR_stream +#define EGL_STREAM_BIT_KHR 0x0800 +typedef EGLSurface (EGLAPIENTRYP PFNEGLCREATESTREAMPRODUCERSURFACEKHRPROC) (EGLDisplay dpy, EGLConfig config, EGLStreamKHR stream, const EGLint *attrib_list); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLSurface EGLAPIENTRY eglCreateStreamProducerSurfaceKHR (EGLDisplay dpy, EGLConfig config, EGLStreamKHR stream, const EGLint *attrib_list); +#endif +#endif /* EGL_KHR_stream */ +#endif /* EGL_KHR_stream_producer_eglsurface */ + +#ifndef EGL_KHR_surfaceless_context +#define EGL_KHR_surfaceless_context 1 +#endif /* EGL_KHR_surfaceless_context */ + +#ifndef EGL_KHR_vg_parent_image +#define EGL_KHR_vg_parent_image 1 +#define EGL_VG_PARENT_IMAGE_KHR 0x30BA +#endif /* EGL_KHR_vg_parent_image */ + +#ifndef EGL_KHR_wait_sync +#define EGL_KHR_wait_sync 1 +typedef EGLint (EGLAPIENTRYP PFNEGLWAITSYNCKHRPROC) (EGLDisplay dpy, EGLSyncKHR sync, EGLint flags); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLint EGLAPIENTRY eglWaitSyncKHR (EGLDisplay dpy, EGLSyncKHR sync, EGLint flags); +#endif +#endif /* EGL_KHR_wait_sync */ + +#ifndef EGL_ANDROID_blob_cache +#define EGL_ANDROID_blob_cache 1 +typedef khronos_ssize_t EGLsizeiANDROID; +typedef void (*EGLSetBlobFuncANDROID) (const void *key, EGLsizeiANDROID keySize, const void *value, EGLsizeiANDROID valueSize); +typedef EGLsizeiANDROID (*EGLGetBlobFuncANDROID) (const void *key, EGLsizeiANDROID keySize, void *value, EGLsizeiANDROID valueSize); +typedef void (EGLAPIENTRYP PFNEGLSETBLOBCACHEFUNCSANDROIDPROC) (EGLDisplay dpy, EGLSetBlobFuncANDROID set, EGLGetBlobFuncANDROID get); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI void EGLAPIENTRY eglSetBlobCacheFuncsANDROID (EGLDisplay dpy, EGLSetBlobFuncANDROID set, EGLGetBlobFuncANDROID get); +#endif +#endif /* EGL_ANDROID_blob_cache */ + +#ifndef EGL_ANDROID_framebuffer_target +#define 
EGL_ANDROID_framebuffer_target 1 +#define EGL_FRAMEBUFFER_TARGET_ANDROID 0x3147 +#endif /* EGL_ANDROID_framebuffer_target */ + +#ifndef EGL_ANDROID_image_native_buffer +#define EGL_ANDROID_image_native_buffer 1 +#define EGL_NATIVE_BUFFER_ANDROID 0x3140 +#endif /* EGL_ANDROID_image_native_buffer */ + +#ifndef EGL_ANDROID_native_fence_sync +#define EGL_ANDROID_native_fence_sync 1 +#define EGL_SYNC_NATIVE_FENCE_ANDROID 0x3144 +#define EGL_SYNC_NATIVE_FENCE_FD_ANDROID 0x3145 +#define EGL_SYNC_NATIVE_FENCE_SIGNALED_ANDROID 0x3146 +#define EGL_NO_NATIVE_FENCE_FD_ANDROID -1 +typedef EGLint (EGLAPIENTRYP PFNEGLDUPNATIVEFENCEFDANDROIDPROC) (EGLDisplay dpy, EGLSyncKHR sync); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLint EGLAPIENTRY eglDupNativeFenceFDANDROID (EGLDisplay dpy, EGLSyncKHR sync); +#endif +#endif /* EGL_ANDROID_native_fence_sync */ + +#ifndef EGL_ANDROID_recordable +#define EGL_ANDROID_recordable 1 +#define EGL_RECORDABLE_ANDROID 0x3142 +#endif /* EGL_ANDROID_recordable */ + +#ifndef EGL_ANGLE_d3d_share_handle_client_buffer +#define EGL_ANGLE_d3d_share_handle_client_buffer 1 +#define EGL_D3D_TEXTURE_2D_SHARE_HANDLE_ANGLE 0x3200 +#endif /* EGL_ANGLE_d3d_share_handle_client_buffer */ + +#ifndef EGL_ANGLE_query_surface_pointer +#define EGL_ANGLE_query_surface_pointer 1 +typedef EGLBoolean (EGLAPIENTRYP PFNEGLQUERYSURFACEPOINTERANGLEPROC) (EGLDisplay dpy, EGLSurface surface, EGLint attribute, void **value); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLBoolean EGLAPIENTRY eglQuerySurfacePointerANGLE (EGLDisplay dpy, EGLSurface surface, EGLint attribute, void **value); +#endif +#endif /* EGL_ANGLE_query_surface_pointer */ + +#ifndef EGL_ANGLE_surface_d3d_texture_2d_share_handle +#define EGL_ANGLE_surface_d3d_texture_2d_share_handle 1 +#endif /* EGL_ANGLE_surface_d3d_texture_2d_share_handle */ + +#ifndef EGL_ARM_pixmap_multisample_discard +#define EGL_ARM_pixmap_multisample_discard 1 +#define EGL_DISCARD_SAMPLES_ARM 0x3286 +#endif /* EGL_ARM_pixmap_multisample_discard */ + 
+#ifndef EGL_EXT_buffer_age +#define EGL_EXT_buffer_age 1 +#define EGL_BUFFER_AGE_EXT 0x313D +#endif /* EGL_EXT_buffer_age */ + +#ifndef EGL_EXT_client_extensions +#define EGL_EXT_client_extensions 1 +#endif /* EGL_EXT_client_extensions */ + +#ifndef EGL_EXT_create_context_robustness +#define EGL_EXT_create_context_robustness 1 +#define EGL_CONTEXT_OPENGL_ROBUST_ACCESS_EXT 0x30BF +#define EGL_CONTEXT_OPENGL_RESET_NOTIFICATION_STRATEGY_EXT 0x3138 +#define EGL_NO_RESET_NOTIFICATION_EXT 0x31BE +#define EGL_LOSE_CONTEXT_ON_RESET_EXT 0x31BF +#endif /* EGL_EXT_create_context_robustness */ + +#ifndef EGL_EXT_device_base +#define EGL_EXT_device_base 1 +typedef void *EGLDeviceEXT; +#define EGL_NO_DEVICE_EXT ((EGLDeviceEXT)(0)) +#define EGL_BAD_DEVICE_EXT 0x322B +#define EGL_DEVICE_EXT 0x322C +typedef EGLBoolean (EGLAPIENTRYP PFNEGLQUERYDEVICEATTRIBEXTPROC) (EGLDeviceEXT device, EGLint attribute, EGLAttrib *value); +typedef const char *(EGLAPIENTRYP PFNEGLQUERYDEVICESTRINGEXTPROC) (EGLDeviceEXT device, EGLint name); +typedef EGLBoolean (EGLAPIENTRYP PFNEGLQUERYDEVICESEXTPROC) (EGLint max_devices, EGLDeviceEXT *devices, EGLint *num_devices); +typedef EGLBoolean (EGLAPIENTRYP PFNEGLQUERYDISPLAYATTRIBEXTPROC) (EGLDisplay dpy, EGLint attribute, EGLAttrib *value); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLBoolean EGLAPIENTRY eglQueryDeviceAttribEXT (EGLDeviceEXT device, EGLint attribute, EGLAttrib *value); +EGLAPI const char *EGLAPIENTRY eglQueryDeviceStringEXT (EGLDeviceEXT device, EGLint name); +EGLAPI EGLBoolean EGLAPIENTRY eglQueryDevicesEXT (EGLint max_devices, EGLDeviceEXT *devices, EGLint *num_devices); +EGLAPI EGLBoolean EGLAPIENTRY eglQueryDisplayAttribEXT (EGLDisplay dpy, EGLint attribute, EGLAttrib *value); +#endif +#endif /* EGL_EXT_device_base */ + +#ifndef EGL_EXT_device_drm +#define EGL_EXT_device_drm 1 +#define EGL_DRM_DEVICE_FILE_EXT 0x3233 +#endif /* EGL_EXT_device_drm */ + +#ifndef EGL_EXT_device_openwf +#define EGL_EXT_device_openwf 1 +#define 
EGL_OPENWF_DEVICE_ID_EXT 0x3237 +#endif /* EGL_EXT_device_openwf */ + +#ifndef EGL_EXT_image_dma_buf_import +#define EGL_EXT_image_dma_buf_import 1 +#define EGL_LINUX_DMA_BUF_EXT 0x3270 +#define EGL_LINUX_DRM_FOURCC_EXT 0x3271 +#define EGL_DMA_BUF_PLANE0_FD_EXT 0x3272 +#define EGL_DMA_BUF_PLANE0_OFFSET_EXT 0x3273 +#define EGL_DMA_BUF_PLANE0_PITCH_EXT 0x3274 +#define EGL_DMA_BUF_PLANE1_FD_EXT 0x3275 +#define EGL_DMA_BUF_PLANE1_OFFSET_EXT 0x3276 +#define EGL_DMA_BUF_PLANE1_PITCH_EXT 0x3277 +#define EGL_DMA_BUF_PLANE2_FD_EXT 0x3278 +#define EGL_DMA_BUF_PLANE2_OFFSET_EXT 0x3279 +#define EGL_DMA_BUF_PLANE2_PITCH_EXT 0x327A +#define EGL_YUV_COLOR_SPACE_HINT_EXT 0x327B +#define EGL_SAMPLE_RANGE_HINT_EXT 0x327C +#define EGL_YUV_CHROMA_HORIZONTAL_SITING_HINT_EXT 0x327D +#define EGL_YUV_CHROMA_VERTICAL_SITING_HINT_EXT 0x327E +#define EGL_ITU_REC601_EXT 0x327F +#define EGL_ITU_REC709_EXT 0x3280 +#define EGL_ITU_REC2020_EXT 0x3281 +#define EGL_YUV_FULL_RANGE_EXT 0x3282 +#define EGL_YUV_NARROW_RANGE_EXT 0x3283 +#define EGL_YUV_CHROMA_SITING_0_EXT 0x3284 +#define EGL_YUV_CHROMA_SITING_0_5_EXT 0x3285 +#endif /* EGL_EXT_image_dma_buf_import */ + +#ifndef EGL_EXT_multiview_window +#define EGL_EXT_multiview_window 1 +#define EGL_MULTIVIEW_VIEW_COUNT_EXT 0x3134 +#endif /* EGL_EXT_multiview_window */ + +#ifndef EGL_EXT_output_base +#define EGL_EXT_output_base 1 +typedef void *EGLOutputLayerEXT; +typedef void *EGLOutputPortEXT; +#define EGL_NO_OUTPUT_LAYER_EXT ((EGLOutputLayerEXT)0) +#define EGL_NO_OUTPUT_PORT_EXT ((EGLOutputPortEXT)0) +#define EGL_BAD_OUTPUT_LAYER_EXT 0x322D +#define EGL_BAD_OUTPUT_PORT_EXT 0x322E +#define EGL_SWAP_INTERVAL_EXT 0x322F +typedef EGLBoolean (EGLAPIENTRYP PFNEGLGETOUTPUTLAYERSEXTPROC) (EGLDisplay dpy, const EGLAttrib *attrib_list, EGLOutputLayerEXT *layers, EGLint max_layers, EGLint *num_layers); +typedef EGLBoolean (EGLAPIENTRYP PFNEGLGETOUTPUTPORTSEXTPROC) (EGLDisplay dpy, const EGLAttrib *attrib_list, EGLOutputPortEXT *ports, EGLint max_ports, EGLint 
*num_ports); +typedef EGLBoolean (EGLAPIENTRYP PFNEGLOUTPUTLAYERATTRIBEXTPROC) (EGLDisplay dpy, EGLOutputLayerEXT layer, EGLint attribute, EGLAttrib value); +typedef EGLBoolean (EGLAPIENTRYP PFNEGLQUERYOUTPUTLAYERATTRIBEXTPROC) (EGLDisplay dpy, EGLOutputLayerEXT layer, EGLint attribute, EGLAttrib *value); +typedef const char *(EGLAPIENTRYP PFNEGLQUERYOUTPUTLAYERSTRINGEXTPROC) (EGLDisplay dpy, EGLOutputLayerEXT layer, EGLint name); +typedef EGLBoolean (EGLAPIENTRYP PFNEGLOUTPUTPORTATTRIBEXTPROC) (EGLDisplay dpy, EGLOutputPortEXT port, EGLint attribute, EGLAttrib value); +typedef EGLBoolean (EGLAPIENTRYP PFNEGLQUERYOUTPUTPORTATTRIBEXTPROC) (EGLDisplay dpy, EGLOutputPortEXT port, EGLint attribute, EGLAttrib *value); +typedef const char *(EGLAPIENTRYP PFNEGLQUERYOUTPUTPORTSTRINGEXTPROC) (EGLDisplay dpy, EGLOutputPortEXT port, EGLint name); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLBoolean EGLAPIENTRY eglGetOutputLayersEXT (EGLDisplay dpy, const EGLAttrib *attrib_list, EGLOutputLayerEXT *layers, EGLint max_layers, EGLint *num_layers); +EGLAPI EGLBoolean EGLAPIENTRY eglGetOutputPortsEXT (EGLDisplay dpy, const EGLAttrib *attrib_list, EGLOutputPortEXT *ports, EGLint max_ports, EGLint *num_ports); +EGLAPI EGLBoolean EGLAPIENTRY eglOutputLayerAttribEXT (EGLDisplay dpy, EGLOutputLayerEXT layer, EGLint attribute, EGLAttrib value); +EGLAPI EGLBoolean EGLAPIENTRY eglQueryOutputLayerAttribEXT (EGLDisplay dpy, EGLOutputLayerEXT layer, EGLint attribute, EGLAttrib *value); +EGLAPI const char *EGLAPIENTRY eglQueryOutputLayerStringEXT (EGLDisplay dpy, EGLOutputLayerEXT layer, EGLint name); +EGLAPI EGLBoolean EGLAPIENTRY eglOutputPortAttribEXT (EGLDisplay dpy, EGLOutputPortEXT port, EGLint attribute, EGLAttrib value); +EGLAPI EGLBoolean EGLAPIENTRY eglQueryOutputPortAttribEXT (EGLDisplay dpy, EGLOutputPortEXT port, EGLint attribute, EGLAttrib *value); +EGLAPI const char *EGLAPIENTRY eglQueryOutputPortStringEXT (EGLDisplay dpy, EGLOutputPortEXT port, EGLint name); +#endif +#endif /* 
EGL_EXT_output_base */ + +#ifndef EGL_EXT_output_drm +#define EGL_EXT_output_drm 1 +#define EGL_DRM_CRTC_EXT 0x3234 +#define EGL_DRM_PLANE_EXT 0x3235 +#define EGL_DRM_CONNECTOR_EXT 0x3236 +#endif /* EGL_EXT_output_drm */ + +#ifndef EGL_EXT_output_openwf +#define EGL_EXT_output_openwf 1 +#define EGL_OPENWF_PIPELINE_ID_EXT 0x3238 +#define EGL_OPENWF_PORT_ID_EXT 0x3239 +#endif /* EGL_EXT_output_openwf */ + +#ifndef EGL_EXT_platform_base +#define EGL_EXT_platform_base 1 +typedef EGLDisplay (EGLAPIENTRYP PFNEGLGETPLATFORMDISPLAYEXTPROC) (EGLenum platform, void *native_display, const EGLint *attrib_list); +typedef EGLSurface (EGLAPIENTRYP PFNEGLCREATEPLATFORMWINDOWSURFACEEXTPROC) (EGLDisplay dpy, EGLConfig config, void *native_window, const EGLint *attrib_list); +typedef EGLSurface (EGLAPIENTRYP PFNEGLCREATEPLATFORMPIXMAPSURFACEEXTPROC) (EGLDisplay dpy, EGLConfig config, void *native_pixmap, const EGLint *attrib_list); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLDisplay EGLAPIENTRY eglGetPlatformDisplayEXT (EGLenum platform, void *native_display, const EGLint *attrib_list); +EGLAPI EGLSurface EGLAPIENTRY eglCreatePlatformWindowSurfaceEXT (EGLDisplay dpy, EGLConfig config, void *native_window, const EGLint *attrib_list); +EGLAPI EGLSurface EGLAPIENTRY eglCreatePlatformPixmapSurfaceEXT (EGLDisplay dpy, EGLConfig config, void *native_pixmap, const EGLint *attrib_list); +#endif +#endif /* EGL_EXT_platform_base */ + +#ifndef EGL_EXT_platform_device +#define EGL_EXT_platform_device 1 +#define EGL_PLATFORM_DEVICE_EXT 0x313F +#endif /* EGL_EXT_platform_device */ + +#ifndef EGL_EXT_platform_wayland +#define EGL_EXT_platform_wayland 1 +#define EGL_PLATFORM_WAYLAND_EXT 0x31D8 +#endif /* EGL_EXT_platform_wayland */ + +#ifndef EGL_EXT_platform_x11 +#define EGL_EXT_platform_x11 1 +#define EGL_PLATFORM_X11_EXT 0x31D5 +#define EGL_PLATFORM_X11_SCREEN_EXT 0x31D6 +#endif /* EGL_EXT_platform_x11 */ + +#ifndef EGL_EXT_protected_surface +#define EGL_EXT_protected_surface 1 +#define 
EGL_PROTECTED_CONTENT_EXT 0x32C0 +#endif /* EGL_EXT_protected_surface */ + +#ifndef EGL_EXT_stream_consumer_egloutput +#define EGL_EXT_stream_consumer_egloutput 1 +typedef EGLBoolean (EGLAPIENTRYP PFNEGLSTREAMCONSUMEROUTPUTEXTPROC) (EGLDisplay dpy, EGLStreamKHR stream, EGLOutputLayerEXT layer); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLBoolean EGLAPIENTRY eglStreamConsumerOutputEXT (EGLDisplay dpy, EGLStreamKHR stream, EGLOutputLayerEXT layer); +#endif +#endif /* EGL_EXT_stream_consumer_egloutput */ + +#ifndef EGL_EXT_swap_buffers_with_damage +#define EGL_EXT_swap_buffers_with_damage 1 +typedef EGLBoolean (EGLAPIENTRYP PFNEGLSWAPBUFFERSWITHDAMAGEEXTPROC) (EGLDisplay dpy, EGLSurface surface, EGLint *rects, EGLint n_rects); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLBoolean EGLAPIENTRY eglSwapBuffersWithDamageEXT (EGLDisplay dpy, EGLSurface surface, EGLint *rects, EGLint n_rects); +#endif +#endif /* EGL_EXT_swap_buffers_with_damage */ + +#ifndef EGL_HI_clientpixmap +#define EGL_HI_clientpixmap 1 +struct EGLClientPixmapHI { + void *pData; + EGLint iWidth; + EGLint iHeight; + EGLint iStride; +}; +#define EGL_CLIENT_PIXMAP_POINTER_HI 0x8F74 +typedef EGLSurface (EGLAPIENTRYP PFNEGLCREATEPIXMAPSURFACEHIPROC) (EGLDisplay dpy, EGLConfig config, struct EGLClientPixmapHI *pixmap); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLSurface EGLAPIENTRY eglCreatePixmapSurfaceHI (EGLDisplay dpy, EGLConfig config, struct EGLClientPixmapHI *pixmap); +#endif +#endif /* EGL_HI_clientpixmap */ + +#ifndef EGL_HI_colorformats +#define EGL_HI_colorformats 1 +#define EGL_COLOR_FORMAT_HI 0x8F70 +#define EGL_COLOR_RGB_HI 0x8F71 +#define EGL_COLOR_RGBA_HI 0x8F72 +#define EGL_COLOR_ARGB_HI 0x8F73 +#endif /* EGL_HI_colorformats */ + +#ifndef EGL_IMG_context_priority +#define EGL_IMG_context_priority 1 +#define EGL_CONTEXT_PRIORITY_LEVEL_IMG 0x3100 +#define EGL_CONTEXT_PRIORITY_HIGH_IMG 0x3101 +#define EGL_CONTEXT_PRIORITY_MEDIUM_IMG 0x3102 +#define EGL_CONTEXT_PRIORITY_LOW_IMG 0x3103 +#endif /* 
EGL_IMG_context_priority */ + +#ifndef EGL_MESA_drm_image +#define EGL_MESA_drm_image 1 +#define EGL_DRM_BUFFER_FORMAT_MESA 0x31D0 +#define EGL_DRM_BUFFER_USE_MESA 0x31D1 +#define EGL_DRM_BUFFER_FORMAT_ARGB32_MESA 0x31D2 +#define EGL_DRM_BUFFER_MESA 0x31D3 +#define EGL_DRM_BUFFER_STRIDE_MESA 0x31D4 +#define EGL_DRM_BUFFER_USE_SCANOUT_MESA 0x00000001 +#define EGL_DRM_BUFFER_USE_SHARE_MESA 0x00000002 +typedef EGLImageKHR (EGLAPIENTRYP PFNEGLCREATEDRMIMAGEMESAPROC) (EGLDisplay dpy, const EGLint *attrib_list); +typedef EGLBoolean (EGLAPIENTRYP PFNEGLEXPORTDRMIMAGEMESAPROC) (EGLDisplay dpy, EGLImageKHR image, EGLint *name, EGLint *handle, EGLint *stride); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLImageKHR EGLAPIENTRY eglCreateDRMImageMESA (EGLDisplay dpy, const EGLint *attrib_list); +EGLAPI EGLBoolean EGLAPIENTRY eglExportDRMImageMESA (EGLDisplay dpy, EGLImageKHR image, EGLint *name, EGLint *handle, EGLint *stride); +#endif +#endif /* EGL_MESA_drm_image */ + +#ifndef EGL_MESA_platform_gbm +#define EGL_MESA_platform_gbm 1 +#define EGL_PLATFORM_GBM_MESA 0x31D7 +#endif /* EGL_MESA_platform_gbm */ + +#ifndef EGL_NOK_swap_region +#define EGL_NOK_swap_region 1 +typedef EGLBoolean (EGLAPIENTRYP PFNEGLSWAPBUFFERSREGIONNOKPROC) (EGLDisplay dpy, EGLSurface surface, EGLint numRects, const EGLint *rects); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLBoolean EGLAPIENTRY eglSwapBuffersRegionNOK (EGLDisplay dpy, EGLSurface surface, EGLint numRects, const EGLint *rects); +#endif +#endif /* EGL_NOK_swap_region */ + +#ifndef EGL_NOK_swap_region2 +#define EGL_NOK_swap_region2 1 +typedef EGLBoolean (EGLAPIENTRYP PFNEGLSWAPBUFFERSREGION2NOKPROC) (EGLDisplay dpy, EGLSurface surface, EGLint numRects, const EGLint *rects); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLBoolean EGLAPIENTRY eglSwapBuffersRegion2NOK (EGLDisplay dpy, EGLSurface surface, EGLint numRects, const EGLint *rects); +#endif +#endif /* EGL_NOK_swap_region2 */ + +#ifndef EGL_NOK_texture_from_pixmap +#define EGL_NOK_texture_from_pixmap 
1 +#define EGL_Y_INVERTED_NOK 0x307F +#endif /* EGL_NOK_texture_from_pixmap */ + +#ifndef EGL_NV_3dvision_surface +#define EGL_NV_3dvision_surface 1 +#define EGL_AUTO_STEREO_NV 0x3136 +#endif /* EGL_NV_3dvision_surface */ + +#ifndef EGL_NV_coverage_sample +#define EGL_NV_coverage_sample 1 +#define EGL_COVERAGE_BUFFERS_NV 0x30E0 +#define EGL_COVERAGE_SAMPLES_NV 0x30E1 +#endif /* EGL_NV_coverage_sample */ + +#ifndef EGL_NV_coverage_sample_resolve +#define EGL_NV_coverage_sample_resolve 1 +#define EGL_COVERAGE_SAMPLE_RESOLVE_NV 0x3131 +#define EGL_COVERAGE_SAMPLE_RESOLVE_DEFAULT_NV 0x3132 +#define EGL_COVERAGE_SAMPLE_RESOLVE_NONE_NV 0x3133 +#endif /* EGL_NV_coverage_sample_resolve */ + +#ifndef EGL_NV_cuda_event +#define EGL_NV_cuda_event 1 +#define EGL_CUDA_EVENT_HANDLE_NV 0x323B +#define EGL_SYNC_CUDA_EVENT_NV 0x323C +#define EGL_SYNC_CUDA_EVENT_COMPLETE_NV 0x323D +#endif /* EGL_NV_cuda_event */ + +#ifndef EGL_NV_depth_nonlinear +#define EGL_NV_depth_nonlinear 1 +#define EGL_DEPTH_ENCODING_NV 0x30E2 +#define EGL_DEPTH_ENCODING_NONE_NV 0 +#define EGL_DEPTH_ENCODING_NONLINEAR_NV 0x30E3 +#endif /* EGL_NV_depth_nonlinear */ + +#ifndef EGL_NV_device_cuda +#define EGL_NV_device_cuda 1 +#define EGL_CUDA_DEVICE_NV 0x323A +#endif /* EGL_NV_device_cuda */ + +#ifndef EGL_NV_native_query +#define EGL_NV_native_query 1 +typedef EGLBoolean (EGLAPIENTRYP PFNEGLQUERYNATIVEDISPLAYNVPROC) (EGLDisplay dpy, EGLNativeDisplayType *display_id); +typedef EGLBoolean (EGLAPIENTRYP PFNEGLQUERYNATIVEWINDOWNVPROC) (EGLDisplay dpy, EGLSurface surf, EGLNativeWindowType *window); +typedef EGLBoolean (EGLAPIENTRYP PFNEGLQUERYNATIVEPIXMAPNVPROC) (EGLDisplay dpy, EGLSurface surf, EGLNativePixmapType *pixmap); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLBoolean EGLAPIENTRY eglQueryNativeDisplayNV (EGLDisplay dpy, EGLNativeDisplayType *display_id); +EGLAPI EGLBoolean EGLAPIENTRY eglQueryNativeWindowNV (EGLDisplay dpy, EGLSurface surf, EGLNativeWindowType *window); +EGLAPI EGLBoolean EGLAPIENTRY 
eglQueryNativePixmapNV (EGLDisplay dpy, EGLSurface surf, EGLNativePixmapType *pixmap); +#endif +#endif /* EGL_NV_native_query */ + +#ifndef EGL_NV_post_convert_rounding +#define EGL_NV_post_convert_rounding 1 +#endif /* EGL_NV_post_convert_rounding */ + +#ifndef EGL_NV_post_sub_buffer +#define EGL_NV_post_sub_buffer 1 +#define EGL_POST_SUB_BUFFER_SUPPORTED_NV 0x30BE +typedef EGLBoolean (EGLAPIENTRYP PFNEGLPOSTSUBBUFFERNVPROC) (EGLDisplay dpy, EGLSurface surface, EGLint x, EGLint y, EGLint width, EGLint height); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLBoolean EGLAPIENTRY eglPostSubBufferNV (EGLDisplay dpy, EGLSurface surface, EGLint x, EGLint y, EGLint width, EGLint height); +#endif +#endif /* EGL_NV_post_sub_buffer */ + +#ifndef EGL_NV_stream_sync +#define EGL_NV_stream_sync 1 +#define EGL_SYNC_NEW_FRAME_NV 0x321F +typedef EGLSyncKHR (EGLAPIENTRYP PFNEGLCREATESTREAMSYNCNVPROC) (EGLDisplay dpy, EGLStreamKHR stream, EGLenum type, const EGLint *attrib_list); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLSyncKHR EGLAPIENTRY eglCreateStreamSyncNV (EGLDisplay dpy, EGLStreamKHR stream, EGLenum type, const EGLint *attrib_list); +#endif +#endif /* EGL_NV_stream_sync */ + +#ifndef EGL_NV_sync +#define EGL_NV_sync 1 +typedef void *EGLSyncNV; +typedef khronos_utime_nanoseconds_t EGLTimeNV; +#ifdef KHRONOS_SUPPORT_INT64 +#define EGL_SYNC_PRIOR_COMMANDS_COMPLETE_NV 0x30E6 +#define EGL_SYNC_STATUS_NV 0x30E7 +#define EGL_SIGNALED_NV 0x30E8 +#define EGL_UNSIGNALED_NV 0x30E9 +#define EGL_SYNC_FLUSH_COMMANDS_BIT_NV 0x0001 +#define EGL_FOREVER_NV 0xFFFFFFFFFFFFFFFFull +#define EGL_ALREADY_SIGNALED_NV 0x30EA +#define EGL_TIMEOUT_EXPIRED_NV 0x30EB +#define EGL_CONDITION_SATISFIED_NV 0x30EC +#define EGL_SYNC_TYPE_NV 0x30ED +#define EGL_SYNC_CONDITION_NV 0x30EE +#define EGL_SYNC_FENCE_NV 0x30EF +#define EGL_NO_SYNC_NV ((EGLSyncNV)0) +typedef EGLSyncNV (EGLAPIENTRYP PFNEGLCREATEFENCESYNCNVPROC) (EGLDisplay dpy, EGLenum condition, const EGLint *attrib_list); +typedef EGLBoolean (EGLAPIENTRYP 
PFNEGLDESTROYSYNCNVPROC) (EGLSyncNV sync); +typedef EGLBoolean (EGLAPIENTRYP PFNEGLFENCENVPROC) (EGLSyncNV sync); +typedef EGLint (EGLAPIENTRYP PFNEGLCLIENTWAITSYNCNVPROC) (EGLSyncNV sync, EGLint flags, EGLTimeNV timeout); +typedef EGLBoolean (EGLAPIENTRYP PFNEGLSIGNALSYNCNVPROC) (EGLSyncNV sync, EGLenum mode); +typedef EGLBoolean (EGLAPIENTRYP PFNEGLGETSYNCATTRIBNVPROC) (EGLSyncNV sync, EGLint attribute, EGLint *value); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLSyncNV EGLAPIENTRY eglCreateFenceSyncNV (EGLDisplay dpy, EGLenum condition, const EGLint *attrib_list); +EGLAPI EGLBoolean EGLAPIENTRY eglDestroySyncNV (EGLSyncNV sync); +EGLAPI EGLBoolean EGLAPIENTRY eglFenceNV (EGLSyncNV sync); +EGLAPI EGLint EGLAPIENTRY eglClientWaitSyncNV (EGLSyncNV sync, EGLint flags, EGLTimeNV timeout); +EGLAPI EGLBoolean EGLAPIENTRY eglSignalSyncNV (EGLSyncNV sync, EGLenum mode); +EGLAPI EGLBoolean EGLAPIENTRY eglGetSyncAttribNV (EGLSyncNV sync, EGLint attribute, EGLint *value); +#endif +#endif /* KHRONOS_SUPPORT_INT64 */ +#endif /* EGL_NV_sync */ + +#ifndef EGL_NV_system_time +#define EGL_NV_system_time 1 +typedef khronos_utime_nanoseconds_t EGLuint64NV; +#ifdef KHRONOS_SUPPORT_INT64 +typedef EGLuint64NV (EGLAPIENTRYP PFNEGLGETSYSTEMTIMEFREQUENCYNVPROC) (void); +typedef EGLuint64NV (EGLAPIENTRYP PFNEGLGETSYSTEMTIMENVPROC) (void); +#ifdef EGL_EGLEXT_PROTOTYPES +EGLAPI EGLuint64NV EGLAPIENTRY eglGetSystemTimeFrequencyNV (void); +EGLAPI EGLuint64NV EGLAPIENTRY eglGetSystemTimeNV (void); +#endif +#endif /* KHRONOS_SUPPORT_INT64 */ +#endif /* EGL_NV_system_time */ + +#ifndef EGL_TIZEN_image_native_buffer +#define EGL_TIZEN_image_native_buffer 1 +#define EGL_NATIVE_BUFFER_TIZEN 0x32A0 +#endif /* EGL_TIZEN_image_native_buffer */ + +#ifndef EGL_TIZEN_image_native_surface +#define EGL_TIZEN_image_native_surface 1 +#define EGL_NATIVE_SURFACE_TIZEN 0x32A1 +#endif /* EGL_TIZEN_image_native_surface */ + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/opengles-3.1/include/EGL/eglplatform.h 
b/opengles-3.1/include/EGL/eglplatform.h new file mode 100644 index 0000000000..cc4b100c30 --- /dev/null +++ b/opengles-3.1/include/EGL/eglplatform.h @@ -0,0 +1,122 @@ +#ifndef __eglplatform_h_ +#define __eglplatform_h_ + +/* +** Copyright (c) 2007-2009 The Khronos Group Inc. +** +** Permission is hereby granted, free of charge, to any person obtaining a +** copy of this software and/or associated documentation files (the +** "Materials"), to deal in the Materials without restriction, including +** without limitation the rights to use, copy, modify, merge, publish, +** distribute, sublicense, and/or sell copies of the Materials, and to +** permit persons to whom the Materials are furnished to do so, subject to +** the following conditions: +** +** The above copyright notice and this permission notice shall be included +** in all copies or substantial portions of the Materials. +** +** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +** MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. +*/ + +/* Platform-specific types and definitions for egl.h + * $Revision: 12306 $ on $Date: 2010-08-25 09:51:28 -0700 (Wed, 25 Aug 2010) $ + * + * Adopters may modify khrplatform.h and this file to suit their platform. + * You are encouraged to submit all modifications to the Khronos group so that + * they can be included in future versions of this file. Please submit changes + * by sending them to the public Khronos Bugzilla (http://khronos.org/bugzilla) + * by filing a bug against product "EGL" component "Registry". + */ + +#include + +/* Macros used in EGL function prototype declarations. 
+ * + * EGL functions should be prototyped as: + * + * EGLAPI return-type EGLAPIENTRY eglFunction(arguments); + * typedef return-type (EXPAPIENTRYP PFNEGLFUNCTIONPROC) (arguments); + * + * KHRONOS_APICALL and KHRONOS_APIENTRY are defined in KHR/khrplatform.h + */ + +#ifndef EGLAPI +#define EGLAPI KHRONOS_APICALL +#endif + +#ifndef EGLAPIENTRY +#define EGLAPIENTRY KHRONOS_APIENTRY +#endif +#define EGLAPIENTRYP EGLAPIENTRY* + +/* The types NativeDisplayType, NativeWindowType, and NativePixmapType + * are aliases of window-system-dependent types, such as X Display * or + * Windows Device Context. They must be defined in platform-specific + * code below. The EGL-prefixed versions of Native*Type are the same + * types, renamed in EGL 1.3 so all types in the API start with "EGL". + * + * Khronos STRONGLY RECOMMENDS that you use the default definitions + * provided below, since these changes affect both binary and source + * portability of applications using EGL running on different EGL + * implementations. 
+ */ + +#if defined(_WIN32) || defined(__VC32__) && !defined(__CYGWIN__) && !defined(__SCITECH_SNAP__) /* Win32 and WinCE */ +#ifndef WIN32_LEAN_AND_MEAN +#define WIN32_LEAN_AND_MEAN 1 +#endif +#include + +typedef HDC EGLNativeDisplayType; +typedef HBITMAP EGLNativePixmapType; +typedef HWND EGLNativeWindowType; + +#elif defined(__WINSCW__) || defined(__SYMBIAN32__) /* Symbian */ + +typedef int EGLNativeDisplayType; +typedef void *EGLNativeWindowType; +typedef void *EGLNativePixmapType; + +#elif (defined(__arm__) || defined(__aarch64__)) && defined(__gnu_linux__) /* ARM Linux Mali */ +#include + +typedef void* EGLNativeDisplayType; +typedef void* EGLNativePixmapType; +typedef fbdev_window* EGLNativeWindowType; + +#elif defined(__unix__) + +/* X11 (tentative) */ +#include +#include + +typedef Display *EGLNativeDisplayType; +typedef Pixmap EGLNativePixmapType; +typedef Window EGLNativeWindowType; + +#else +#error "Platform not recognized" +#endif + +/* EGL 1.2 types, renamed for consistency in EGL 1.3 */ +typedef EGLNativeDisplayType NativeDisplayType; +typedef EGLNativePixmapType NativePixmapType; +typedef EGLNativeWindowType NativeWindowType; + + +/* Define EGLint. This must be a signed integral type large enough to contain + * all legal attribute names and values passed into and out of EGL, whether + * their type is boolean, bitmask, enumerant (symbolic constant), integer, + * handle, or other. While in general a 32-bit integer will suffice, if + * handles are 64 bit types, then EGLint should be defined as a signed 64-bit + * integer type. 
+ */ +typedef khronos_int32_t EGLint; + +#endif /* __eglplatform_h */ diff --git a/opengles-3.1/include/GLES/gl.h b/opengles-3.1/include/GLES/gl.h new file mode 100644 index 0000000000..5b8d85a920 --- /dev/null +++ b/opengles-3.1/include/GLES/gl.h @@ -0,0 +1,770 @@ +#ifndef __gl_h_ +#define __gl_h_ + +/* $Revision: 10601 $ on $Date:: 2010-03-04 22:15:27 -0800 #$ */ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * This document is licensed under the SGI Free Software B License Version + * 2.0. For details, see http://oss.sgi.com/projects/FreeB/ . + */ + +typedef void GLvoid; +typedef char GLchar; +typedef unsigned int GLenum; +typedef unsigned char GLboolean; +typedef unsigned int GLbitfield; +typedef khronos_int8_t GLbyte; +typedef short GLshort; +typedef int GLint; +typedef int GLsizei; +typedef khronos_uint8_t GLubyte; +typedef unsigned short GLushort; +typedef unsigned int GLuint; +typedef khronos_float_t GLfloat; +typedef khronos_float_t GLclampf; +typedef khronos_int32_t GLfixed; +typedef khronos_int32_t GLclampx; + +typedef khronos_intptr_t GLintptr; +typedef khronos_ssize_t GLsizeiptr; + + +/*************************************************************/ + +/* OpenGL ES core versions */ +#define GL_VERSION_ES_CM_1_0 1 +#define GL_VERSION_ES_CL_1_0 1 +#define GL_VERSION_ES_CM_1_1 1 +#define GL_VERSION_ES_CL_1_1 1 + +/* ClearBufferMask */ +#define GL_DEPTH_BUFFER_BIT 0x00000100 +#define GL_STENCIL_BUFFER_BIT 0x00000400 +#define GL_COLOR_BUFFER_BIT 0x00004000 + +/* Boolean */ +#define GL_FALSE 0 +#define GL_TRUE 1 + +/* BeginMode */ +#define GL_POINTS 0x0000 +#define GL_LINES 0x0001 +#define GL_LINE_LOOP 0x0002 +#define GL_LINE_STRIP 0x0003 +#define GL_TRIANGLES 0x0004 +#define GL_TRIANGLE_STRIP 0x0005 +#define GL_TRIANGLE_FAN 0x0006 + +/* AlphaFunction */ +#define GL_NEVER 0x0200 +#define GL_LESS 0x0201 +#define GL_EQUAL 0x0202 +#define GL_LEQUAL 0x0203 +#define GL_GREATER 0x0204 +#define GL_NOTEQUAL 0x0205 +#define GL_GEQUAL 0x0206 +#define 
GL_ALWAYS 0x0207 + +/* BlendingFactorDest */ +#define GL_ZERO 0 +#define GL_ONE 1 +#define GL_SRC_COLOR 0x0300 +#define GL_ONE_MINUS_SRC_COLOR 0x0301 +#define GL_SRC_ALPHA 0x0302 +#define GL_ONE_MINUS_SRC_ALPHA 0x0303 +#define GL_DST_ALPHA 0x0304 +#define GL_ONE_MINUS_DST_ALPHA 0x0305 + +/* BlendingFactorSrc */ +/* GL_ZERO */ +/* GL_ONE */ +#define GL_DST_COLOR 0x0306 +#define GL_ONE_MINUS_DST_COLOR 0x0307 +#define GL_SRC_ALPHA_SATURATE 0x0308 +/* GL_SRC_ALPHA */ +/* GL_ONE_MINUS_SRC_ALPHA */ +/* GL_DST_ALPHA */ +/* GL_ONE_MINUS_DST_ALPHA */ + +/* ClipPlaneName */ +#define GL_CLIP_PLANE0 0x3000 +#define GL_CLIP_PLANE1 0x3001 +#define GL_CLIP_PLANE2 0x3002 +#define GL_CLIP_PLANE3 0x3003 +#define GL_CLIP_PLANE4 0x3004 +#define GL_CLIP_PLANE5 0x3005 + +/* ColorMaterialFace */ +/* GL_FRONT_AND_BACK */ + +/* ColorMaterialParameter */ +/* GL_AMBIENT_AND_DIFFUSE */ + +/* ColorPointerType */ +/* GL_UNSIGNED_BYTE */ +/* GL_FLOAT */ +/* GL_FIXED */ + +/* CullFaceMode */ +#define GL_FRONT 0x0404 +#define GL_BACK 0x0405 +#define GL_FRONT_AND_BACK 0x0408 + +/* DepthFunction */ +/* GL_NEVER */ +/* GL_LESS */ +/* GL_EQUAL */ +/* GL_LEQUAL */ +/* GL_GREATER */ +/* GL_NOTEQUAL */ +/* GL_GEQUAL */ +/* GL_ALWAYS */ + +/* EnableCap */ +#define GL_FOG 0x0B60 +#define GL_LIGHTING 0x0B50 +#define GL_TEXTURE_2D 0x0DE1 +#define GL_CULL_FACE 0x0B44 +#define GL_ALPHA_TEST 0x0BC0 +#define GL_BLEND 0x0BE2 +#define GL_COLOR_LOGIC_OP 0x0BF2 +#define GL_DITHER 0x0BD0 +#define GL_STENCIL_TEST 0x0B90 +#define GL_DEPTH_TEST 0x0B71 +/* GL_LIGHT0 */ +/* GL_LIGHT1 */ +/* GL_LIGHT2 */ +/* GL_LIGHT3 */ +/* GL_LIGHT4 */ +/* GL_LIGHT5 */ +/* GL_LIGHT6 */ +/* GL_LIGHT7 */ +#define GL_POINT_SMOOTH 0x0B10 +#define GL_LINE_SMOOTH 0x0B20 +#define GL_SCISSOR_TEST 0x0C11 +#define GL_COLOR_MATERIAL 0x0B57 +#define GL_NORMALIZE 0x0BA1 +#define GL_RESCALE_NORMAL 0x803A +#define GL_POLYGON_OFFSET_FILL 0x8037 +#define GL_VERTEX_ARRAY 0x8074 +#define GL_NORMAL_ARRAY 0x8075 +#define GL_COLOR_ARRAY 0x8076 +#define 
GL_TEXTURE_COORD_ARRAY 0x8078 +#define GL_MULTISAMPLE 0x809D +#define GL_SAMPLE_ALPHA_TO_COVERAGE 0x809E +#define GL_SAMPLE_ALPHA_TO_ONE 0x809F +#define GL_SAMPLE_COVERAGE 0x80A0 + +/* ErrorCode */ +#define GL_NO_ERROR 0 +#define GL_INVALID_ENUM 0x0500 +#define GL_INVALID_VALUE 0x0501 +#define GL_INVALID_OPERATION 0x0502 +#define GL_STACK_OVERFLOW 0x0503 +#define GL_STACK_UNDERFLOW 0x0504 +#define GL_OUT_OF_MEMORY 0x0505 + +/* FogMode */ +/* GL_LINEAR */ +#define GL_EXP 0x0800 +#define GL_EXP2 0x0801 + +/* FogParameter */ +#define GL_FOG_DENSITY 0x0B62 +#define GL_FOG_START 0x0B63 +#define GL_FOG_END 0x0B64 +#define GL_FOG_MODE 0x0B65 +#define GL_FOG_COLOR 0x0B66 + +/* FrontFaceDirection */ +#define GL_CW 0x0900 +#define GL_CCW 0x0901 + +/* GetPName */ +#define GL_CURRENT_COLOR 0x0B00 +#define GL_CURRENT_NORMAL 0x0B02 +#define GL_CURRENT_TEXTURE_COORDS 0x0B03 +#define GL_POINT_SIZE 0x0B11 +#define GL_POINT_SIZE_MIN 0x8126 +#define GL_POINT_SIZE_MAX 0x8127 +#define GL_POINT_FADE_THRESHOLD_SIZE 0x8128 +#define GL_POINT_DISTANCE_ATTENUATION 0x8129 +#define GL_SMOOTH_POINT_SIZE_RANGE 0x0B12 +#define GL_LINE_WIDTH 0x0B21 +#define GL_SMOOTH_LINE_WIDTH_RANGE 0x0B22 +#define GL_ALIASED_POINT_SIZE_RANGE 0x846D +#define GL_ALIASED_LINE_WIDTH_RANGE 0x846E +#define GL_CULL_FACE_MODE 0x0B45 +#define GL_FRONT_FACE 0x0B46 +#define GL_SHADE_MODEL 0x0B54 +#define GL_DEPTH_RANGE 0x0B70 +#define GL_DEPTH_WRITEMASK 0x0B72 +#define GL_DEPTH_CLEAR_VALUE 0x0B73 +#define GL_DEPTH_FUNC 0x0B74 +#define GL_STENCIL_CLEAR_VALUE 0x0B91 +#define GL_STENCIL_FUNC 0x0B92 +#define GL_STENCIL_VALUE_MASK 0x0B93 +#define GL_STENCIL_FAIL 0x0B94 +#define GL_STENCIL_PASS_DEPTH_FAIL 0x0B95 +#define GL_STENCIL_PASS_DEPTH_PASS 0x0B96 +#define GL_STENCIL_REF 0x0B97 +#define GL_STENCIL_WRITEMASK 0x0B98 +#define GL_MATRIX_MODE 0x0BA0 +#define GL_VIEWPORT 0x0BA2 +#define GL_MODELVIEW_STACK_DEPTH 0x0BA3 +#define GL_PROJECTION_STACK_DEPTH 0x0BA4 +#define GL_TEXTURE_STACK_DEPTH 0x0BA5 +#define GL_MODELVIEW_MATRIX 
0x0BA6 +#define GL_PROJECTION_MATRIX 0x0BA7 +#define GL_TEXTURE_MATRIX 0x0BA8 +#define GL_ALPHA_TEST_FUNC 0x0BC1 +#define GL_ALPHA_TEST_REF 0x0BC2 +#define GL_BLEND_DST 0x0BE0 +#define GL_BLEND_SRC 0x0BE1 +#define GL_LOGIC_OP_MODE 0x0BF0 +#define GL_SCISSOR_BOX 0x0C10 +#define GL_SCISSOR_TEST 0x0C11 +#define GL_COLOR_CLEAR_VALUE 0x0C22 +#define GL_COLOR_WRITEMASK 0x0C23 +#define GL_UNPACK_ALIGNMENT 0x0CF5 +#define GL_PACK_ALIGNMENT 0x0D05 +#define GL_MAX_LIGHTS 0x0D31 +#define GL_MAX_CLIP_PLANES 0x0D32 +#define GL_MAX_TEXTURE_SIZE 0x0D33 +#define GL_MAX_MODELVIEW_STACK_DEPTH 0x0D36 +#define GL_MAX_PROJECTION_STACK_DEPTH 0x0D38 +#define GL_MAX_TEXTURE_STACK_DEPTH 0x0D39 +#define GL_MAX_VIEWPORT_DIMS 0x0D3A +#define GL_MAX_TEXTURE_UNITS 0x84E2 +#define GL_SUBPIXEL_BITS 0x0D50 +#define GL_RED_BITS 0x0D52 +#define GL_GREEN_BITS 0x0D53 +#define GL_BLUE_BITS 0x0D54 +#define GL_ALPHA_BITS 0x0D55 +#define GL_DEPTH_BITS 0x0D56 +#define GL_STENCIL_BITS 0x0D57 +#define GL_POLYGON_OFFSET_UNITS 0x2A00 +#define GL_POLYGON_OFFSET_FILL 0x8037 +#define GL_POLYGON_OFFSET_FACTOR 0x8038 +#define GL_TEXTURE_BINDING_2D 0x8069 +#define GL_VERTEX_ARRAY_SIZE 0x807A +#define GL_VERTEX_ARRAY_TYPE 0x807B +#define GL_VERTEX_ARRAY_STRIDE 0x807C +#define GL_NORMAL_ARRAY_TYPE 0x807E +#define GL_NORMAL_ARRAY_STRIDE 0x807F +#define GL_COLOR_ARRAY_SIZE 0x8081 +#define GL_COLOR_ARRAY_TYPE 0x8082 +#define GL_COLOR_ARRAY_STRIDE 0x8083 +#define GL_TEXTURE_COORD_ARRAY_SIZE 0x8088 +#define GL_TEXTURE_COORD_ARRAY_TYPE 0x8089 +#define GL_TEXTURE_COORD_ARRAY_STRIDE 0x808A +#define GL_VERTEX_ARRAY_POINTER 0x808E +#define GL_NORMAL_ARRAY_POINTER 0x808F +#define GL_COLOR_ARRAY_POINTER 0x8090 +#define GL_TEXTURE_COORD_ARRAY_POINTER 0x8092 +#define GL_SAMPLE_BUFFERS 0x80A8 +#define GL_SAMPLES 0x80A9 +#define GL_SAMPLE_COVERAGE_VALUE 0x80AA +#define GL_SAMPLE_COVERAGE_INVERT 0x80AB + +/* GetTextureParameter */ +/* GL_TEXTURE_MAG_FILTER */ +/* GL_TEXTURE_MIN_FILTER */ +/* GL_TEXTURE_WRAP_S */ +/* GL_TEXTURE_WRAP_T 
*/ + +#define GL_NUM_COMPRESSED_TEXTURE_FORMATS 0x86A2 +#define GL_COMPRESSED_TEXTURE_FORMATS 0x86A3 + +/* HintMode */ +#define GL_DONT_CARE 0x1100 +#define GL_FASTEST 0x1101 +#define GL_NICEST 0x1102 + +/* HintTarget */ +#define GL_PERSPECTIVE_CORRECTION_HINT 0x0C50 +#define GL_POINT_SMOOTH_HINT 0x0C51 +#define GL_LINE_SMOOTH_HINT 0x0C52 +#define GL_FOG_HINT 0x0C54 +#define GL_GENERATE_MIPMAP_HINT 0x8192 + +/* LightModelParameter */ +#define GL_LIGHT_MODEL_AMBIENT 0x0B53 +#define GL_LIGHT_MODEL_TWO_SIDE 0x0B52 + +/* LightParameter */ +#define GL_AMBIENT 0x1200 +#define GL_DIFFUSE 0x1201 +#define GL_SPECULAR 0x1202 +#define GL_POSITION 0x1203 +#define GL_SPOT_DIRECTION 0x1204 +#define GL_SPOT_EXPONENT 0x1205 +#define GL_SPOT_CUTOFF 0x1206 +#define GL_CONSTANT_ATTENUATION 0x1207 +#define GL_LINEAR_ATTENUATION 0x1208 +#define GL_QUADRATIC_ATTENUATION 0x1209 + +/* DataType */ +#define GL_BYTE 0x1400 +#define GL_UNSIGNED_BYTE 0x1401 +#define GL_SHORT 0x1402 +#define GL_UNSIGNED_SHORT 0x1403 +#define GL_FLOAT 0x1406 +#define GL_FIXED 0x140C + +/* LogicOp */ +#define GL_CLEAR 0x1500 +#define GL_AND 0x1501 +#define GL_AND_REVERSE 0x1502 +#define GL_COPY 0x1503 +#define GL_AND_INVERTED 0x1504 +#define GL_NOOP 0x1505 +#define GL_XOR 0x1506 +#define GL_OR 0x1507 +#define GL_NOR 0x1508 +#define GL_EQUIV 0x1509 +#define GL_INVERT 0x150A +#define GL_OR_REVERSE 0x150B +#define GL_COPY_INVERTED 0x150C +#define GL_OR_INVERTED 0x150D +#define GL_NAND 0x150E +#define GL_SET 0x150F + +/* MaterialFace */ +/* GL_FRONT_AND_BACK */ + +/* MaterialParameter */ +#define GL_EMISSION 0x1600 +#define GL_SHININESS 0x1601 +#define GL_AMBIENT_AND_DIFFUSE 0x1602 +/* GL_AMBIENT */ +/* GL_DIFFUSE */ +/* GL_SPECULAR */ + +/* MatrixMode */ +#define GL_MODELVIEW 0x1700 +#define GL_PROJECTION 0x1701 +#define GL_TEXTURE 0x1702 + +/* NormalPointerType */ +/* GL_BYTE */ +/* GL_SHORT */ +/* GL_FLOAT */ +/* GL_FIXED */ + +/* PixelFormat */ +#define GL_ALPHA 0x1906 +#define GL_RGB 0x1907 +#define GL_RGBA 
0x1908 +#define GL_LUMINANCE 0x1909 +#define GL_LUMINANCE_ALPHA 0x190A + +/* PixelStoreParameter */ +#define GL_UNPACK_ALIGNMENT 0x0CF5 +#define GL_PACK_ALIGNMENT 0x0D05 + +/* PixelType */ +/* GL_UNSIGNED_BYTE */ +#define GL_UNSIGNED_SHORT_4_4_4_4 0x8033 +#define GL_UNSIGNED_SHORT_5_5_5_1 0x8034 +#define GL_UNSIGNED_SHORT_5_6_5 0x8363 + +/* ShadingModel */ +#define GL_FLAT 0x1D00 +#define GL_SMOOTH 0x1D01 + +/* StencilFunction */ +/* GL_NEVER */ +/* GL_LESS */ +/* GL_EQUAL */ +/* GL_LEQUAL */ +/* GL_GREATER */ +/* GL_NOTEQUAL */ +/* GL_GEQUAL */ +/* GL_ALWAYS */ + +/* StencilOp */ +/* GL_ZERO */ +#define GL_KEEP 0x1E00 +#define GL_REPLACE 0x1E01 +#define GL_INCR 0x1E02 +#define GL_DECR 0x1E03 +/* GL_INVERT */ + +/* StringName */ +#define GL_VENDOR 0x1F00 +#define GL_RENDERER 0x1F01 +#define GL_VERSION 0x1F02 +#define GL_EXTENSIONS 0x1F03 + +/* TexCoordPointerType */ +/* GL_SHORT */ +/* GL_FLOAT */ +/* GL_FIXED */ +/* GL_BYTE */ + +/* TextureEnvMode */ +#define GL_MODULATE 0x2100 +#define GL_DECAL 0x2101 +/* GL_BLEND */ +#define GL_ADD 0x0104 +/* GL_REPLACE */ + +/* TextureEnvParameter */ +#define GL_TEXTURE_ENV_MODE 0x2200 +#define GL_TEXTURE_ENV_COLOR 0x2201 + +/* TextureEnvTarget */ +#define GL_TEXTURE_ENV 0x2300 + +/* TextureMagFilter */ +#define GL_NEAREST 0x2600 +#define GL_LINEAR 0x2601 + +/* TextureMinFilter */ +/* GL_NEAREST */ +/* GL_LINEAR */ +#define GL_NEAREST_MIPMAP_NEAREST 0x2700 +#define GL_LINEAR_MIPMAP_NEAREST 0x2701 +#define GL_NEAREST_MIPMAP_LINEAR 0x2702 +#define GL_LINEAR_MIPMAP_LINEAR 0x2703 + +/* TextureParameterName */ +#define GL_TEXTURE_MAG_FILTER 0x2800 +#define GL_TEXTURE_MIN_FILTER 0x2801 +#define GL_TEXTURE_WRAP_S 0x2802 +#define GL_TEXTURE_WRAP_T 0x2803 +#define GL_GENERATE_MIPMAP 0x8191 + +/* TextureTarget */ +/* GL_TEXTURE_2D */ + +/* TextureUnit */ +#define GL_TEXTURE0 0x84C0 +#define GL_TEXTURE1 0x84C1 +#define GL_TEXTURE2 0x84C2 +#define GL_TEXTURE3 0x84C3 +#define GL_TEXTURE4 0x84C4 +#define GL_TEXTURE5 0x84C5 +#define 
GL_TEXTURE6 0x84C6 +#define GL_TEXTURE7 0x84C7 +#define GL_TEXTURE8 0x84C8 +#define GL_TEXTURE9 0x84C9 +#define GL_TEXTURE10 0x84CA +#define GL_TEXTURE11 0x84CB +#define GL_TEXTURE12 0x84CC +#define GL_TEXTURE13 0x84CD +#define GL_TEXTURE14 0x84CE +#define GL_TEXTURE15 0x84CF +#define GL_TEXTURE16 0x84D0 +#define GL_TEXTURE17 0x84D1 +#define GL_TEXTURE18 0x84D2 +#define GL_TEXTURE19 0x84D3 +#define GL_TEXTURE20 0x84D4 +#define GL_TEXTURE21 0x84D5 +#define GL_TEXTURE22 0x84D6 +#define GL_TEXTURE23 0x84D7 +#define GL_TEXTURE24 0x84D8 +#define GL_TEXTURE25 0x84D9 +#define GL_TEXTURE26 0x84DA +#define GL_TEXTURE27 0x84DB +#define GL_TEXTURE28 0x84DC +#define GL_TEXTURE29 0x84DD +#define GL_TEXTURE30 0x84DE +#define GL_TEXTURE31 0x84DF +#define GL_ACTIVE_TEXTURE 0x84E0 +#define GL_CLIENT_ACTIVE_TEXTURE 0x84E1 + +/* TextureWrapMode */ +#define GL_REPEAT 0x2901 +#define GL_CLAMP_TO_EDGE 0x812F + +/* VertexPointerType */ +/* GL_SHORT */ +/* GL_FLOAT */ +/* GL_FIXED */ +/* GL_BYTE */ + +/* LightName */ +#define GL_LIGHT0 0x4000 +#define GL_LIGHT1 0x4001 +#define GL_LIGHT2 0x4002 +#define GL_LIGHT3 0x4003 +#define GL_LIGHT4 0x4004 +#define GL_LIGHT5 0x4005 +#define GL_LIGHT6 0x4006 +#define GL_LIGHT7 0x4007 + +/* Buffer Objects */ +#define GL_ARRAY_BUFFER 0x8892 +#define GL_ELEMENT_ARRAY_BUFFER 0x8893 + +#define GL_ARRAY_BUFFER_BINDING 0x8894 +#define GL_ELEMENT_ARRAY_BUFFER_BINDING 0x8895 +#define GL_VERTEX_ARRAY_BUFFER_BINDING 0x8896 +#define GL_NORMAL_ARRAY_BUFFER_BINDING 0x8897 +#define GL_COLOR_ARRAY_BUFFER_BINDING 0x8898 +#define GL_TEXTURE_COORD_ARRAY_BUFFER_BINDING 0x889A + +#define GL_STATIC_DRAW 0x88E4 +#define GL_DYNAMIC_DRAW 0x88E8 + +#define GL_BUFFER_SIZE 0x8764 +#define GL_BUFFER_USAGE 0x8765 + +/* Texture combine + dot3 */ +#define GL_SUBTRACT 0x84E7 +#define GL_COMBINE 0x8570 +#define GL_COMBINE_RGB 0x8571 +#define GL_COMBINE_ALPHA 0x8572 +#define GL_RGB_SCALE 0x8573 +#define GL_ADD_SIGNED 0x8574 +#define GL_INTERPOLATE 0x8575 +#define GL_CONSTANT 0x8576 
+#define GL_PRIMARY_COLOR 0x8577 +#define GL_PREVIOUS 0x8578 +#define GL_OPERAND0_RGB 0x8590 +#define GL_OPERAND1_RGB 0x8591 +#define GL_OPERAND2_RGB 0x8592 +#define GL_OPERAND0_ALPHA 0x8598 +#define GL_OPERAND1_ALPHA 0x8599 +#define GL_OPERAND2_ALPHA 0x859A + +#define GL_ALPHA_SCALE 0x0D1C + +#define GL_SRC0_RGB 0x8580 +#define GL_SRC1_RGB 0x8581 +#define GL_SRC2_RGB 0x8582 +#define GL_SRC0_ALPHA 0x8588 +#define GL_SRC1_ALPHA 0x8589 +#define GL_SRC2_ALPHA 0x858A + +#define GL_DOT3_RGB 0x86AE +#define GL_DOT3_RGBA 0x86AF + +/*------------------------------------------------------------------------* + * required OES extension tokens + *------------------------------------------------------------------------*/ + +/* OES_read_format */ +#ifndef GL_OES_read_format +#define GL_IMPLEMENTATION_COLOR_READ_TYPE_OES 0x8B9A +#define GL_IMPLEMENTATION_COLOR_READ_FORMAT_OES 0x8B9B +#endif + +/* GL_OES_compressed_paletted_texture */ +#ifndef GL_OES_compressed_paletted_texture +#define GL_PALETTE4_RGB8_OES 0x8B90 +#define GL_PALETTE4_RGBA8_OES 0x8B91 +#define GL_PALETTE4_R5_G6_B5_OES 0x8B92 +#define GL_PALETTE4_RGBA4_OES 0x8B93 +#define GL_PALETTE4_RGB5_A1_OES 0x8B94 +#define GL_PALETTE8_RGB8_OES 0x8B95 +#define GL_PALETTE8_RGBA8_OES 0x8B96 +#define GL_PALETTE8_R5_G6_B5_OES 0x8B97 +#define GL_PALETTE8_RGBA4_OES 0x8B98 +#define GL_PALETTE8_RGB5_A1_OES 0x8B99 +#endif + +/* OES_point_size_array */ +#ifndef GL_OES_point_size_array +#define GL_POINT_SIZE_ARRAY_OES 0x8B9C +#define GL_POINT_SIZE_ARRAY_TYPE_OES 0x898A +#define GL_POINT_SIZE_ARRAY_STRIDE_OES 0x898B +#define GL_POINT_SIZE_ARRAY_POINTER_OES 0x898C +#define GL_POINT_SIZE_ARRAY_BUFFER_BINDING_OES 0x8B9F +#endif + +/* GL_OES_point_sprite */ +#ifndef GL_OES_point_sprite +#define GL_POINT_SPRITE_OES 0x8861 +#define GL_COORD_REPLACE_OES 0x8862 +#endif + +/*************************************************************/ + +/* Available only in Common profile */ +GL_API void GL_APIENTRY glAlphaFunc (GLenum func, GLclampf ref); 
+GL_API void GL_APIENTRY glClearColor (GLclampf red, GLclampf green, GLclampf blue, GLclampf alpha); +GL_API void GL_APIENTRY glClearDepthf (GLclampf depth); +GL_API void GL_APIENTRY glClipPlanef (GLenum plane, const GLfloat *equation); +GL_API void GL_APIENTRY glColor4f (GLfloat red, GLfloat green, GLfloat blue, GLfloat alpha); +GL_API void GL_APIENTRY glDepthRangef (GLclampf zNear, GLclampf zFar); +GL_API void GL_APIENTRY glFogf (GLenum pname, GLfloat param); +GL_API void GL_APIENTRY glFogfv (GLenum pname, const GLfloat *params); +GL_API void GL_APIENTRY glFrustumf (GLfloat left, GLfloat right, GLfloat bottom, GLfloat top, GLfloat zNear, GLfloat zFar); +GL_API void GL_APIENTRY glGetClipPlanef (GLenum pname, GLfloat eqn[4]); +GL_API void GL_APIENTRY glGetFloatv (GLenum pname, GLfloat *params); +GL_API void GL_APIENTRY glGetLightfv (GLenum light, GLenum pname, GLfloat *params); +GL_API void GL_APIENTRY glGetMaterialfv (GLenum face, GLenum pname, GLfloat *params); +GL_API void GL_APIENTRY glGetTexEnvfv (GLenum env, GLenum pname, GLfloat *params); +GL_API void GL_APIENTRY glGetTexParameterfv (GLenum target, GLenum pname, GLfloat *params); +GL_API void GL_APIENTRY glLightModelf (GLenum pname, GLfloat param); +GL_API void GL_APIENTRY glLightModelfv (GLenum pname, const GLfloat *params); +GL_API void GL_APIENTRY glLightf (GLenum light, GLenum pname, GLfloat param); +GL_API void GL_APIENTRY glLightfv (GLenum light, GLenum pname, const GLfloat *params); +GL_API void GL_APIENTRY glLineWidth (GLfloat width); +GL_API void GL_APIENTRY glLoadMatrixf (const GLfloat *m); +GL_API void GL_APIENTRY glMaterialf (GLenum face, GLenum pname, GLfloat param); +GL_API void GL_APIENTRY glMaterialfv (GLenum face, GLenum pname, const GLfloat *params); +GL_API void GL_APIENTRY glMultMatrixf (const GLfloat *m); +GL_API void GL_APIENTRY glMultiTexCoord4f (GLenum target, GLfloat s, GLfloat t, GLfloat r, GLfloat q); +GL_API void GL_APIENTRY glNormal3f (GLfloat nx, GLfloat ny, GLfloat nz); +GL_API 
void GL_APIENTRY glOrthof (GLfloat left, GLfloat right, GLfloat bottom, GLfloat top, GLfloat zNear, GLfloat zFar); +GL_API void GL_APIENTRY glPointParameterf (GLenum pname, GLfloat param); +GL_API void GL_APIENTRY glPointParameterfv (GLenum pname, const GLfloat *params); +GL_API void GL_APIENTRY glPointSize (GLfloat size); +GL_API void GL_APIENTRY glPolygonOffset (GLfloat factor, GLfloat units); +GL_API void GL_APIENTRY glRotatef (GLfloat angle, GLfloat x, GLfloat y, GLfloat z); +GL_API void GL_APIENTRY glScalef (GLfloat x, GLfloat y, GLfloat z); +GL_API void GL_APIENTRY glTexEnvf (GLenum target, GLenum pname, GLfloat param); +GL_API void GL_APIENTRY glTexEnvfv (GLenum target, GLenum pname, const GLfloat *params); +GL_API void GL_APIENTRY glTexParameterf (GLenum target, GLenum pname, GLfloat param); +GL_API void GL_APIENTRY glTexParameterfv (GLenum target, GLenum pname, const GLfloat *params); +GL_API void GL_APIENTRY glTranslatef (GLfloat x, GLfloat y, GLfloat z); + +/* Available in both Common and Common-Lite profiles */ +GL_API void GL_APIENTRY glActiveTexture (GLenum texture); +GL_API void GL_APIENTRY glAlphaFuncx (GLenum func, GLclampx ref); +GL_API void GL_APIENTRY glBindBuffer (GLenum target, GLuint buffer); +GL_API void GL_APIENTRY glBindTexture (GLenum target, GLuint texture); +GL_API void GL_APIENTRY glBlendFunc (GLenum sfactor, GLenum dfactor); +GL_API void GL_APIENTRY glBufferData (GLenum target, GLsizeiptr size, const GLvoid *data, GLenum usage); +GL_API void GL_APIENTRY glBufferSubData (GLenum target, GLintptr offset, GLsizeiptr size, const GLvoid *data); +GL_API void GL_APIENTRY glClear (GLbitfield mask); +GL_API void GL_APIENTRY glClearColorx (GLclampx red, GLclampx green, GLclampx blue, GLclampx alpha); +GL_API void GL_APIENTRY glClearDepthx (GLclampx depth); +GL_API void GL_APIENTRY glClearStencil (GLint s); +GL_API void GL_APIENTRY glClientActiveTexture (GLenum texture); +GL_API void GL_APIENTRY glClipPlanex (GLenum plane, const GLfixed 
*equation); +GL_API void GL_APIENTRY glColor4ub (GLubyte red, GLubyte green, GLubyte blue, GLubyte alpha); +GL_API void GL_APIENTRY glColor4x (GLfixed red, GLfixed green, GLfixed blue, GLfixed alpha); +GL_API void GL_APIENTRY glColorMask (GLboolean red, GLboolean green, GLboolean blue, GLboolean alpha); +GL_API void GL_APIENTRY glColorPointer (GLint size, GLenum type, GLsizei stride, const GLvoid *pointer); +GL_API void GL_APIENTRY glCompressedTexImage2D (GLenum target, GLint level, GLenum internalformat, GLsizei width, GLsizei height, GLint border, GLsizei imageSize, const GLvoid *data); +GL_API void GL_APIENTRY glCompressedTexSubImage2D (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLsizei width, GLsizei height, GLenum format, GLsizei imageSize, const GLvoid *data); +GL_API void GL_APIENTRY glCopyTexImage2D (GLenum target, GLint level, GLenum internalformat, GLint x, GLint y, GLsizei width, GLsizei height, GLint border); +GL_API void GL_APIENTRY glCopyTexSubImage2D (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint x, GLint y, GLsizei width, GLsizei height); +GL_API void GL_APIENTRY glCullFace (GLenum mode); +GL_API void GL_APIENTRY glDeleteBuffers (GLsizei n, const GLuint *buffers); +GL_API void GL_APIENTRY glDeleteTextures (GLsizei n, const GLuint *textures); +GL_API void GL_APIENTRY glDepthFunc (GLenum func); +GL_API void GL_APIENTRY glDepthMask (GLboolean flag); +GL_API void GL_APIENTRY glDepthRangex (GLclampx zNear, GLclampx zFar); +GL_API void GL_APIENTRY glDisable (GLenum cap); +GL_API void GL_APIENTRY glDisableClientState (GLenum array); +GL_API void GL_APIENTRY glDrawArrays (GLenum mode, GLint first, GLsizei count); +GL_API void GL_APIENTRY glDrawElements (GLenum mode, GLsizei count, GLenum type, const GLvoid *indices); +GL_API void GL_APIENTRY glEnable (GLenum cap); +GL_API void GL_APIENTRY glEnableClientState (GLenum array); +GL_API void GL_APIENTRY glFinish (void); +GL_API void GL_APIENTRY glFlush (void); +GL_API void 
GL_APIENTRY glFogx (GLenum pname, GLfixed param); +GL_API void GL_APIENTRY glFogxv (GLenum pname, const GLfixed *params); +GL_API void GL_APIENTRY glFrontFace (GLenum mode); +GL_API void GL_APIENTRY glFrustumx (GLfixed left, GLfixed right, GLfixed bottom, GLfixed top, GLfixed zNear, GLfixed zFar); +GL_API void GL_APIENTRY glGetBooleanv (GLenum pname, GLboolean *params); +GL_API void GL_APIENTRY glGetBufferParameteriv (GLenum target, GLenum pname, GLint *params); +GL_API void GL_APIENTRY glGetClipPlanex (GLenum pname, GLfixed eqn[4]); +GL_API void GL_APIENTRY glGenBuffers (GLsizei n, GLuint *buffers); +GL_API void GL_APIENTRY glGenTextures (GLsizei n, GLuint *textures); +GL_API GLenum GL_APIENTRY glGetError (void); +GL_API void GL_APIENTRY glGetFixedv (GLenum pname, GLfixed *params); +GL_API void GL_APIENTRY glGetIntegerv (GLenum pname, GLint *params); +GL_API void GL_APIENTRY glGetLightxv (GLenum light, GLenum pname, GLfixed *params); +GL_API void GL_APIENTRY glGetMaterialxv (GLenum face, GLenum pname, GLfixed *params); +GL_API void GL_APIENTRY glGetPointerv (GLenum pname, GLvoid **params); +GL_API const GLubyte * GL_APIENTRY glGetString (GLenum name); +GL_API void GL_APIENTRY glGetTexEnviv (GLenum env, GLenum pname, GLint *params); +GL_API void GL_APIENTRY glGetTexEnvxv (GLenum env, GLenum pname, GLfixed *params); +GL_API void GL_APIENTRY glGetTexParameteriv (GLenum target, GLenum pname, GLint *params); +GL_API void GL_APIENTRY glGetTexParameterxv (GLenum target, GLenum pname, GLfixed *params); +GL_API void GL_APIENTRY glHint (GLenum target, GLenum mode); +GL_API GLboolean GL_APIENTRY glIsBuffer (GLuint buffer); +GL_API GLboolean GL_APIENTRY glIsEnabled (GLenum cap); +GL_API GLboolean GL_APIENTRY glIsTexture (GLuint texture); +GL_API void GL_APIENTRY glLightModelx (GLenum pname, GLfixed param); +GL_API void GL_APIENTRY glLightModelxv (GLenum pname, const GLfixed *params); +GL_API void GL_APIENTRY glLightx (GLenum light, GLenum pname, GLfixed param); +GL_API void 
GL_APIENTRY glLightxv (GLenum light, GLenum pname, const GLfixed *params); +GL_API void GL_APIENTRY glLineWidthx (GLfixed width); +GL_API void GL_APIENTRY glLoadIdentity (void); +GL_API void GL_APIENTRY glLoadMatrixx (const GLfixed *m); +GL_API void GL_APIENTRY glLogicOp (GLenum opcode); +GL_API void GL_APIENTRY glMaterialx (GLenum face, GLenum pname, GLfixed param); +GL_API void GL_APIENTRY glMaterialxv (GLenum face, GLenum pname, const GLfixed *params); +GL_API void GL_APIENTRY glMatrixMode (GLenum mode); +GL_API void GL_APIENTRY glMultMatrixx (const GLfixed *m); +GL_API void GL_APIENTRY glMultiTexCoord4x (GLenum target, GLfixed s, GLfixed t, GLfixed r, GLfixed q); +GL_API void GL_APIENTRY glNormal3x (GLfixed nx, GLfixed ny, GLfixed nz); +GL_API void GL_APIENTRY glNormalPointer (GLenum type, GLsizei stride, const GLvoid *pointer); +GL_API void GL_APIENTRY glOrthox (GLfixed left, GLfixed right, GLfixed bottom, GLfixed top, GLfixed zNear, GLfixed zFar); +GL_API void GL_APIENTRY glPixelStorei (GLenum pname, GLint param); +GL_API void GL_APIENTRY glPointParameterx (GLenum pname, GLfixed param); +GL_API void GL_APIENTRY glPointParameterxv (GLenum pname, const GLfixed *params); +GL_API void GL_APIENTRY glPointSizex (GLfixed size); +GL_API void GL_APIENTRY glPolygonOffsetx (GLfixed factor, GLfixed units); +GL_API void GL_APIENTRY glPopMatrix (void); +GL_API void GL_APIENTRY glPushMatrix (void); +GL_API void GL_APIENTRY glReadPixels (GLint x, GLint y, GLsizei width, GLsizei height, GLenum format, GLenum type, GLvoid *pixels); +GL_API void GL_APIENTRY glRotatex (GLfixed angle, GLfixed x, GLfixed y, GLfixed z); +GL_API void GL_APIENTRY glSampleCoverage (GLclampf value, GLboolean invert); +GL_API void GL_APIENTRY glSampleCoveragex (GLclampx value, GLboolean invert); +GL_API void GL_APIENTRY glScalex (GLfixed x, GLfixed y, GLfixed z); +GL_API void GL_APIENTRY glScissor (GLint x, GLint y, GLsizei width, GLsizei height); +GL_API void GL_APIENTRY glShadeModel (GLenum mode); 
+GL_API void GL_APIENTRY glStencilFunc (GLenum func, GLint ref, GLuint mask); +GL_API void GL_APIENTRY glStencilMask (GLuint mask); +GL_API void GL_APIENTRY glStencilOp (GLenum fail, GLenum zfail, GLenum zpass); +GL_API void GL_APIENTRY glTexCoordPointer (GLint size, GLenum type, GLsizei stride, const GLvoid *pointer); +GL_API void GL_APIENTRY glTexEnvi (GLenum target, GLenum pname, GLint param); +GL_API void GL_APIENTRY glTexEnvx (GLenum target, GLenum pname, GLfixed param); +GL_API void GL_APIENTRY glTexEnviv (GLenum target, GLenum pname, const GLint *params); +GL_API void GL_APIENTRY glTexEnvxv (GLenum target, GLenum pname, const GLfixed *params); +GL_API void GL_APIENTRY glTexImage2D (GLenum target, GLint level, GLint internalformat, GLsizei width, GLsizei height, GLint border, GLenum format, GLenum type, const GLvoid *pixels); +GL_API void GL_APIENTRY glTexParameteri (GLenum target, GLenum pname, GLint param); +GL_API void GL_APIENTRY glTexParameterx (GLenum target, GLenum pname, GLfixed param); +GL_API void GL_APIENTRY glTexParameteriv (GLenum target, GLenum pname, const GLint *params); +GL_API void GL_APIENTRY glTexParameterxv (GLenum target, GLenum pname, const GLfixed *params); +GL_API void GL_APIENTRY glTexSubImage2D (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLsizei width, GLsizei height, GLenum format, GLenum type, const GLvoid *pixels); +GL_API void GL_APIENTRY glTranslatex (GLfixed x, GLfixed y, GLfixed z); +GL_API void GL_APIENTRY glVertexPointer (GLint size, GLenum type, GLsizei stride, const GLvoid *pointer); +GL_API void GL_APIENTRY glViewport (GLint x, GLint y, GLsizei width, GLsizei height); + +/*------------------------------------------------------------------------* + * Required OES extension functions + *------------------------------------------------------------------------*/ + +/* GL_OES_read_format */ +#ifndef GL_OES_read_format +#define GL_OES_read_format 1 +#endif + +/* GL_OES_compressed_paletted_texture */ +#ifndef 
GL_OES_compressed_paletted_texture +#define GL_OES_compressed_paletted_texture 1 +#endif + +/* GL_OES_point_size_array */ +#ifndef GL_OES_point_size_array +#define GL_OES_point_size_array 1 +GL_API void GL_APIENTRY glPointSizePointerOES (GLenum type, GLsizei stride, const GLvoid *pointer); +#endif + +/* GL_OES_point_sprite */ +#ifndef GL_OES_point_sprite +#define GL_OES_point_sprite 1 +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* __gl_h_ */ + diff --git a/opengles-3.1/include/GLES/glext.h b/opengles-3.1/include/GLES/glext.h new file mode 100644 index 0000000000..5b46ae6d05 --- /dev/null +++ b/opengles-3.1/include/GLES/glext.h @@ -0,0 +1,1278 @@ +#ifndef __glext_h_ +#define __glext_h_ + +/* $Revision: 19260 $ on $Date:: 2012-09-20 11:30:36 -0700 #$ */ + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * This document is licensed under the SGI Free Software B License Version + * 2.0. For details, see http://oss.sgi.com/projects/FreeB/ . + */ + +#ifndef GL_APIENTRYP +# define GL_APIENTRYP GL_APIENTRY* +#endif + +/*------------------------------------------------------------------------* + * OES extension tokens + *------------------------------------------------------------------------*/ + +/* GL_OES_blend_equation_separate */ +#ifndef GL_OES_blend_equation_separate +/* BLEND_EQUATION_RGB_OES same as BLEND_EQUATION_OES */ +#define GL_BLEND_EQUATION_RGB_OES 0x8009 +#define GL_BLEND_EQUATION_ALPHA_OES 0x883D +#endif + +/* GL_OES_blend_func_separate */ +#ifndef GL_OES_blend_func_separate +#define GL_BLEND_DST_RGB_OES 0x80C8 +#define GL_BLEND_SRC_RGB_OES 0x80C9 +#define GL_BLEND_DST_ALPHA_OES 0x80CA +#define GL_BLEND_SRC_ALPHA_OES 0x80CB +#endif + +/* GL_OES_blend_subtract */ +#ifndef GL_OES_blend_subtract +#define GL_BLEND_EQUATION_OES 0x8009 +#define GL_FUNC_ADD_OES 0x8006 +#define GL_FUNC_SUBTRACT_OES 0x800A +#define GL_FUNC_REVERSE_SUBTRACT_OES 0x800B +#endif + +/* GL_OES_compressed_ETC1_RGB8_texture */ +#ifndef GL_OES_compressed_ETC1_RGB8_texture +#define 
GL_ETC1_RGB8_OES 0x8D64 +#endif + +/* GL_OES_depth24 */ +#ifndef GL_OES_depth24 +#define GL_DEPTH_COMPONENT24_OES 0x81A6 +#endif + +/* GL_OES_depth32 */ +#ifndef GL_OES_depth32 +#define GL_DEPTH_COMPONENT32_OES 0x81A7 +#endif + +/* GL_OES_draw_texture */ +#ifndef GL_OES_draw_texture +#define GL_TEXTURE_CROP_RECT_OES 0x8B9D +#endif + +/* GL_OES_EGL_image */ +#ifndef GL_OES_EGL_image +typedef void* GLeglImageOES; +#endif + +/* GL_OES_EGL_image_external */ +#ifndef GL_OES_EGL_image_external +/* GLeglImageOES defined in GL_OES_EGL_image already. */ +#define GL_TEXTURE_EXTERNAL_OES 0x8D65 +#define GL_TEXTURE_BINDING_EXTERNAL_OES 0x8D67 +#define GL_REQUIRED_TEXTURE_IMAGE_UNITS_OES 0x8D68 +#endif + +/* GL_OES_element_index_uint */ +#ifndef GL_OES_element_index_uint +#define GL_UNSIGNED_INT 0x1405 +#endif + +/* GL_OES_fixed_point */ +#ifndef GL_OES_fixed_point +#define GL_FIXED_OES 0x140C +#endif + +/* GL_OES_framebuffer_object */ +#ifndef GL_OES_framebuffer_object +#define GL_NONE_OES 0 +#define GL_FRAMEBUFFER_OES 0x8D40 +#define GL_RENDERBUFFER_OES 0x8D41 +#define GL_RGBA4_OES 0x8056 +#define GL_RGB5_A1_OES 0x8057 +#define GL_RGB565_OES 0x8D62 +#define GL_DEPTH_COMPONENT16_OES 0x81A5 +#define GL_RENDERBUFFER_WIDTH_OES 0x8D42 +#define GL_RENDERBUFFER_HEIGHT_OES 0x8D43 +#define GL_RENDERBUFFER_INTERNAL_FORMAT_OES 0x8D44 +#define GL_RENDERBUFFER_RED_SIZE_OES 0x8D50 +#define GL_RENDERBUFFER_GREEN_SIZE_OES 0x8D51 +#define GL_RENDERBUFFER_BLUE_SIZE_OES 0x8D52 +#define GL_RENDERBUFFER_ALPHA_SIZE_OES 0x8D53 +#define GL_RENDERBUFFER_DEPTH_SIZE_OES 0x8D54 +#define GL_RENDERBUFFER_STENCIL_SIZE_OES 0x8D55 +#define GL_FRAMEBUFFER_ATTACHMENT_OBJECT_TYPE_OES 0x8CD0 +#define GL_FRAMEBUFFER_ATTACHMENT_OBJECT_NAME_OES 0x8CD1 +#define GL_FRAMEBUFFER_ATTACHMENT_TEXTURE_LEVEL_OES 0x8CD2 +#define GL_FRAMEBUFFER_ATTACHMENT_TEXTURE_CUBE_MAP_FACE_OES 0x8CD3 +#define GL_COLOR_ATTACHMENT0_OES 0x8CE0 +#define GL_DEPTH_ATTACHMENT_OES 0x8D00 +#define GL_STENCIL_ATTACHMENT_OES 0x8D20 +#define 
GL_FRAMEBUFFER_COMPLETE_OES 0x8CD5 +#define GL_FRAMEBUFFER_INCOMPLETE_ATTACHMENT_OES 0x8CD6 +#define GL_FRAMEBUFFER_INCOMPLETE_MISSING_ATTACHMENT_OES 0x8CD7 +#define GL_FRAMEBUFFER_INCOMPLETE_DIMENSIONS_OES 0x8CD9 +#define GL_FRAMEBUFFER_INCOMPLETE_FORMATS_OES 0x8CDA +#define GL_FRAMEBUFFER_UNSUPPORTED_OES 0x8CDD +#define GL_FRAMEBUFFER_BINDING_OES 0x8CA6 +#define GL_RENDERBUFFER_BINDING_OES 0x8CA7 +#define GL_MAX_RENDERBUFFER_SIZE_OES 0x84E8 +#define GL_INVALID_FRAMEBUFFER_OPERATION_OES 0x0506 +#endif + +/* GL_OES_mapbuffer */ +#ifndef GL_OES_mapbuffer +#define GL_WRITE_ONLY_OES 0x88B9 +#define GL_BUFFER_ACCESS_OES 0x88BB +#define GL_BUFFER_MAPPED_OES 0x88BC +#define GL_BUFFER_MAP_POINTER_OES 0x88BD +#endif + +/* GL_OES_matrix_get */ +#ifndef GL_OES_matrix_get +#define GL_MODELVIEW_MATRIX_FLOAT_AS_INT_BITS_OES 0x898D +#define GL_PROJECTION_MATRIX_FLOAT_AS_INT_BITS_OES 0x898E +#define GL_TEXTURE_MATRIX_FLOAT_AS_INT_BITS_OES 0x898F +#endif + +/* GL_OES_matrix_palette */ +#ifndef GL_OES_matrix_palette +#define GL_MAX_VERTEX_UNITS_OES 0x86A4 +#define GL_MAX_PALETTE_MATRICES_OES 0x8842 +#define GL_MATRIX_PALETTE_OES 0x8840 +#define GL_MATRIX_INDEX_ARRAY_OES 0x8844 +#define GL_WEIGHT_ARRAY_OES 0x86AD +#define GL_CURRENT_PALETTE_MATRIX_OES 0x8843 +#define GL_MATRIX_INDEX_ARRAY_SIZE_OES 0x8846 +#define GL_MATRIX_INDEX_ARRAY_TYPE_OES 0x8847 +#define GL_MATRIX_INDEX_ARRAY_STRIDE_OES 0x8848 +#define GL_MATRIX_INDEX_ARRAY_POINTER_OES 0x8849 +#define GL_MATRIX_INDEX_ARRAY_BUFFER_BINDING_OES 0x8B9E +#define GL_WEIGHT_ARRAY_SIZE_OES 0x86AB +#define GL_WEIGHT_ARRAY_TYPE_OES 0x86A9 +#define GL_WEIGHT_ARRAY_STRIDE_OES 0x86AA +#define GL_WEIGHT_ARRAY_POINTER_OES 0x86AC +#define GL_WEIGHT_ARRAY_BUFFER_BINDING_OES 0x889E +#endif + +/* GL_OES_packed_depth_stencil */ +#ifndef GL_OES_packed_depth_stencil +#define GL_DEPTH_STENCIL_OES 0x84F9 +#define GL_UNSIGNED_INT_24_8_OES 0x84FA +#define GL_DEPTH24_STENCIL8_OES 0x88F0 +#endif + +/* GL_OES_required_internalformat */ +/* No new tokens 
introduced by this extension. */ + +/* GL_OES_rgb8_rgba8 */ +#ifndef GL_OES_rgb8_rgba8 +#define GL_RGB8_OES 0x8051 +#define GL_RGBA8_OES 0x8058 +#endif + +/* GL_OES_stencil1 */ +#ifndef GL_OES_stencil1 +#define GL_STENCIL_INDEX1_OES 0x8D46 +#endif + +/* GL_OES_stencil4 */ +#ifndef GL_OES_stencil4 +#define GL_STENCIL_INDEX4_OES 0x8D47 +#endif + +/* GL_OES_stencil8 */ +#ifndef GL_OES_stencil8 +#define GL_STENCIL_INDEX8_OES 0x8D48 +#endif + +/* GL_OES_stencil_wrap */ +#ifndef GL_OES_stencil_wrap +#define GL_INCR_WRAP_OES 0x8507 +#define GL_DECR_WRAP_OES 0x8508 +#endif + +/* GL_OES_texture_cube_map */ +#ifndef GL_OES_texture_cube_map +#define GL_NORMAL_MAP_OES 0x8511 +#define GL_REFLECTION_MAP_OES 0x8512 +#define GL_TEXTURE_CUBE_MAP_OES 0x8513 +#define GL_TEXTURE_BINDING_CUBE_MAP_OES 0x8514 +#define GL_TEXTURE_CUBE_MAP_POSITIVE_X_OES 0x8515 +#define GL_TEXTURE_CUBE_MAP_NEGATIVE_X_OES 0x8516 +#define GL_TEXTURE_CUBE_MAP_POSITIVE_Y_OES 0x8517 +#define GL_TEXTURE_CUBE_MAP_NEGATIVE_Y_OES 0x8518 +#define GL_TEXTURE_CUBE_MAP_POSITIVE_Z_OES 0x8519 +#define GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_OES 0x851A +#define GL_MAX_CUBE_MAP_TEXTURE_SIZE_OES 0x851C +#define GL_TEXTURE_GEN_MODE_OES 0x2500 +#define GL_TEXTURE_GEN_STR_OES 0x8D60 +#endif + +/* GL_OES_texture_mirrored_repeat */ +#ifndef GL_OES_texture_mirrored_repeat +#define GL_MIRRORED_REPEAT_OES 0x8370 +#endif + +/* GL_OES_vertex_array_object */ +#ifndef GL_OES_vertex_array_object +#define GL_VERTEX_ARRAY_BINDING_OES 0x85B5 +#endif + +/*------------------------------------------------------------------------* + * AMD extension tokens + *------------------------------------------------------------------------*/ + +/* GL_AMD_compressed_3DC_texture */ +#ifndef GL_AMD_compressed_3DC_texture +#define GL_3DC_X_AMD 0x87F9 +#define GL_3DC_XY_AMD 0x87FA +#endif + +/* GL_AMD_compressed_ATC_texture */ +#ifndef GL_AMD_compressed_ATC_texture +#define GL_ATC_RGB_AMD 0x8C92 +#define GL_ATC_RGBA_EXPLICIT_ALPHA_AMD 0x8C93 +#define 
GL_ATC_RGBA_INTERPOLATED_ALPHA_AMD 0x87EE +#endif + +/*------------------------------------------------------------------------* + * APPLE extension tokens + *------------------------------------------------------------------------*/ + +/* GL_APPLE_copy_texture_levels */ +/* No new tokens introduced by this extension. */ + +/* GL_APPLE_framebuffer_multisample */ +#ifndef GL_APPLE_framebuffer_multisample +#define GL_RENDERBUFFER_SAMPLES_APPLE 0x8CAB +#define GL_FRAMEBUFFER_INCOMPLETE_MULTISAMPLE_APPLE 0x8D56 +#define GL_MAX_SAMPLES_APPLE 0x8D57 +#define GL_READ_FRAMEBUFFER_APPLE 0x8CA8 +#define GL_DRAW_FRAMEBUFFER_APPLE 0x8CA9 +#define GL_DRAW_FRAMEBUFFER_BINDING_APPLE 0x8CA6 +#define GL_READ_FRAMEBUFFER_BINDING_APPLE 0x8CAA +#endif + +/* GL_APPLE_sync */ +#ifndef GL_APPLE_sync + +/* These types are defined with reference to + * in the Apple extension spec, but here we use the Khronos + * portable types in khrplatform.h, and assume those types + * are always defined. + * If any other extensions using these types are defined, + * the typedefs must move out of this block and be shared. + */ +typedef khronos_int64_t GLint64; +typedef khronos_uint64_t GLuint64; +typedef struct __GLsync *GLsync; + +#define GL_SYNC_OBJECT_APPLE 0x8A53 +#define GL_MAX_SERVER_WAIT_TIMEOUT_APPLE 0x9111 +#define GL_OBJECT_TYPE_APPLE 0x9112 +#define GL_SYNC_CONDITION_APPLE 0x9113 +#define GL_SYNC_STATUS_APPLE 0x9114 +#define GL_SYNC_FLAGS_APPLE 0x9115 +#define GL_SYNC_FENCE_APPLE 0x9116 +#define GL_SYNC_GPU_COMMANDS_COMPLETE_APPLE 0x9117 +#define GL_UNSIGNALED_APPLE 0x9118 +#define GL_SIGNALED_APPLE 0x9119 +#define GL_ALREADY_SIGNALED_APPLE 0x911A +#define GL_TIMEOUT_EXPIRED_APPLE 0x911B +#define GL_CONDITION_SATISFIED_APPLE 0x911C +#define GL_WAIT_FAILED_APPLE 0x911D +#define GL_SYNC_FLUSH_COMMANDS_BIT_APPLE 0x00000001 +#define GL_TIMEOUT_IGNORED_APPLE 0xFFFFFFFFFFFFFFFFull +#endif + +/* GL_APPLE_texture_2D_limited_npot */ +/* No new tokens introduced by this extension. 
*/ + +/* GL_APPLE_texture_format_BGRA8888 */ +#ifndef GL_APPLE_texture_format_BGRA8888 +#define GL_BGRA_EXT 0x80E1 +#endif + +/* GL_APPLE_texture_max_level */ +#ifndef GL_APPLE_texture_max_level +#define GL_TEXTURE_MAX_LEVEL_APPLE 0x813D +#endif + +/*------------------------------------------------------------------------* + * ARM extension tokens + *------------------------------------------------------------------------*/ + +/* GL_ARM_rgba8 */ +/* No new tokens introduced by this extension. */ + +/*------------------------------------------------------------------------* + * EXT extension tokens + *------------------------------------------------------------------------*/ + +/* GL_EXT_blend_minmax */ +#ifndef GL_EXT_blend_minmax +#define GL_MIN_EXT 0x8007 +#define GL_MAX_EXT 0x8008 +#endif + +/* GL_EXT_discard_framebuffer */ +#ifndef GL_EXT_discard_framebuffer +#define GL_COLOR_EXT 0x1800 +#define GL_DEPTH_EXT 0x1801 +#define GL_STENCIL_EXT 0x1802 +#endif + +/* GL_EXT_map_buffer_range */ +#ifndef GL_EXT_map_buffer_range +#define GL_MAP_READ_BIT_EXT 0x0001 +#define GL_MAP_WRITE_BIT_EXT 0x0002 +#define GL_MAP_INVALIDATE_RANGE_BIT_EXT 0x0004 +#define GL_MAP_INVALIDATE_BUFFER_BIT_EXT 0x0008 +#define GL_MAP_FLUSH_EXPLICIT_BIT_EXT 0x0010 +#define GL_MAP_UNSYNCHRONIZED_BIT_EXT 0x0020 +#endif + +/* GL_EXT_multisampled_render_to_texture */ +#ifndef GL_EXT_multisampled_render_to_texture +#define GL_FRAMEBUFFER_ATTACHMENT_TEXTURE_SAMPLES_EXT 0x8D6C +/* reuse values from GL_EXT_framebuffer_multisample (desktop extension) */ +#define GL_RENDERBUFFER_SAMPLES_EXT 0x8CAB +#define GL_FRAMEBUFFER_INCOMPLETE_MULTISAMPLE_EXT 0x8D56 +#define GL_MAX_SAMPLES_EXT 0x8D57 +#endif + +/* GL_EXT_multi_draw_arrays */ +/* No new tokens introduced by this extension. 
*/ + +/* GL_EXT_read_format_bgra */ +#ifndef GL_EXT_read_format_bgra +#define GL_BGRA_EXT 0x80E1 +#define GL_UNSIGNED_SHORT_4_4_4_4_REV_EXT 0x8365 +#define GL_UNSIGNED_SHORT_1_5_5_5_REV_EXT 0x8366 +#endif + +/* GL_EXT_robustness */ +#ifndef GL_EXT_robustness +/* reuse GL_NO_ERROR */ +#define GL_GUILTY_CONTEXT_RESET_EXT 0x8253 +#define GL_INNOCENT_CONTEXT_RESET_EXT 0x8254 +#define GL_UNKNOWN_CONTEXT_RESET_EXT 0x8255 +#define GL_CONTEXT_ROBUST_ACCESS_EXT 0x90F3 +#define GL_RESET_NOTIFICATION_STRATEGY_EXT 0x8256 +#define GL_LOSE_CONTEXT_ON_RESET_EXT 0x8252 +#define GL_NO_RESET_NOTIFICATION_EXT 0x8261 +#endif + +/* GL_EXT_sRGB */ +#ifndef GL_EXT_sRGB +#define GL_SRGB_EXT 0x8C40 +#define GL_SRGB_ALPHA_EXT 0x8C42 +#define GL_SRGB8_ALPHA8_EXT 0x8C43 +#define GL_FRAMEBUFFER_ATTACHMENT_COLOR_ENCODING_EXT 0x8210 +#endif + +/* GL_EXT_texture_compression_dxt1 */ +#ifndef GL_EXT_texture_compression_dxt1 +#define GL_COMPRESSED_RGB_S3TC_DXT1_EXT 0x83F0 +#define GL_COMPRESSED_RGBA_S3TC_DXT1_EXT 0x83F1 +#endif + +/* GL_EXT_texture_filter_anisotropic */ +#ifndef GL_EXT_texture_filter_anisotropic +#define GL_TEXTURE_MAX_ANISOTROPY_EXT 0x84FE +#define GL_MAX_TEXTURE_MAX_ANISOTROPY_EXT 0x84FF +#endif + +/* GL_EXT_texture_format_BGRA8888 */ +#ifndef GL_EXT_texture_format_BGRA8888 +#define GL_BGRA_EXT 0x80E1 +#endif + +/* GL_EXT_texture_lod_bias */ +#ifndef GL_EXT_texture_lod_bias +#define GL_MAX_TEXTURE_LOD_BIAS_EXT 0x84FD +#define GL_TEXTURE_FILTER_CONTROL_EXT 0x8500 +#define GL_TEXTURE_LOD_BIAS_EXT 0x8501 +#endif + +/* GL_EXT_texture_storage */ +#ifndef GL_EXT_texture_storage +#define GL_TEXTURE_IMMUTABLE_FORMAT_EXT 0x912F +#define GL_ALPHA8_EXT 0x803C +#define GL_LUMINANCE8_EXT 0x8040 +#define GL_LUMINANCE8_ALPHA8_EXT 0x8045 +#define GL_RGBA32F_EXT 0x8814 +#define GL_RGB32F_EXT 0x8815 +#define GL_ALPHA32F_EXT 0x8816 +#define GL_LUMINANCE32F_EXT 0x8818 +#define GL_LUMINANCE_ALPHA32F_EXT 0x8819 +/* reuse GL_RGBA16F_EXT */ +#define GL_RGB16F_EXT 0x881B +#define GL_ALPHA16F_EXT 0x881C 
+#define GL_LUMINANCE16F_EXT 0x881E +#define GL_LUMINANCE_ALPHA16F_EXT 0x881F +#define GL_RGB10_A2_EXT 0x8059 +#define GL_RGB10_EXT 0x8052 +#define GL_BGRA8_EXT 0x93A1 +#endif + +/*------------------------------------------------------------------------* + * IMG extension tokens + *------------------------------------------------------------------------*/ + +/* GL_IMG_read_format */ +#ifndef GL_IMG_read_format +#define GL_BGRA_IMG 0x80E1 +#define GL_UNSIGNED_SHORT_4_4_4_4_REV_IMG 0x8365 +#endif + +/* GL_IMG_texture_compression_pvrtc */ +#ifndef GL_IMG_texture_compression_pvrtc +#define GL_COMPRESSED_RGB_PVRTC_4BPPV1_IMG 0x8C00 +#define GL_COMPRESSED_RGB_PVRTC_2BPPV1_IMG 0x8C01 +#define GL_COMPRESSED_RGBA_PVRTC_4BPPV1_IMG 0x8C02 +#define GL_COMPRESSED_RGBA_PVRTC_2BPPV1_IMG 0x8C03 +#endif + +/* GL_IMG_texture_env_enhanced_fixed_function */ +#ifndef GL_IMG_texture_env_enhanced_fixed_function +#define GL_MODULATE_COLOR_IMG 0x8C04 +#define GL_RECIP_ADD_SIGNED_ALPHA_IMG 0x8C05 +#define GL_TEXTURE_ALPHA_MODULATE_IMG 0x8C06 +#define GL_FACTOR_ALPHA_MODULATE_IMG 0x8C07 +#define GL_FRAGMENT_ALPHA_MODULATE_IMG 0x8C08 +#define GL_ADD_BLEND_IMG 0x8C09 +#define GL_DOT3_RGBA_IMG 0x86AF +#endif + +/* GL_IMG_user_clip_plane */ +#ifndef GL_IMG_user_clip_plane +#define GL_CLIP_PLANE0_IMG 0x3000 +#define GL_CLIP_PLANE1_IMG 0x3001 +#define GL_CLIP_PLANE2_IMG 0x3002 +#define GL_CLIP_PLANE3_IMG 0x3003 +#define GL_CLIP_PLANE4_IMG 0x3004 +#define GL_CLIP_PLANE5_IMG 0x3005 +#define GL_MAX_CLIP_PLANES_IMG 0x0D32 +#endif + +/* GL_IMG_multisampled_render_to_texture */ +#ifndef GL_IMG_multisampled_render_to_texture +#define GL_RENDERBUFFER_SAMPLES_IMG 0x9133 +#define GL_FRAMEBUFFER_INCOMPLETE_MULTISAMPLE_IMG 0x9134 +#define GL_MAX_SAMPLES_IMG 0x9135 +#define GL_TEXTURE_SAMPLES_IMG 0x9136 +#endif + +/*------------------------------------------------------------------------* + * NV extension tokens + *------------------------------------------------------------------------*/ + +/* GL_NV_fence */ 
+#ifndef GL_NV_fence +#define GL_ALL_COMPLETED_NV 0x84F2 +#define GL_FENCE_STATUS_NV 0x84F3 +#define GL_FENCE_CONDITION_NV 0x84F4 +#endif + +/*------------------------------------------------------------------------* + * QCOM extension tokens + *------------------------------------------------------------------------*/ + +/* GL_QCOM_driver_control */ +/* No new tokens introduced by this extension. */ + +/* GL_QCOM_extended_get */ +#ifndef GL_QCOM_extended_get +#define GL_TEXTURE_WIDTH_QCOM 0x8BD2 +#define GL_TEXTURE_HEIGHT_QCOM 0x8BD3 +#define GL_TEXTURE_DEPTH_QCOM 0x8BD4 +#define GL_TEXTURE_INTERNAL_FORMAT_QCOM 0x8BD5 +#define GL_TEXTURE_FORMAT_QCOM 0x8BD6 +#define GL_TEXTURE_TYPE_QCOM 0x8BD7 +#define GL_TEXTURE_IMAGE_VALID_QCOM 0x8BD8 +#define GL_TEXTURE_NUM_LEVELS_QCOM 0x8BD9 +#define GL_TEXTURE_TARGET_QCOM 0x8BDA +#define GL_TEXTURE_OBJECT_VALID_QCOM 0x8BDB +#define GL_STATE_RESTORE 0x8BDC +#endif + +/* GL_QCOM_extended_get2 */ +/* No new tokens introduced by this extension. */ + +/* GL_QCOM_perfmon_global_mode */ +#ifndef GL_QCOM_perfmon_global_mode +#define GL_PERFMON_GLOBAL_MODE_QCOM 0x8FA0 +#endif + +/* GL_QCOM_writeonly_rendering */ +#ifndef GL_QCOM_writeonly_rendering +#define GL_WRITEONLY_RENDERING_QCOM 0x8823 +#endif + +/* GL_QCOM_tiled_rendering */ +#ifndef GL_QCOM_tiled_rendering +#define GL_COLOR_BUFFER_BIT0_QCOM 0x00000001 +#define GL_COLOR_BUFFER_BIT1_QCOM 0x00000002 +#define GL_COLOR_BUFFER_BIT2_QCOM 0x00000004 +#define GL_COLOR_BUFFER_BIT3_QCOM 0x00000008 +#define GL_COLOR_BUFFER_BIT4_QCOM 0x00000010 +#define GL_COLOR_BUFFER_BIT5_QCOM 0x00000020 +#define GL_COLOR_BUFFER_BIT6_QCOM 0x00000040 +#define GL_COLOR_BUFFER_BIT7_QCOM 0x00000080 +#define GL_DEPTH_BUFFER_BIT0_QCOM 0x00000100 +#define GL_DEPTH_BUFFER_BIT1_QCOM 0x00000200 +#define GL_DEPTH_BUFFER_BIT2_QCOM 0x00000400 +#define GL_DEPTH_BUFFER_BIT3_QCOM 0x00000800 +#define GL_DEPTH_BUFFER_BIT4_QCOM 0x00001000 +#define GL_DEPTH_BUFFER_BIT5_QCOM 0x00002000 +#define GL_DEPTH_BUFFER_BIT6_QCOM 
0x00004000 +#define GL_DEPTH_BUFFER_BIT7_QCOM 0x00008000 +#define GL_STENCIL_BUFFER_BIT0_QCOM 0x00010000 +#define GL_STENCIL_BUFFER_BIT1_QCOM 0x00020000 +#define GL_STENCIL_BUFFER_BIT2_QCOM 0x00040000 +#define GL_STENCIL_BUFFER_BIT3_QCOM 0x00080000 +#define GL_STENCIL_BUFFER_BIT4_QCOM 0x00100000 +#define GL_STENCIL_BUFFER_BIT5_QCOM 0x00200000 +#define GL_STENCIL_BUFFER_BIT6_QCOM 0x00400000 +#define GL_STENCIL_BUFFER_BIT7_QCOM 0x00800000 +#define GL_MULTISAMPLE_BUFFER_BIT0_QCOM 0x01000000 +#define GL_MULTISAMPLE_BUFFER_BIT1_QCOM 0x02000000 +#define GL_MULTISAMPLE_BUFFER_BIT2_QCOM 0x04000000 +#define GL_MULTISAMPLE_BUFFER_BIT3_QCOM 0x08000000 +#define GL_MULTISAMPLE_BUFFER_BIT4_QCOM 0x10000000 +#define GL_MULTISAMPLE_BUFFER_BIT5_QCOM 0x20000000 +#define GL_MULTISAMPLE_BUFFER_BIT6_QCOM 0x40000000 +#define GL_MULTISAMPLE_BUFFER_BIT7_QCOM 0x80000000 +#endif + +/*------------------------------------------------------------------------* + * End of extension tokens, start of corresponding extension functions + *------------------------------------------------------------------------*/ + +/*------------------------------------------------------------------------* + * OES extension functions + *------------------------------------------------------------------------*/ + +/* GL_OES_blend_equation_separate */ +#ifndef GL_OES_blend_equation_separate +#define GL_OES_blend_equation_separate 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_API void GL_APIENTRY glBlendEquationSeparateOES (GLenum modeRGB, GLenum modeAlpha); +#endif +typedef void (GL_APIENTRYP PFNGLBLENDEQUATIONSEPARATEOESPROC) (GLenum modeRGB, GLenum modeAlpha); +#endif + +/* GL_OES_blend_func_separate */ +#ifndef GL_OES_blend_func_separate +#define GL_OES_blend_func_separate 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_API void GL_APIENTRY glBlendFuncSeparateOES (GLenum srcRGB, GLenum dstRGB, GLenum srcAlpha, GLenum dstAlpha); +#endif +typedef void (GL_APIENTRYP PFNGLBLENDFUNCSEPARATEOESPROC) (GLenum srcRGB, GLenum dstRGB, GLenum srcAlpha, 
GLenum dstAlpha); +#endif + +/* GL_OES_blend_subtract */ +#ifndef GL_OES_blend_subtract +#define GL_OES_blend_subtract 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_API void GL_APIENTRY glBlendEquationOES (GLenum mode); +#endif +typedef void (GL_APIENTRYP PFNGLBLENDEQUATIONOESPROC) (GLenum mode); +#endif + +/* GL_OES_byte_coordinates */ +#ifndef GL_OES_byte_coordinates +#define GL_OES_byte_coordinates 1 +#endif + +/* GL_OES_compressed_ETC1_RGB8_texture */ +#ifndef GL_OES_compressed_ETC1_RGB8_texture +#define GL_OES_compressed_ETC1_RGB8_texture 1 +#endif + +/* GL_OES_depth24 */ +#ifndef GL_OES_depth24 +#define GL_OES_depth24 1 +#endif + +/* GL_OES_depth32 */ +#ifndef GL_OES_depth32 +#define GL_OES_depth32 1 +#endif + +/* GL_OES_draw_texture */ +#ifndef GL_OES_draw_texture +#define GL_OES_draw_texture 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_API void GL_APIENTRY glDrawTexsOES (GLshort x, GLshort y, GLshort z, GLshort width, GLshort height); +GL_API void GL_APIENTRY glDrawTexiOES (GLint x, GLint y, GLint z, GLint width, GLint height); +GL_API void GL_APIENTRY glDrawTexxOES (GLfixed x, GLfixed y, GLfixed z, GLfixed width, GLfixed height); +GL_API void GL_APIENTRY glDrawTexsvOES (const GLshort *coords); +GL_API void GL_APIENTRY glDrawTexivOES (const GLint *coords); +GL_API void GL_APIENTRY glDrawTexxvOES (const GLfixed *coords); +GL_API void GL_APIENTRY glDrawTexfOES (GLfloat x, GLfloat y, GLfloat z, GLfloat width, GLfloat height); +GL_API void GL_APIENTRY glDrawTexfvOES (const GLfloat *coords); +#endif +typedef void (GL_APIENTRYP PFNGLDRAWTEXSOESPROC) (GLshort x, GLshort y, GLshort z, GLshort width, GLshort height); +typedef void (GL_APIENTRYP PFNGLDRAWTEXIOESPROC) (GLint x, GLint y, GLint z, GLint width, GLint height); +typedef void (GL_APIENTRYP PFNGLDRAWTEXXOESPROC) (GLfixed x, GLfixed y, GLfixed z, GLfixed width, GLfixed height); +typedef void (GL_APIENTRYP PFNGLDRAWTEXSVOESPROC) (const GLshort *coords); +typedef void (GL_APIENTRYP PFNGLDRAWTEXIVOESPROC) (const GLint *coords); 
+typedef void (GL_APIENTRYP PFNGLDRAWTEXXVOESPROC) (const GLfixed *coords); +typedef void (GL_APIENTRYP PFNGLDRAWTEXFOESPROC) (GLfloat x, GLfloat y, GLfloat z, GLfloat width, GLfloat height); +typedef void (GL_APIENTRYP PFNGLDRAWTEXFVOESPROC) (const GLfloat *coords); +#endif + +/* GL_OES_EGL_image */ +#ifndef GL_OES_EGL_image +#define GL_OES_EGL_image 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_API void GL_APIENTRY glEGLImageTargetTexture2DOES (GLenum target, GLeglImageOES image); +GL_API void GL_APIENTRY glEGLImageTargetRenderbufferStorageOES (GLenum target, GLeglImageOES image); +#endif +typedef void (GL_APIENTRYP PFNGLEGLIMAGETARGETTEXTURE2DOESPROC) (GLenum target, GLeglImageOES image); +typedef void (GL_APIENTRYP PFNGLEGLIMAGETARGETRENDERBUFFERSTORAGEOESPROC) (GLenum target, GLeglImageOES image); +#endif + +/* GL_OES_EGL_image_external */ +#ifndef GL_OES_EGL_image_external +#define GL_OES_EGL_image_external 1 +/* glEGLImageTargetTexture2DOES defined in GL_OES_EGL_image already. */ +#endif + +/* GL_OES_element_index_uint */ +#ifndef GL_OES_element_index_uint +#define GL_OES_element_index_uint 1 +#endif + +/* GL_OES_extended_matrix_palette */ +#ifndef GL_OES_extended_matrix_palette +#define GL_OES_extended_matrix_palette 1 +#endif + +/* GL_OES_fbo_render_mipmap */ +#ifndef GL_OES_fbo_render_mipmap +#define GL_OES_fbo_render_mipmap 1 +#endif + +/* GL_OES_fixed_point */ +#ifndef GL_OES_fixed_point +#define GL_OES_fixed_point 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_API void GL_APIENTRY glAlphaFuncxOES (GLenum func, GLclampx ref); +GL_API void GL_APIENTRY glClearColorxOES (GLclampx red, GLclampx green, GLclampx blue, GLclampx alpha); +GL_API void GL_APIENTRY glClearDepthxOES (GLclampx depth); +GL_API void GL_APIENTRY glClipPlanexOES (GLenum plane, const GLfixed *equation); +GL_API void GL_APIENTRY glColor4xOES (GLfixed red, GLfixed green, GLfixed blue, GLfixed alpha); +GL_API void GL_APIENTRY glDepthRangexOES (GLclampx zNear, GLclampx zFar); +GL_API void GL_APIENTRY glFogxOES 
(GLenum pname, GLfixed param); +GL_API void GL_APIENTRY glFogxvOES (GLenum pname, const GLfixed *params); +GL_API void GL_APIENTRY glFrustumxOES (GLfixed left, GLfixed right, GLfixed bottom, GLfixed top, GLfixed zNear, GLfixed zFar); +GL_API void GL_APIENTRY glGetClipPlanexOES (GLenum pname, GLfixed eqn[4]); +GL_API void GL_APIENTRY glGetFixedvOES (GLenum pname, GLfixed *params); +GL_API void GL_APIENTRY glGetLightxvOES (GLenum light, GLenum pname, GLfixed *params); +GL_API void GL_APIENTRY glGetMaterialxvOES (GLenum face, GLenum pname, GLfixed *params); +GL_API void GL_APIENTRY glGetTexEnvxvOES (GLenum env, GLenum pname, GLfixed *params); +GL_API void GL_APIENTRY glGetTexParameterxvOES (GLenum target, GLenum pname, GLfixed *params); +GL_API void GL_APIENTRY glLightModelxOES (GLenum pname, GLfixed param); +GL_API void GL_APIENTRY glLightModelxvOES (GLenum pname, const GLfixed *params); +GL_API void GL_APIENTRY glLightxOES (GLenum light, GLenum pname, GLfixed param); +GL_API void GL_APIENTRY glLightxvOES (GLenum light, GLenum pname, const GLfixed *params); +GL_API void GL_APIENTRY glLineWidthxOES (GLfixed width); +GL_API void GL_APIENTRY glLoadMatrixxOES (const GLfixed *m); +GL_API void GL_APIENTRY glMaterialxOES (GLenum face, GLenum pname, GLfixed param); +GL_API void GL_APIENTRY glMaterialxvOES (GLenum face, GLenum pname, const GLfixed *params); +GL_API void GL_APIENTRY glMultMatrixxOES (const GLfixed *m); +GL_API void GL_APIENTRY glMultiTexCoord4xOES (GLenum target, GLfixed s, GLfixed t, GLfixed r, GLfixed q); +GL_API void GL_APIENTRY glNormal3xOES (GLfixed nx, GLfixed ny, GLfixed nz); +GL_API void GL_APIENTRY glOrthoxOES (GLfixed left, GLfixed right, GLfixed bottom, GLfixed top, GLfixed zNear, GLfixed zFar); +GL_API void GL_APIENTRY glPointParameterxOES (GLenum pname, GLfixed param); +GL_API void GL_APIENTRY glPointParameterxvOES (GLenum pname, const GLfixed *params); +GL_API void GL_APIENTRY glPointSizexOES (GLfixed size); +GL_API void GL_APIENTRY 
glPolygonOffsetxOES (GLfixed factor, GLfixed units); +GL_API void GL_APIENTRY glRotatexOES (GLfixed angle, GLfixed x, GLfixed y, GLfixed z); +GL_API void GL_APIENTRY glSampleCoveragexOES (GLclampx value, GLboolean invert); +GL_API void GL_APIENTRY glScalexOES (GLfixed x, GLfixed y, GLfixed z); +GL_API void GL_APIENTRY glTexEnvxOES (GLenum target, GLenum pname, GLfixed param); +GL_API void GL_APIENTRY glTexEnvxvOES (GLenum target, GLenum pname, const GLfixed *params); +GL_API void GL_APIENTRY glTexParameterxOES (GLenum target, GLenum pname, GLfixed param); +GL_API void GL_APIENTRY glTexParameterxvOES (GLenum target, GLenum pname, const GLfixed *params); +GL_API void GL_APIENTRY glTranslatexOES (GLfixed x, GLfixed y, GLfixed z); +#endif +typedef void (GL_APIENTRYP PFNGLALPHAFUNCXOESPROC) (GLenum func, GLclampx ref); +typedef void (GL_APIENTRYP PFNGLCLEARCOLORXOESPROC) (GLclampx red, GLclampx green, GLclampx blue, GLclampx alpha); +typedef void (GL_APIENTRYP PFNGLCLEARDEPTHXOESPROC) (GLclampx depth); +typedef void (GL_APIENTRYP PFNGLCLIPPLANEXOESPROC) (GLenum plane, const GLfixed *equation); +typedef void (GL_APIENTRYP PFNGLCOLOR4XOESPROC) (GLfixed red, GLfixed green, GLfixed blue, GLfixed alpha); +typedef void (GL_APIENTRYP PFNGLDEPTHRANGEXOESPROC) (GLclampx zNear, GLclampx zFar); +typedef void (GL_APIENTRYP PFNGLFOGXOESPROC) (GLenum pname, GLfixed param); +typedef void (GL_APIENTRYP PFNGLFOGXVOESPROC) (GLenum pname, const GLfixed *params); +typedef void (GL_APIENTRYP PFNGLFRUSTUMXOESPROC) (GLfixed left, GLfixed right, GLfixed bottom, GLfixed top, GLfixed zNear, GLfixed zFar); +typedef void (GL_APIENTRYP PFNGLGETCLIPPLANEXOESPROC) (GLenum pname, GLfixed eqn[4]); +typedef void (GL_APIENTRYP PFNGLGETFIXEDVOESPROC) (GLenum pname, GLfixed *params); +typedef void (GL_APIENTRYP PFNGLGETLIGHTXVOESPROC) (GLenum light, GLenum pname, GLfixed *params); +typedef void (GL_APIENTRYP PFNGLGETMATERIALXVOESPROC) (GLenum face, GLenum pname, GLfixed *params); +typedef void 
(GL_APIENTRYP PFNGLGETTEXENVXVOESPROC) (GLenum env, GLenum pname, GLfixed *params); +typedef void (GL_APIENTRYP PFNGLGETTEXPARAMETERXVOESPROC) (GLenum target, GLenum pname, GLfixed *params); +typedef void (GL_APIENTRYP PFNGLLIGHTMODELXOESPROC) (GLenum pname, GLfixed param); +typedef void (GL_APIENTRYP PFNGLLIGHTMODELXVOESPROC) (GLenum pname, const GLfixed *params); +typedef void (GL_APIENTRYP PFNGLLIGHTXOESPROC) (GLenum light, GLenum pname, GLfixed param); +typedef void (GL_APIENTRYP PFNGLLIGHTXVOESPROC) (GLenum light, GLenum pname, const GLfixed *params); +typedef void (GL_APIENTRYP PFNGLLINEWIDTHXOESPROC) (GLfixed width); +typedef void (GL_APIENTRYP PFNGLLOADMATRIXXOESPROC) (const GLfixed *m); +typedef void (GL_APIENTRYP PFNGLMATERIALXOESPROC) (GLenum face, GLenum pname, GLfixed param); +typedef void (GL_APIENTRYP PFNGLMATERIALXVOESPROC) (GLenum face, GLenum pname, const GLfixed *params); +typedef void (GL_APIENTRYP PFNGLMULTMATRIXXOESPROC) (const GLfixed *m); +typedef void (GL_APIENTRYP PFNGLMULTITEXCOORD4XOESPROC) (GLenum target, GLfixed s, GLfixed t, GLfixed r, GLfixed q); +typedef void (GL_APIENTRYP PFNGLNORMAL3XOESPROC) (GLfixed nx, GLfixed ny, GLfixed nz); +typedef void (GL_APIENTRYP PFNGLORTHOXOESPROC) (GLfixed left, GLfixed right, GLfixed bottom, GLfixed top, GLfixed zNear, GLfixed zFar); +typedef void (GL_APIENTRYP PFNGLPOINTPARAMETERXOESPROC) (GLenum pname, GLfixed param); +typedef void (GL_APIENTRYP PFNGLPOINTPARAMETERXVOESPROC) (GLenum pname, const GLfixed *params); +typedef void (GL_APIENTRYP PFNGLPOINTSIZEXOESPROC) (GLfixed size); +typedef void (GL_APIENTRYP PFNGLPOLYGONOFFSETXOESPROC) (GLfixed factor, GLfixed units); +typedef void (GL_APIENTRYP PFNGLROTATEXOESPROC) (GLfixed angle, GLfixed x, GLfixed y, GLfixed z); +typedef void (GL_APIENTRYP PFNGLSAMPLECOVERAGEXOESPROC) (GLclampx value, GLboolean invert); +typedef void (GL_APIENTRYP PFNGLSCALEXOESPROC) (GLfixed x, GLfixed y, GLfixed z); +typedef void (GL_APIENTRYP PFNGLTEXENVXOESPROC) (GLenum 
target, GLenum pname, GLfixed param); +typedef void (GL_APIENTRYP PFNGLTEXENVXVOESPROC) (GLenum target, GLenum pname, const GLfixed *params); +typedef void (GL_APIENTRYP PFNGLTEXPARAMETERXOESPROC) (GLenum target, GLenum pname, GLfixed param); +typedef void (GL_APIENTRYP PFNGLTEXPARAMETERXVOESPROC) (GLenum target, GLenum pname, const GLfixed *params); +typedef void (GL_APIENTRYP PFNGLTRANSLATEXOESPROC) (GLfixed x, GLfixed y, GLfixed z); +#endif + +/* GL_OES_framebuffer_object */ +#ifndef GL_OES_framebuffer_object +#define GL_OES_framebuffer_object 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_API GLboolean GL_APIENTRY glIsRenderbufferOES (GLuint renderbuffer); +GL_API void GL_APIENTRY glBindRenderbufferOES (GLenum target, GLuint renderbuffer); +GL_API void GL_APIENTRY glDeleteRenderbuffersOES (GLsizei n, const GLuint* renderbuffers); +GL_API void GL_APIENTRY glGenRenderbuffersOES (GLsizei n, GLuint* renderbuffers); +GL_API void GL_APIENTRY glRenderbufferStorageOES (GLenum target, GLenum internalformat, GLsizei width, GLsizei height); +GL_API void GL_APIENTRY glGetRenderbufferParameterivOES (GLenum target, GLenum pname, GLint* params); +GL_API GLboolean GL_APIENTRY glIsFramebufferOES (GLuint framebuffer); +GL_API void GL_APIENTRY glBindFramebufferOES (GLenum target, GLuint framebuffer); +GL_API void GL_APIENTRY glDeleteFramebuffersOES (GLsizei n, const GLuint* framebuffers); +GL_API void GL_APIENTRY glGenFramebuffersOES (GLsizei n, GLuint* framebuffers); +GL_API GLenum GL_APIENTRY glCheckFramebufferStatusOES (GLenum target); +GL_API void GL_APIENTRY glFramebufferRenderbufferOES (GLenum target, GLenum attachment, GLenum renderbuffertarget, GLuint renderbuffer); +GL_API void GL_APIENTRY glFramebufferTexture2DOES (GLenum target, GLenum attachment, GLenum textarget, GLuint texture, GLint level); +GL_API void GL_APIENTRY glGetFramebufferAttachmentParameterivOES (GLenum target, GLenum attachment, GLenum pname, GLint* params); +GL_API void GL_APIENTRY glGenerateMipmapOES (GLenum 
target); +#endif +typedef GLboolean (GL_APIENTRYP PFNGLISRENDERBUFFEROESPROC) (GLuint renderbuffer); +typedef void (GL_APIENTRYP PFNGLBINDRENDERBUFFEROESPROC) (GLenum target, GLuint renderbuffer); +typedef void (GL_APIENTRYP PFNGLDELETERENDERBUFFERSOESPROC) (GLsizei n, const GLuint* renderbuffers); +typedef void (GL_APIENTRYP PFNGLGENRENDERBUFFERSOESPROC) (GLsizei n, GLuint* renderbuffers); +typedef void (GL_APIENTRYP PFNGLRENDERBUFFERSTORAGEOESPROC) (GLenum target, GLenum internalformat, GLsizei width, GLsizei height); +typedef void (GL_APIENTRYP PFNGLGETRENDERBUFFERPARAMETERIVOESPROC) (GLenum target, GLenum pname, GLint* params); +typedef GLboolean (GL_APIENTRYP PFNGLISFRAMEBUFFEROESPROC) (GLuint framebuffer); +typedef void (GL_APIENTRYP PFNGLBINDFRAMEBUFFEROESPROC) (GLenum target, GLuint framebuffer); +typedef void (GL_APIENTRYP PFNGLDELETEFRAMEBUFFERSOESPROC) (GLsizei n, const GLuint* framebuffers); +typedef void (GL_APIENTRYP PFNGLGENFRAMEBUFFERSOESPROC) (GLsizei n, GLuint* framebuffers); +typedef GLenum (GL_APIENTRYP PFNGLCHECKFRAMEBUFFERSTATUSOESPROC) (GLenum target); +typedef void (GL_APIENTRYP PFNGLFRAMEBUFFERRENDERBUFFEROESPROC) (GLenum target, GLenum attachment, GLenum renderbuffertarget, GLuint renderbuffer); +typedef void (GL_APIENTRYP PFNGLFRAMEBUFFERTEXTURE2DOESPROC) (GLenum target, GLenum attachment, GLenum textarget, GLuint texture, GLint level); +typedef void (GL_APIENTRYP PFNGLGETFRAMEBUFFERATTACHMENTPARAMETERIVOESPROC) (GLenum target, GLenum attachment, GLenum pname, GLint* params); +typedef void (GL_APIENTRYP PFNGLGENERATEMIPMAPOESPROC) (GLenum target); +#endif + +/* GL_OES_mapbuffer */ +#ifndef GL_OES_mapbuffer +#define GL_OES_mapbuffer 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_API void* GL_APIENTRY glMapBufferOES (GLenum target, GLenum access); +GL_API GLboolean GL_APIENTRY glUnmapBufferOES (GLenum target); +GL_API void GL_APIENTRY glGetBufferPointervOES (GLenum target, GLenum pname, GLvoid ** params); +#endif +typedef void* (GL_APIENTRYP 
PFNGLMAPBUFFEROESPROC) (GLenum target, GLenum access); +typedef GLboolean (GL_APIENTRYP PFNGLUNMAPBUFFEROESPROC) (GLenum target); +typedef void (GL_APIENTRYP PFNGLGETBUFFERPOINTERVOESPROC) (GLenum target, GLenum pname, GLvoid ** params); +#endif + +/* GL_OES_matrix_get */ +#ifndef GL_OES_matrix_get +#define GL_OES_matrix_get 1 +#endif + +/* GL_OES_matrix_palette */ +#ifndef GL_OES_matrix_palette +#define GL_OES_matrix_palette 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_API void GL_APIENTRY glCurrentPaletteMatrixOES (GLuint matrixpaletteindex); +GL_API void GL_APIENTRY glLoadPaletteFromModelViewMatrixOES (void); +GL_API void GL_APIENTRY glMatrixIndexPointerOES (GLint size, GLenum type, GLsizei stride, const GLvoid *pointer); +GL_API void GL_APIENTRY glWeightPointerOES (GLint size, GLenum type, GLsizei stride, const GLvoid *pointer); +#endif +typedef void (GL_APIENTRYP PFNGLCURRENTPALETTEMATRIXOESPROC) (GLuint matrixpaletteindex); +typedef void (GL_APIENTRYP PFNGLLOADPALETTEFROMMODELVIEWMATRIXOESPROC) (void); +typedef void (GL_APIENTRYP PFNGLMATRIXINDEXPOINTEROESPROC) (GLint size, GLenum type, GLsizei stride, const GLvoid *pointer); +typedef void (GL_APIENTRYP PFNGLWEIGHTPOINTEROESPROC) (GLint size, GLenum type, GLsizei stride, const GLvoid *pointer); +#endif + +/* GL_OES_packed_depth_stencil */ +#ifndef GL_OES_packed_depth_stencil +#define GL_OES_packed_depth_stencil 1 +#endif + +/* GL_OES_required_internalformat */ +#ifndef GL_OES_required_internalformat +#define GL_OES_required_internalformat 1 +#endif + +/* GL_OES_query_matrix */ +#ifndef GL_OES_query_matrix +#define GL_OES_query_matrix 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_API GLbitfield GL_APIENTRY glQueryMatrixxOES (GLfixed mantissa[16], GLint exponent[16]); +#endif +typedef GLbitfield (GL_APIENTRYP PFNGLQUERYMATRIXXOESPROC) (GLfixed mantissa[16], GLint exponent[16]); +#endif + +/* GL_OES_rgb8_rgba8 */ +#ifndef GL_OES_rgb8_rgba8 +#define GL_OES_rgb8_rgba8 1 +#endif + +/* GL_OES_single_precision */ +#ifndef 
GL_OES_single_precision +#define GL_OES_single_precision 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_API void GL_APIENTRY glDepthRangefOES (GLclampf zNear, GLclampf zFar); +GL_API void GL_APIENTRY glFrustumfOES (GLfloat left, GLfloat right, GLfloat bottom, GLfloat top, GLfloat zNear, GLfloat zFar); +GL_API void GL_APIENTRY glOrthofOES (GLfloat left, GLfloat right, GLfloat bottom, GLfloat top, GLfloat zNear, GLfloat zFar); +GL_API void GL_APIENTRY glClipPlanefOES (GLenum plane, const GLfloat *equation); +GL_API void GL_APIENTRY glGetClipPlanefOES (GLenum pname, GLfloat eqn[4]); +GL_API void GL_APIENTRY glClearDepthfOES (GLclampf depth); +#endif +typedef void (GL_APIENTRYP PFNGLDEPTHRANGEFOESPROC) (GLclampf zNear, GLclampf zFar); +typedef void (GL_APIENTRYP PFNGLFRUSTUMFOESPROC) (GLfloat left, GLfloat right, GLfloat bottom, GLfloat top, GLfloat zNear, GLfloat zFar); +typedef void (GL_APIENTRYP PFNGLORTHOFOESPROC) (GLfloat left, GLfloat right, GLfloat bottom, GLfloat top, GLfloat zNear, GLfloat zFar); +typedef void (GL_APIENTRYP PFNGLCLIPPLANEFOESPROC) (GLenum plane, const GLfloat *equation); +typedef void (GL_APIENTRYP PFNGLGETCLIPPLANEFOESPROC) (GLenum pname, GLfloat eqn[4]); +typedef void (GL_APIENTRYP PFNGLCLEARDEPTHFOESPROC) (GLclampf depth); +#endif + +/* GL_OES_stencil1 */ +#ifndef GL_OES_stencil1 +#define GL_OES_stencil1 1 +#endif + +/* GL_OES_stencil4 */ +#ifndef GL_OES_stencil4 +#define GL_OES_stencil4 1 +#endif + +/* GL_OES_stencil8 */ +#ifndef GL_OES_stencil8 +#define GL_OES_stencil8 1 +#endif + +/* GL_OES_stencil_wrap */ +#ifndef GL_OES_stencil_wrap +#define GL_OES_stencil_wrap 1 +#endif + +/* GL_OES_texture_cube_map */ +#ifndef GL_OES_texture_cube_map +#define GL_OES_texture_cube_map 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_API void GL_APIENTRY glTexGenfOES (GLenum coord, GLenum pname, GLfloat param); +GL_API void GL_APIENTRY glTexGenfvOES (GLenum coord, GLenum pname, const GLfloat *params); +GL_API void GL_APIENTRY glTexGeniOES (GLenum coord, GLenum pname, GLint param); 
+GL_API void GL_APIENTRY glTexGenivOES (GLenum coord, GLenum pname, const GLint *params); +GL_API void GL_APIENTRY glTexGenxOES (GLenum coord, GLenum pname, GLfixed param); +GL_API void GL_APIENTRY glTexGenxvOES (GLenum coord, GLenum pname, const GLfixed *params); +GL_API void GL_APIENTRY glGetTexGenfvOES (GLenum coord, GLenum pname, GLfloat *params); +GL_API void GL_APIENTRY glGetTexGenivOES (GLenum coord, GLenum pname, GLint *params); +GL_API void GL_APIENTRY glGetTexGenxvOES (GLenum coord, GLenum pname, GLfixed *params); +#endif +typedef void (GL_APIENTRYP PFNGLTEXGENFOESPROC) (GLenum coord, GLenum pname, GLfloat param); +typedef void (GL_APIENTRYP PFNGLTEXGENFVOESPROC) (GLenum coord, GLenum pname, const GLfloat *params); +typedef void (GL_APIENTRYP PFNGLTEXGENIOESPROC) (GLenum coord, GLenum pname, GLint param); +typedef void (GL_APIENTRYP PFNGLTEXGENIVOESPROC) (GLenum coord, GLenum pname, const GLint *params); +typedef void (GL_APIENTRYP PFNGLTEXGENXOESPROC) (GLenum coord, GLenum pname, GLfixed param); +typedef void (GL_APIENTRYP PFNGLTEXGENXVOESPROC) (GLenum coord, GLenum pname, const GLfixed *params); +typedef void (GL_APIENTRYP PFNGLGETTEXGENFVOESPROC) (GLenum coord, GLenum pname, GLfloat *params); +typedef void (GL_APIENTRYP PFNGLGETTEXGENIVOESPROC) (GLenum coord, GLenum pname, GLint *params); +typedef void (GL_APIENTRYP PFNGLGETTEXGENXVOESPROC) (GLenum coord, GLenum pname, GLfixed *params); +#endif + +/* GL_OES_texture_env_crossbar */ +#ifndef GL_OES_texture_env_crossbar +#define GL_OES_texture_env_crossbar 1 +#endif + +/* GL_OES_texture_mirrored_repeat */ +#ifndef GL_OES_texture_mirrored_repeat +#define GL_OES_texture_mirrored_repeat 1 +#endif + +/* GL_OES_vertex_array_object */ +#ifndef GL_OES_vertex_array_object +#define GL_OES_vertex_array_object 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_API void GL_APIENTRY glBindVertexArrayOES (GLuint array); +GL_API void GL_APIENTRY glDeleteVertexArraysOES (GLsizei n, const GLuint *arrays); +GL_API void GL_APIENTRY 
glGenVertexArraysOES (GLsizei n, GLuint *arrays); +GL_API GLboolean GL_APIENTRY glIsVertexArrayOES (GLuint array); +#endif +typedef void (GL_APIENTRYP PFNGLBINDVERTEXARRAYOESPROC) (GLuint array); +typedef void (GL_APIENTRYP PFNGLDELETEVERTEXARRAYSOESPROC) (GLsizei n, const GLuint *arrays); +typedef void (GL_APIENTRYP PFNGLGENVERTEXARRAYSOESPROC) (GLsizei n, GLuint *arrays); +typedef GLboolean (GL_APIENTRYP PFNGLISVERTEXARRAYOESPROC) (GLuint array); +#endif + +/*------------------------------------------------------------------------* + * AMD extension functions + *------------------------------------------------------------------------*/ + +/* GL_AMD_compressed_3DC_texture */ +#ifndef GL_AMD_compressed_3DC_texture +#define GL_AMD_compressed_3DC_texture 1 +#endif + +/* GL_AMD_compressed_ATC_texture */ +#ifndef GL_AMD_compressed_ATC_texture +#define GL_AMD_compressed_ATC_texture 1 +#endif + +/*------------------------------------------------------------------------* + * APPLE extension functions + *------------------------------------------------------------------------*/ + +/* GL_APPLE_copy_texture_levels */ +#ifndef GL_APPLE_copy_texture_levels +#define GL_APPLE_copy_texture_levels 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_API void GL_APIENTRY glCopyTextureLevelsAPPLE (GLuint destinationTexture, GLuint sourceTexture, GLint sourceBaseLevel, GLsizei sourceLevelCount); +#endif +typedef void (GL_APIENTRYP PFNGLCOPYTEXTURELEVELSAPPLEPROC) (GLuint destinationTexture, GLuint sourceTexture, GLint sourceBaseLevel, GLsizei sourceLevelCount); +#endif + +/* GL_APPLE_framebuffer_multisample */ +#ifndef GL_APPLE_framebuffer_multisample +#define GL_APPLE_framebuffer_multisample 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_API void GL_APIENTRY glRenderbufferStorageMultisampleAPPLE (GLenum, GLsizei, GLenum, GLsizei, GLsizei); +GL_API void GL_APIENTRY glResolveMultisampleFramebufferAPPLE (void); +#endif /* GL_GLEXT_PROTOTYPES */ +typedef void (GL_APIENTRYP PFNGLRENDERBUFFERSTORAGEMULTISAMPLEAPPLEPROC) 
(GLenum target, GLsizei samples, GLenum internalformat, GLsizei width, GLsizei height); +typedef void (GL_APIENTRYP PFNGLRESOLVEMULTISAMPLEFRAMEBUFFERAPPLEPROC) (void); +#endif + +/* GL_APPLE_sync */ +#ifndef GL_APPLE_sync +#define GL_APPLE_sync 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_API GLsync GL_APIENTRY glFenceSyncAPPLE (GLenum condition, GLbitfield flags); +GL_API GLboolean GL_APIENTRY glIsSyncAPPLE (GLsync sync); +GL_API void GL_APIENTRY glDeleteSyncAPPLE (GLsync sync); +GL_API GLenum GL_APIENTRY glClientWaitSyncAPPLE (GLsync sync, GLbitfield flags, GLuint64 timeout); +GL_API void GL_APIENTRY glWaitSyncAPPLE (GLsync sync, GLbitfield flags, GLuint64 timeout); +GL_API void GL_APIENTRY glGetInteger64vAPPLE (GLenum pname, GLint64 *params); +GL_API void GL_APIENTRY glGetSyncivAPPLE (GLsync sync, GLenum pname, GLsizei bufSize, GLsizei *length, GLint *values); +#endif +typedef GLsync (GL_APIENTRYP PFNGLFENCESYNCAPPLEPROC) (GLenum condition, GLbitfield flags); +typedef GLboolean (GL_APIENTRYP PFNGLISSYNCAPPLEPROC) (GLsync sync); +typedef void (GL_APIENTRYP PFNGLDELETESYNCAPPLEPROC) (GLsync sync); +typedef GLenum (GL_APIENTRYP PFNGLCLIENTWAITSYNCAPPLEPROC) (GLsync sync, GLbitfield flags, GLuint64 timeout); +typedef void (GL_APIENTRYP PFNGLWAITSYNCAPPLEPROC) (GLsync sync, GLbitfield flags, GLuint64 timeout); +typedef void (GL_APIENTRYP PFNGLGETINTEGER64VAPPLEPROC) (GLenum pname, GLint64 *params); +typedef void (GL_APIENTRYP PFNGLGETSYNCIVAPPLEPROC) (GLsync sync, GLenum pname, GLsizei bufSize, GLsizei *length, GLint *values); +#endif + +/* GL_APPLE_texture_2D_limited_npot */ +#ifndef GL_APPLE_texture_2D_limited_npot +#define GL_APPLE_texture_2D_limited_npot 1 +#endif + +/* GL_APPLE_texture_format_BGRA8888 */ +#ifndef GL_APPLE_texture_format_BGRA8888 +#define GL_APPLE_texture_format_BGRA8888 1 +#endif + +/* GL_APPLE_texture_max_level */ +#ifndef GL_APPLE_texture_max_level +#define GL_APPLE_texture_max_level 1 +#endif + 
+/*------------------------------------------------------------------------* + * ARM extension functions + *------------------------------------------------------------------------*/ + +/* GL_ARM_rgba8 */ +#ifndef GL_ARM_rgba8 +#define GL_ARM_rgba8 1 +#endif + +/*------------------------------------------------------------------------* + * EXT extension functions + *------------------------------------------------------------------------*/ + +/* GL_EXT_blend_minmax */ +#ifndef GL_EXT_blend_minmax +#define GL_EXT_blend_minmax 1 +#endif + +/* GL_EXT_discard_framebuffer */ +#ifndef GL_EXT_discard_framebuffer +#define GL_EXT_discard_framebuffer 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_API void GL_APIENTRY glDiscardFramebufferEXT (GLenum target, GLsizei numAttachments, const GLenum *attachments); +#endif +typedef void (GL_APIENTRYP PFNGLDISCARDFRAMEBUFFEREXTPROC) (GLenum target, GLsizei numAttachments, const GLenum *attachments); +#endif + +/* GL_EXT_map_buffer_range */ +#ifndef GL_EXT_map_buffer_range +#define GL_EXT_map_buffer_range 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_API void GL_APIENTRY *glMapBufferRangeEXT (GLenum target, GLintptr offset, GLsizeiptr length, GLbitfield access); +GL_API void GL_APIENTRY glFlushMappedBufferRangeEXT (GLenum target, GLintptr offset, GLsizeiptr length); +#endif +typedef void* (GL_APIENTRYP PFNGLMAPBUFFERRANGEEXTPROC) (GLenum target, GLintptr offset, GLsizeiptr length, GLbitfield access); +typedef void (GL_APIENTRYP PFNGLFLUSHMAPPEDBUFFERRANGEEXTPROC) (GLenum target, GLintptr offset, GLsizeiptr length); +#endif + +/* GL_EXT_multisampled_render_to_texture */ +#ifndef GL_EXT_multisampled_render_to_texture +#define GL_EXT_multisampled_render_to_texture 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_API void GL_APIENTRY glRenderbufferStorageMultisampleEXT (GLenum, GLsizei, GLenum, GLsizei, GLsizei); +GL_API void GL_APIENTRY glFramebufferTexture2DMultisampleEXT (GLenum, GLenum, GLenum, GLuint, GLint, GLsizei); +#endif +typedef void (GL_APIENTRYP 
PFNGLRENDERBUFFERSTORAGEMULTISAMPLEEXTPROC) (GLenum target, GLsizei samples, GLenum internalformat, GLsizei width, GLsizei height); +typedef void (GL_APIENTRYP PFNGLFRAMEBUFFERTEXTURE2DMULTISAMPLEEXTPROC) (GLenum target, GLenum attachment, GLenum textarget, GLuint texture, GLint level, GLsizei samples); +#endif + +/* GL_EXT_multi_draw_arrays */ +#ifndef GL_EXT_multi_draw_arrays +#define GL_EXT_multi_draw_arrays 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_API void GL_APIENTRY glMultiDrawArraysEXT (GLenum, GLint *, GLsizei *, GLsizei); +GL_API void GL_APIENTRY glMultiDrawElementsEXT (GLenum, const GLsizei *, GLenum, const GLvoid* *, GLsizei); +#endif /* GL_GLEXT_PROTOTYPES */ +typedef void (GL_APIENTRYP PFNGLMULTIDRAWARRAYSEXTPROC) (GLenum mode, GLint *first, GLsizei *count, GLsizei primcount); +typedef void (GL_APIENTRYP PFNGLMULTIDRAWELEMENTSEXTPROC) (GLenum mode, const GLsizei *count, GLenum type, const GLvoid* *indices, GLsizei primcount); +#endif + +/* GL_EXT_read_format_bgra */ +#ifndef GL_EXT_read_format_bgra +#define GL_EXT_read_format_bgra 1 +#endif + +/* GL_EXT_robustness */ +#ifndef GL_EXT_robustness +#define GL_EXT_robustness 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_API GLenum GL_APIENTRY glGetGraphicsResetStatusEXT (void); +GL_API void GL_APIENTRY glReadnPixelsEXT (GLint x, GLint y, GLsizei width, GLsizei height, GLenum format, GLenum type, GLsizei bufSize, void *data); +GL_API void GL_APIENTRY glGetnUniformfvEXT (GLuint program, GLint location, GLsizei bufSize, float *params); +GL_API void GL_APIENTRY glGetnUniformivEXT (GLuint program, GLint location, GLsizei bufSize, GLint *params); +#endif +typedef GLenum (GL_APIENTRYP PFNGLGETGRAPHICSRESETSTATUSEXTPROC) (void); +typedef void (GL_APIENTRYP PFNGLREADNPIXELSEXTPROC) (GLint x, GLint y, GLsizei width, GLsizei height, GLenum format, GLenum type, GLsizei bufSize, void *data); +typedef void (GL_APIENTRYP PFNGLGETNUNIFORMFVEXTPROC) (GLuint program, GLint location, GLsizei bufSize, float *params); +typedef void (GL_APIENTRYP 
PFNGLGETNUNIFORMIVEXTPROC) (GLuint program, GLint location, GLsizei bufSize, GLint *params); +#endif + +/* GL_EXT_sRGB */ +#ifndef GL_EXT_sRGB +#define GL_EXT_sRGB 1 +#endif + +/* GL_EXT_texture_compression_dxt1 */ +#ifndef GL_EXT_texture_compression_dxt1 +#define GL_EXT_texture_compression_dxt1 1 +#endif + +/* GL_EXT_texture_filter_anisotropic */ +#ifndef GL_EXT_texture_filter_anisotropic +#define GL_EXT_texture_filter_anisotropic 1 +#endif + +/* GL_EXT_texture_format_BGRA8888 */ +#ifndef GL_EXT_texture_format_BGRA8888 +#define GL_EXT_texture_format_BGRA8888 1 +#endif + +/* GL_EXT_texture_lod_bias */ +#ifndef GL_EXT_texture_lod_bias +#define GL_EXT_texture_lod_bias 1 +#endif + +/* GL_EXT_texture_storage */ +#ifndef GL_EXT_texture_storage +#define GL_EXT_texture_storage 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_API void GL_APIENTRY glTexStorage1DEXT (GLenum target, GLsizei levels, GLenum internalformat, GLsizei width); +GL_API void GL_APIENTRY glTexStorage2DEXT (GLenum target, GLsizei levels, GLenum internalformat, GLsizei width, GLsizei height); +GL_API void GL_APIENTRY glTexStorage3DEXT (GLenum target, GLsizei levels, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth); +GL_API void GL_APIENTRY glTextureStorage1DEXT (GLuint texture, GLenum target, GLsizei levels, GLenum internalformat, GLsizei width); +GL_API void GL_APIENTRY glTextureStorage2DEXT (GLuint texture, GLenum target, GLsizei levels, GLenum internalformat, GLsizei width, GLsizei height); +GL_API void GL_APIENTRY glTextureStorage3DEXT (GLuint texture, GLenum target, GLsizei levels, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth); +#endif +typedef void (GL_APIENTRYP PFNGLTEXSTORAGE1DEXTPROC) (GLenum target, GLsizei levels, GLenum internalformat, GLsizei width); +typedef void (GL_APIENTRYP PFNGLTEXSTORAGE2DEXTPROC) (GLenum target, GLsizei levels, GLenum internalformat, GLsizei width, GLsizei height); +typedef void (GL_APIENTRYP PFNGLTEXSTORAGE3DEXTPROC) (GLenum target, 
GLsizei levels, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth); +typedef void (GL_APIENTRYP PFNGLTEXTURESTORAGE1DEXTPROC) (GLuint texture, GLenum target, GLsizei levels, GLenum internalformat, GLsizei width); +typedef void (GL_APIENTRYP PFNGLTEXTURESTORAGE2DEXTPROC) (GLuint texture, GLenum target, GLsizei levels, GLenum internalformat, GLsizei width, GLsizei height); +typedef void (GL_APIENTRYP PFNGLTEXTURESTORAGE3DEXTPROC) (GLuint texture, GLenum target, GLsizei levels, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth); +#endif + +/*------------------------------------------------------------------------* + * IMG extension functions + *------------------------------------------------------------------------*/ + +/* GL_IMG_read_format */ +#ifndef GL_IMG_read_format +#define GL_IMG_read_format 1 +#endif + +/* GL_IMG_texture_compression_pvrtc */ +#ifndef GL_IMG_texture_compression_pvrtc +#define GL_IMG_texture_compression_pvrtc 1 +#endif + +/* GL_IMG_texture_env_enhanced_fixed_function */ +#ifndef GL_IMG_texture_env_enhanced_fixed_function +#define GL_IMG_texture_env_enhanced_fixed_function 1 +#endif + +/* GL_IMG_user_clip_plane */ +#ifndef GL_IMG_user_clip_plane +#define GL_IMG_user_clip_plane 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_API void GL_APIENTRY glClipPlanefIMG (GLenum, const GLfloat *); +GL_API void GL_APIENTRY glClipPlanexIMG (GLenum, const GLfixed *); +#endif +typedef void (GL_APIENTRYP PFNGLCLIPPLANEFIMGPROC) (GLenum p, const GLfloat *eqn); +typedef void (GL_APIENTRYP PFNGLCLIPPLANEXIMGPROC) (GLenum p, const GLfixed *eqn); +#endif + +/* GL_IMG_multisampled_render_to_texture */ +#ifndef GL_IMG_multisampled_render_to_texture +#define GL_IMG_multisampled_render_to_texture 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_API void GL_APIENTRY glRenderbufferStorageMultisampleIMG (GLenum, GLsizei, GLenum, GLsizei, GLsizei); +GL_API void GL_APIENTRY glFramebufferTexture2DMultisampleIMG (GLenum, GLenum, GLenum, GLuint, GLint, GLsizei); +#endif 
+typedef void (GL_APIENTRYP PFNGLRENDERBUFFERSTORAGEMULTISAMPLEIMGPROC) (GLenum target, GLsizei samples, GLenum internalformat, GLsizei width, GLsizei height); +typedef void (GL_APIENTRYP PFNGLFRAMEBUFFERTEXTURE2DMULTISAMPLEIMGPROC) (GLenum target, GLenum attachment, GLenum textarget, GLuint texture, GLint level, GLsizei samples); +#endif + +/*------------------------------------------------------------------------* + * NV extension functions + *------------------------------------------------------------------------*/ + +/* NV_fence */ +#ifndef GL_NV_fence +#define GL_NV_fence 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_API void GL_APIENTRY glDeleteFencesNV (GLsizei, const GLuint *); +GL_API void GL_APIENTRY glGenFencesNV (GLsizei, GLuint *); +GL_API GLboolean GL_APIENTRY glIsFenceNV (GLuint); +GL_API GLboolean GL_APIENTRY glTestFenceNV (GLuint); +GL_API void GL_APIENTRY glGetFenceivNV (GLuint, GLenum, GLint *); +GL_API void GL_APIENTRY glFinishFenceNV (GLuint); +GL_API void GL_APIENTRY glSetFenceNV (GLuint, GLenum); +#endif +typedef void (GL_APIENTRYP PFNGLDELETEFENCESNVPROC) (GLsizei n, const GLuint *fences); +typedef void (GL_APIENTRYP PFNGLGENFENCESNVPROC) (GLsizei n, GLuint *fences); +typedef GLboolean (GL_APIENTRYP PFNGLISFENCENVPROC) (GLuint fence); +typedef GLboolean (GL_APIENTRYP PFNGLTESTFENCENVPROC) (GLuint fence); +typedef void (GL_APIENTRYP PFNGLGETFENCEIVNVPROC) (GLuint fence, GLenum pname, GLint *params); +typedef void (GL_APIENTRYP PFNGLFINISHFENCENVPROC) (GLuint fence); +typedef void (GL_APIENTRYP PFNGLSETFENCENVPROC) (GLuint fence, GLenum condition); +#endif + +/*------------------------------------------------------------------------* + * QCOM extension functions + *------------------------------------------------------------------------*/ + +/* GL_QCOM_driver_control */ +#ifndef GL_QCOM_driver_control +#define GL_QCOM_driver_control 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_API void GL_APIENTRY glGetDriverControlsQCOM (GLint *num, GLsizei size, GLuint 
*driverControls); +GL_API void GL_APIENTRY glGetDriverControlStringQCOM (GLuint driverControl, GLsizei bufSize, GLsizei *length, GLchar *driverControlString); +GL_API void GL_APIENTRY glEnableDriverControlQCOM (GLuint driverControl); +GL_API void GL_APIENTRY glDisableDriverControlQCOM (GLuint driverControl); +#endif +typedef void (GL_APIENTRYP PFNGLGETDRIVERCONTROLSQCOMPROC) (GLint *num, GLsizei size, GLuint *driverControls); +typedef void (GL_APIENTRYP PFNGLGETDRIVERCONTROLSTRINGQCOMPROC) (GLuint driverControl, GLsizei bufSize, GLsizei *length, GLchar *driverControlString); +typedef void (GL_APIENTRYP PFNGLENABLEDRIVERCONTROLQCOMPROC) (GLuint driverControl); +typedef void (GL_APIENTRYP PFNGLDISABLEDRIVERCONTROLQCOMPROC) (GLuint driverControl); +#endif + +/* GL_QCOM_extended_get */ +#ifndef GL_QCOM_extended_get +#define GL_QCOM_extended_get 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_API void GL_APIENTRY glExtGetTexturesQCOM (GLuint *textures, GLint maxTextures, GLint *numTextures); +GL_API void GL_APIENTRY glExtGetBuffersQCOM (GLuint *buffers, GLint maxBuffers, GLint *numBuffers); +GL_API void GL_APIENTRY glExtGetRenderbuffersQCOM (GLuint *renderbuffers, GLint maxRenderbuffers, GLint *numRenderbuffers); +GL_API void GL_APIENTRY glExtGetFramebuffersQCOM (GLuint *framebuffers, GLint maxFramebuffers, GLint *numFramebuffers); +GL_API void GL_APIENTRY glExtGetTexLevelParameterivQCOM (GLuint texture, GLenum face, GLint level, GLenum pname, GLint *params); +GL_API void GL_APIENTRY glExtTexObjectStateOverrideiQCOM (GLenum target, GLenum pname, GLint param); +GL_API void GL_APIENTRY glExtGetTexSubImageQCOM (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLenum format, GLenum type, GLvoid *texels); +GL_API void GL_APIENTRY glExtGetBufferPointervQCOM (GLenum target, GLvoid **params); +#endif +typedef void (GL_APIENTRYP PFNGLEXTGETTEXTURESQCOMPROC) (GLuint *textures, GLint maxTextures, GLint *numTextures); +typedef 
void (GL_APIENTRYP PFNGLEXTGETBUFFERSQCOMPROC) (GLuint *buffers, GLint maxBuffers, GLint *numBuffers); +typedef void (GL_APIENTRYP PFNGLEXTGETRENDERBUFFERSQCOMPROC) (GLuint *renderbuffers, GLint maxRenderbuffers, GLint *numRenderbuffers); +typedef void (GL_APIENTRYP PFNGLEXTGETFRAMEBUFFERSQCOMPROC) (GLuint *framebuffers, GLint maxFramebuffers, GLint *numFramebuffers); +typedef void (GL_APIENTRYP PFNGLEXTGETTEXLEVELPARAMETERIVQCOMPROC) (GLuint texture, GLenum face, GLint level, GLenum pname, GLint *params); +typedef void (GL_APIENTRYP PFNGLEXTTEXOBJECTSTATEOVERRIDEIQCOMPROC) (GLenum target, GLenum pname, GLint param); +typedef void (GL_APIENTRYP PFNGLEXTGETTEXSUBIMAGEQCOMPROC) (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLenum format, GLenum type, GLvoid *texels); +typedef void (GL_APIENTRYP PFNGLEXTGETBUFFERPOINTERVQCOMPROC) (GLenum target, GLvoid **params); +#endif + +/* GL_QCOM_extended_get2 */ +#ifndef GL_QCOM_extended_get2 +#define GL_QCOM_extended_get2 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_API void GL_APIENTRY glExtGetShadersQCOM (GLuint *shaders, GLint maxShaders, GLint *numShaders); +GL_API void GL_APIENTRY glExtGetProgramsQCOM (GLuint *programs, GLint maxPrograms, GLint *numPrograms); +GL_API GLboolean GL_APIENTRY glExtIsProgramBinaryQCOM (GLuint program); +GL_API void GL_APIENTRY glExtGetProgramBinarySourceQCOM (GLuint program, GLenum shadertype, GLchar *source, GLint *length); +#endif +typedef void (GL_APIENTRYP PFNGLEXTGETSHADERSQCOMPROC) (GLuint *shaders, GLint maxShaders, GLint *numShaders); +typedef void (GL_APIENTRYP PFNGLEXTGETPROGRAMSQCOMPROC) (GLuint *programs, GLint maxPrograms, GLint *numPrograms); +typedef GLboolean (GL_APIENTRYP PFNGLEXTISPROGRAMBINARYQCOMPROC) (GLuint program); +typedef void (GL_APIENTRYP PFNGLEXTGETPROGRAMBINARYSOURCEQCOMPROC) (GLuint program, GLenum shadertype, GLchar *source, GLint *length); +#endif + +/* GL_QCOM_perfmon_global_mode */ +#ifndef 
GL_QCOM_perfmon_global_mode +#define GL_QCOM_perfmon_global_mode 1 +#endif + +/* GL_QCOM_writeonly_rendering */ +#ifndef GL_QCOM_writeonly_rendering +#define GL_QCOM_writeonly_rendering 1 +#endif + +/* GL_QCOM_tiled_rendering */ +#ifndef GL_QCOM_tiled_rendering +#define GL_QCOM_tiled_rendering 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_API void GL_APIENTRY glStartTilingQCOM (GLuint x, GLuint y, GLuint width, GLuint height, GLbitfield preserveMask); +GL_API void GL_APIENTRY glEndTilingQCOM (GLbitfield preserveMask); +#endif +typedef void (GL_APIENTRYP PFNGLSTARTTILINGQCOMPROC) (GLuint x, GLuint y, GLuint width, GLuint height, GLbitfield preserveMask); +typedef void (GL_APIENTRYP PFNGLENDTILINGQCOMPROC) (GLbitfield preserveMask); +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* __glext_h_ */ + diff --git a/opengles-3.1/include/GLES/glplatform.h b/opengles-3.1/include/GLES/glplatform.h new file mode 100644 index 0000000000..2db6ee2ca6 --- /dev/null +++ b/opengles-3.1/include/GLES/glplatform.h @@ -0,0 +1,30 @@ +#ifndef __glplatform_h_ +#define __glplatform_h_ + +/* $Revision: 10601 $ on $Date:: 2010-03-04 22:15:27 -0800 #$ */ + +/* + * This document is licensed under the SGI Free Software B License Version + * 2.0. For details, see http://oss.sgi.com/projects/FreeB/ . + */ + +/* Platform-specific types and definitions for OpenGL ES 1.X gl.h + * + * Adopters may modify khrplatform.h and this file to suit their platform. + * You are encouraged to submit all modifications to the Khronos group so that + * they can be included in future versions of this file. Please submit changes + * by sending them to the public Khronos Bugzilla (http://khronos.org/bugzilla) + * by filing a bug against product "OpenGL-ES" component "Registry". 
+ */ + +#include + +#ifndef GL_API +#define GL_API KHRONOS_APICALL +#endif + +#ifndef GL_APIENTRY +#define GL_APIENTRY KHRONOS_APIENTRY +#endif + +#endif /* __glplatform_h_ */ diff --git a/opengles-3.1/include/GLES2/gl2.h b/opengles-3.1/include/GLES2/gl2.h new file mode 100644 index 0000000000..b7fe76ee1c --- /dev/null +++ b/opengles-3.1/include/GLES2/gl2.h @@ -0,0 +1,620 @@ +#ifndef __gl2_h_ +#define __gl2_h_ + +/* $Revision: 16803 $ on $Date:: 2012-02-02 09:49:18 -0800 #$ */ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * This document is licensed under the SGI Free Software B License Version + * 2.0. For details, see http://oss.sgi.com/projects/FreeB/ . + */ + +/*------------------------------------------------------------------------- + * Data type definitions + *-----------------------------------------------------------------------*/ + +typedef void GLvoid; +typedef char GLchar; +typedef unsigned int GLenum; +typedef unsigned char GLboolean; +typedef unsigned int GLbitfield; +typedef khronos_int8_t GLbyte; +typedef short GLshort; +typedef int GLint; +typedef int GLsizei; +typedef khronos_uint8_t GLubyte; +typedef unsigned short GLushort; +typedef unsigned int GLuint; +typedef khronos_float_t GLfloat; +typedef khronos_float_t GLclampf; +typedef khronos_int32_t GLfixed; + +/* GL types for handling large vertex buffer objects */ +typedef khronos_intptr_t GLintptr; +typedef khronos_ssize_t GLsizeiptr; + +/* OpenGL ES core versions */ +#define GL_ES_VERSION_2_0 1 + +/* ClearBufferMask */ +#define GL_DEPTH_BUFFER_BIT 0x00000100 +#define GL_STENCIL_BUFFER_BIT 0x00000400 +#define GL_COLOR_BUFFER_BIT 0x00004000 + +/* Boolean */ +#define GL_FALSE 0 +#define GL_TRUE 1 + +/* BeginMode */ +#define GL_POINTS 0x0000 +#define GL_LINES 0x0001 +#define GL_LINE_LOOP 0x0002 +#define GL_LINE_STRIP 0x0003 +#define GL_TRIANGLES 0x0004 +#define GL_TRIANGLE_STRIP 0x0005 +#define GL_TRIANGLE_FAN 0x0006 + +/* AlphaFunction (not supported in ES20) */ +/* GL_NEVER */ 
+/* GL_LESS */ +/* GL_EQUAL */ +/* GL_LEQUAL */ +/* GL_GREATER */ +/* GL_NOTEQUAL */ +/* GL_GEQUAL */ +/* GL_ALWAYS */ + +/* BlendingFactorDest */ +#define GL_ZERO 0 +#define GL_ONE 1 +#define GL_SRC_COLOR 0x0300 +#define GL_ONE_MINUS_SRC_COLOR 0x0301 +#define GL_SRC_ALPHA 0x0302 +#define GL_ONE_MINUS_SRC_ALPHA 0x0303 +#define GL_DST_ALPHA 0x0304 +#define GL_ONE_MINUS_DST_ALPHA 0x0305 + +/* BlendingFactorSrc */ +/* GL_ZERO */ +/* GL_ONE */ +#define GL_DST_COLOR 0x0306 +#define GL_ONE_MINUS_DST_COLOR 0x0307 +#define GL_SRC_ALPHA_SATURATE 0x0308 +/* GL_SRC_ALPHA */ +/* GL_ONE_MINUS_SRC_ALPHA */ +/* GL_DST_ALPHA */ +/* GL_ONE_MINUS_DST_ALPHA */ + +/* BlendEquationSeparate */ +#define GL_FUNC_ADD 0x8006 +#define GL_BLEND_EQUATION 0x8009 +#define GL_BLEND_EQUATION_RGB 0x8009 /* same as BLEND_EQUATION */ +#define GL_BLEND_EQUATION_ALPHA 0x883D + +/* BlendSubtract */ +#define GL_FUNC_SUBTRACT 0x800A +#define GL_FUNC_REVERSE_SUBTRACT 0x800B + +/* Separate Blend Functions */ +#define GL_BLEND_DST_RGB 0x80C8 +#define GL_BLEND_SRC_RGB 0x80C9 +#define GL_BLEND_DST_ALPHA 0x80CA +#define GL_BLEND_SRC_ALPHA 0x80CB +#define GL_CONSTANT_COLOR 0x8001 +#define GL_ONE_MINUS_CONSTANT_COLOR 0x8002 +#define GL_CONSTANT_ALPHA 0x8003 +#define GL_ONE_MINUS_CONSTANT_ALPHA 0x8004 +#define GL_BLEND_COLOR 0x8005 + +/* Buffer Objects */ +#define GL_ARRAY_BUFFER 0x8892 +#define GL_ELEMENT_ARRAY_BUFFER 0x8893 +#define GL_ARRAY_BUFFER_BINDING 0x8894 +#define GL_ELEMENT_ARRAY_BUFFER_BINDING 0x8895 + +#define GL_STREAM_DRAW 0x88E0 +#define GL_STATIC_DRAW 0x88E4 +#define GL_DYNAMIC_DRAW 0x88E8 + +#define GL_BUFFER_SIZE 0x8764 +#define GL_BUFFER_USAGE 0x8765 + +#define GL_CURRENT_VERTEX_ATTRIB 0x8626 + +/* CullFaceMode */ +#define GL_FRONT 0x0404 +#define GL_BACK 0x0405 +#define GL_FRONT_AND_BACK 0x0408 + +/* DepthFunction */ +/* GL_NEVER */ +/* GL_LESS */ +/* GL_EQUAL */ +/* GL_LEQUAL */ +/* GL_GREATER */ +/* GL_NOTEQUAL */ +/* GL_GEQUAL */ +/* GL_ALWAYS */ + +/* EnableCap */ +#define GL_TEXTURE_2D 
0x0DE1 +#define GL_CULL_FACE 0x0B44 +#define GL_BLEND 0x0BE2 +#define GL_DITHER 0x0BD0 +#define GL_STENCIL_TEST 0x0B90 +#define GL_DEPTH_TEST 0x0B71 +#define GL_SCISSOR_TEST 0x0C11 +#define GL_POLYGON_OFFSET_FILL 0x8037 +#define GL_SAMPLE_ALPHA_TO_COVERAGE 0x809E +#define GL_SAMPLE_COVERAGE 0x80A0 + +/* ErrorCode */ +#define GL_NO_ERROR 0 +#define GL_INVALID_ENUM 0x0500 +#define GL_INVALID_VALUE 0x0501 +#define GL_INVALID_OPERATION 0x0502 +#define GL_OUT_OF_MEMORY 0x0505 + +/* FrontFaceDirection */ +#define GL_CW 0x0900 +#define GL_CCW 0x0901 + +/* GetPName */ +#define GL_LINE_WIDTH 0x0B21 +#define GL_ALIASED_POINT_SIZE_RANGE 0x846D +#define GL_ALIASED_LINE_WIDTH_RANGE 0x846E +#define GL_CULL_FACE_MODE 0x0B45 +#define GL_FRONT_FACE 0x0B46 +#define GL_DEPTH_RANGE 0x0B70 +#define GL_DEPTH_WRITEMASK 0x0B72 +#define GL_DEPTH_CLEAR_VALUE 0x0B73 +#define GL_DEPTH_FUNC 0x0B74 +#define GL_STENCIL_CLEAR_VALUE 0x0B91 +#define GL_STENCIL_FUNC 0x0B92 +#define GL_STENCIL_FAIL 0x0B94 +#define GL_STENCIL_PASS_DEPTH_FAIL 0x0B95 +#define GL_STENCIL_PASS_DEPTH_PASS 0x0B96 +#define GL_STENCIL_REF 0x0B97 +#define GL_STENCIL_VALUE_MASK 0x0B93 +#define GL_STENCIL_WRITEMASK 0x0B98 +#define GL_STENCIL_BACK_FUNC 0x8800 +#define GL_STENCIL_BACK_FAIL 0x8801 +#define GL_STENCIL_BACK_PASS_DEPTH_FAIL 0x8802 +#define GL_STENCIL_BACK_PASS_DEPTH_PASS 0x8803 +#define GL_STENCIL_BACK_REF 0x8CA3 +#define GL_STENCIL_BACK_VALUE_MASK 0x8CA4 +#define GL_STENCIL_BACK_WRITEMASK 0x8CA5 +#define GL_VIEWPORT 0x0BA2 +#define GL_SCISSOR_BOX 0x0C10 +/* GL_SCISSOR_TEST */ +#define GL_COLOR_CLEAR_VALUE 0x0C22 +#define GL_COLOR_WRITEMASK 0x0C23 +#define GL_UNPACK_ALIGNMENT 0x0CF5 +#define GL_PACK_ALIGNMENT 0x0D05 +#define GL_MAX_TEXTURE_SIZE 0x0D33 +#define GL_MAX_VIEWPORT_DIMS 0x0D3A +#define GL_SUBPIXEL_BITS 0x0D50 +#define GL_RED_BITS 0x0D52 +#define GL_GREEN_BITS 0x0D53 +#define GL_BLUE_BITS 0x0D54 +#define GL_ALPHA_BITS 0x0D55 +#define GL_DEPTH_BITS 0x0D56 +#define GL_STENCIL_BITS 0x0D57 +#define 
GL_POLYGON_OFFSET_UNITS 0x2A00 +/* GL_POLYGON_OFFSET_FILL */ +#define GL_POLYGON_OFFSET_FACTOR 0x8038 +#define GL_TEXTURE_BINDING_2D 0x8069 +#define GL_SAMPLE_BUFFERS 0x80A8 +#define GL_SAMPLES 0x80A9 +#define GL_SAMPLE_COVERAGE_VALUE 0x80AA +#define GL_SAMPLE_COVERAGE_INVERT 0x80AB + +/* GetTextureParameter */ +/* GL_TEXTURE_MAG_FILTER */ +/* GL_TEXTURE_MIN_FILTER */ +/* GL_TEXTURE_WRAP_S */ +/* GL_TEXTURE_WRAP_T */ + +#define GL_NUM_COMPRESSED_TEXTURE_FORMATS 0x86A2 +#define GL_COMPRESSED_TEXTURE_FORMATS 0x86A3 + +/* HintMode */ +#define GL_DONT_CARE 0x1100 +#define GL_FASTEST 0x1101 +#define GL_NICEST 0x1102 + +/* HintTarget */ +#define GL_GENERATE_MIPMAP_HINT 0x8192 + +/* DataType */ +#define GL_BYTE 0x1400 +#define GL_UNSIGNED_BYTE 0x1401 +#define GL_SHORT 0x1402 +#define GL_UNSIGNED_SHORT 0x1403 +#define GL_INT 0x1404 +#define GL_UNSIGNED_INT 0x1405 +#define GL_FLOAT 0x1406 +#define GL_FIXED 0x140C + +/* PixelFormat */ +#define GL_DEPTH_COMPONENT 0x1902 +#define GL_ALPHA 0x1906 +#define GL_RGB 0x1907 +#define GL_RGBA 0x1908 +#define GL_LUMINANCE 0x1909 +#define GL_LUMINANCE_ALPHA 0x190A + +/* PixelType */ +/* GL_UNSIGNED_BYTE */ +#define GL_UNSIGNED_SHORT_4_4_4_4 0x8033 +#define GL_UNSIGNED_SHORT_5_5_5_1 0x8034 +#define GL_UNSIGNED_SHORT_5_6_5 0x8363 + +/* Shaders */ +#define GL_FRAGMENT_SHADER 0x8B30 +#define GL_VERTEX_SHADER 0x8B31 +#define GL_MAX_VERTEX_ATTRIBS 0x8869 +#define GL_MAX_VERTEX_UNIFORM_VECTORS 0x8DFB +#define GL_MAX_VARYING_VECTORS 0x8DFC +#define GL_MAX_COMBINED_TEXTURE_IMAGE_UNITS 0x8B4D +#define GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS 0x8B4C +#define GL_MAX_TEXTURE_IMAGE_UNITS 0x8872 +#define GL_MAX_FRAGMENT_UNIFORM_VECTORS 0x8DFD +#define GL_SHADER_TYPE 0x8B4F +#define GL_DELETE_STATUS 0x8B80 +#define GL_LINK_STATUS 0x8B82 +#define GL_VALIDATE_STATUS 0x8B83 +#define GL_ATTACHED_SHADERS 0x8B85 +#define GL_ACTIVE_UNIFORMS 0x8B86 +#define GL_ACTIVE_UNIFORM_MAX_LENGTH 0x8B87 +#define GL_ACTIVE_ATTRIBUTES 0x8B89 +#define 
GL_ACTIVE_ATTRIBUTE_MAX_LENGTH 0x8B8A +#define GL_SHADING_LANGUAGE_VERSION 0x8B8C +#define GL_CURRENT_PROGRAM 0x8B8D + +/* StencilFunction */ +#define GL_NEVER 0x0200 +#define GL_LESS 0x0201 +#define GL_EQUAL 0x0202 +#define GL_LEQUAL 0x0203 +#define GL_GREATER 0x0204 +#define GL_NOTEQUAL 0x0205 +#define GL_GEQUAL 0x0206 +#define GL_ALWAYS 0x0207 + +/* StencilOp */ +/* GL_ZERO */ +#define GL_KEEP 0x1E00 +#define GL_REPLACE 0x1E01 +#define GL_INCR 0x1E02 +#define GL_DECR 0x1E03 +#define GL_INVERT 0x150A +#define GL_INCR_WRAP 0x8507 +#define GL_DECR_WRAP 0x8508 + +/* StringName */ +#define GL_VENDOR 0x1F00 +#define GL_RENDERER 0x1F01 +#define GL_VERSION 0x1F02 +#define GL_EXTENSIONS 0x1F03 + +/* TextureMagFilter */ +#define GL_NEAREST 0x2600 +#define GL_LINEAR 0x2601 + +/* TextureMinFilter */ +/* GL_NEAREST */ +/* GL_LINEAR */ +#define GL_NEAREST_MIPMAP_NEAREST 0x2700 +#define GL_LINEAR_MIPMAP_NEAREST 0x2701 +#define GL_NEAREST_MIPMAP_LINEAR 0x2702 +#define GL_LINEAR_MIPMAP_LINEAR 0x2703 + +/* TextureParameterName */ +#define GL_TEXTURE_MAG_FILTER 0x2800 +#define GL_TEXTURE_MIN_FILTER 0x2801 +#define GL_TEXTURE_WRAP_S 0x2802 +#define GL_TEXTURE_WRAP_T 0x2803 + +/* TextureTarget */ +/* GL_TEXTURE_2D */ +#define GL_TEXTURE 0x1702 + +#define GL_TEXTURE_CUBE_MAP 0x8513 +#define GL_TEXTURE_BINDING_CUBE_MAP 0x8514 +#define GL_TEXTURE_CUBE_MAP_POSITIVE_X 0x8515 +#define GL_TEXTURE_CUBE_MAP_NEGATIVE_X 0x8516 +#define GL_TEXTURE_CUBE_MAP_POSITIVE_Y 0x8517 +#define GL_TEXTURE_CUBE_MAP_NEGATIVE_Y 0x8518 +#define GL_TEXTURE_CUBE_MAP_POSITIVE_Z 0x8519 +#define GL_TEXTURE_CUBE_MAP_NEGATIVE_Z 0x851A +#define GL_MAX_CUBE_MAP_TEXTURE_SIZE 0x851C + +/* TextureUnit */ +#define GL_TEXTURE0 0x84C0 +#define GL_TEXTURE1 0x84C1 +#define GL_TEXTURE2 0x84C2 +#define GL_TEXTURE3 0x84C3 +#define GL_TEXTURE4 0x84C4 +#define GL_TEXTURE5 0x84C5 +#define GL_TEXTURE6 0x84C6 +#define GL_TEXTURE7 0x84C7 +#define GL_TEXTURE8 0x84C8 +#define GL_TEXTURE9 0x84C9 +#define GL_TEXTURE10 0x84CA +#define 
GL_TEXTURE11 0x84CB +#define GL_TEXTURE12 0x84CC +#define GL_TEXTURE13 0x84CD +#define GL_TEXTURE14 0x84CE +#define GL_TEXTURE15 0x84CF +#define GL_TEXTURE16 0x84D0 +#define GL_TEXTURE17 0x84D1 +#define GL_TEXTURE18 0x84D2 +#define GL_TEXTURE19 0x84D3 +#define GL_TEXTURE20 0x84D4 +#define GL_TEXTURE21 0x84D5 +#define GL_TEXTURE22 0x84D6 +#define GL_TEXTURE23 0x84D7 +#define GL_TEXTURE24 0x84D8 +#define GL_TEXTURE25 0x84D9 +#define GL_TEXTURE26 0x84DA +#define GL_TEXTURE27 0x84DB +#define GL_TEXTURE28 0x84DC +#define GL_TEXTURE29 0x84DD +#define GL_TEXTURE30 0x84DE +#define GL_TEXTURE31 0x84DF +#define GL_ACTIVE_TEXTURE 0x84E0 + +/* TextureWrapMode */ +#define GL_REPEAT 0x2901 +#define GL_CLAMP_TO_EDGE 0x812F +#define GL_MIRRORED_REPEAT 0x8370 + +/* Uniform Types */ +#define GL_FLOAT_VEC2 0x8B50 +#define GL_FLOAT_VEC3 0x8B51 +#define GL_FLOAT_VEC4 0x8B52 +#define GL_INT_VEC2 0x8B53 +#define GL_INT_VEC3 0x8B54 +#define GL_INT_VEC4 0x8B55 +#define GL_BOOL 0x8B56 +#define GL_BOOL_VEC2 0x8B57 +#define GL_BOOL_VEC3 0x8B58 +#define GL_BOOL_VEC4 0x8B59 +#define GL_FLOAT_MAT2 0x8B5A +#define GL_FLOAT_MAT3 0x8B5B +#define GL_FLOAT_MAT4 0x8B5C +#define GL_SAMPLER_2D 0x8B5E +#define GL_SAMPLER_CUBE 0x8B60 + +/* Vertex Arrays */ +#define GL_VERTEX_ATTRIB_ARRAY_ENABLED 0x8622 +#define GL_VERTEX_ATTRIB_ARRAY_SIZE 0x8623 +#define GL_VERTEX_ATTRIB_ARRAY_STRIDE 0x8624 +#define GL_VERTEX_ATTRIB_ARRAY_TYPE 0x8625 +#define GL_VERTEX_ATTRIB_ARRAY_NORMALIZED 0x886A +#define GL_VERTEX_ATTRIB_ARRAY_POINTER 0x8645 +#define GL_VERTEX_ATTRIB_ARRAY_BUFFER_BINDING 0x889F + +/* Read Format */ +#define GL_IMPLEMENTATION_COLOR_READ_TYPE 0x8B9A +#define GL_IMPLEMENTATION_COLOR_READ_FORMAT 0x8B9B + +/* Shader Source */ +#define GL_COMPILE_STATUS 0x8B81 +#define GL_INFO_LOG_LENGTH 0x8B84 +#define GL_SHADER_SOURCE_LENGTH 0x8B88 +#define GL_SHADER_COMPILER 0x8DFA + +/* Shader Binary */ +#define GL_SHADER_BINARY_FORMATS 0x8DF8 +#define GL_NUM_SHADER_BINARY_FORMATS 0x8DF9 + +/* Shader Precision-Specified 
Types */ +#define GL_LOW_FLOAT 0x8DF0 +#define GL_MEDIUM_FLOAT 0x8DF1 +#define GL_HIGH_FLOAT 0x8DF2 +#define GL_LOW_INT 0x8DF3 +#define GL_MEDIUM_INT 0x8DF4 +#define GL_HIGH_INT 0x8DF5 + +/* Framebuffer Object. */ +#define GL_FRAMEBUFFER 0x8D40 +#define GL_RENDERBUFFER 0x8D41 + +#define GL_RGBA4 0x8056 +#define GL_RGB5_A1 0x8057 +#define GL_RGB565 0x8D62 +#define GL_DEPTH_COMPONENT16 0x81A5 +#define GL_STENCIL_INDEX8 0x8D48 + +#define GL_RENDERBUFFER_WIDTH 0x8D42 +#define GL_RENDERBUFFER_HEIGHT 0x8D43 +#define GL_RENDERBUFFER_INTERNAL_FORMAT 0x8D44 +#define GL_RENDERBUFFER_RED_SIZE 0x8D50 +#define GL_RENDERBUFFER_GREEN_SIZE 0x8D51 +#define GL_RENDERBUFFER_BLUE_SIZE 0x8D52 +#define GL_RENDERBUFFER_ALPHA_SIZE 0x8D53 +#define GL_RENDERBUFFER_DEPTH_SIZE 0x8D54 +#define GL_RENDERBUFFER_STENCIL_SIZE 0x8D55 + +#define GL_FRAMEBUFFER_ATTACHMENT_OBJECT_TYPE 0x8CD0 +#define GL_FRAMEBUFFER_ATTACHMENT_OBJECT_NAME 0x8CD1 +#define GL_FRAMEBUFFER_ATTACHMENT_TEXTURE_LEVEL 0x8CD2 +#define GL_FRAMEBUFFER_ATTACHMENT_TEXTURE_CUBE_MAP_FACE 0x8CD3 + +#define GL_COLOR_ATTACHMENT0 0x8CE0 +#define GL_DEPTH_ATTACHMENT 0x8D00 +#define GL_STENCIL_ATTACHMENT 0x8D20 + +#define GL_NONE 0 + +#define GL_FRAMEBUFFER_COMPLETE 0x8CD5 +#define GL_FRAMEBUFFER_INCOMPLETE_ATTACHMENT 0x8CD6 +#define GL_FRAMEBUFFER_INCOMPLETE_MISSING_ATTACHMENT 0x8CD7 +#define GL_FRAMEBUFFER_INCOMPLETE_DIMENSIONS 0x8CD9 +#define GL_FRAMEBUFFER_UNSUPPORTED 0x8CDD + +#define GL_FRAMEBUFFER_BINDING 0x8CA6 +#define GL_RENDERBUFFER_BINDING 0x8CA7 +#define GL_MAX_RENDERBUFFER_SIZE 0x84E8 + +#define GL_INVALID_FRAMEBUFFER_OPERATION 0x0506 + +/*------------------------------------------------------------------------- + * GL core functions. 
+ *-----------------------------------------------------------------------*/ + +GL_APICALL void GL_APIENTRY glActiveTexture (GLenum texture); +GL_APICALL void GL_APIENTRY glAttachShader (GLuint program, GLuint shader); +GL_APICALL void GL_APIENTRY glBindAttribLocation (GLuint program, GLuint index, const GLchar* name); +GL_APICALL void GL_APIENTRY glBindBuffer (GLenum target, GLuint buffer); +GL_APICALL void GL_APIENTRY glBindFramebuffer (GLenum target, GLuint framebuffer); +GL_APICALL void GL_APIENTRY glBindRenderbuffer (GLenum target, GLuint renderbuffer); +GL_APICALL void GL_APIENTRY glBindTexture (GLenum target, GLuint texture); +GL_APICALL void GL_APIENTRY glBlendColor (GLclampf red, GLclampf green, GLclampf blue, GLclampf alpha); +GL_APICALL void GL_APIENTRY glBlendEquation ( GLenum mode ); +GL_APICALL void GL_APIENTRY glBlendEquationSeparate (GLenum modeRGB, GLenum modeAlpha); +GL_APICALL void GL_APIENTRY glBlendFunc (GLenum sfactor, GLenum dfactor); +GL_APICALL void GL_APIENTRY glBlendFuncSeparate (GLenum srcRGB, GLenum dstRGB, GLenum srcAlpha, GLenum dstAlpha); +GL_APICALL void GL_APIENTRY glBufferData (GLenum target, GLsizeiptr size, const GLvoid* data, GLenum usage); +GL_APICALL void GL_APIENTRY glBufferSubData (GLenum target, GLintptr offset, GLsizeiptr size, const GLvoid* data); +GL_APICALL GLenum GL_APIENTRY glCheckFramebufferStatus (GLenum target); +GL_APICALL void GL_APIENTRY glClear (GLbitfield mask); +GL_APICALL void GL_APIENTRY glClearColor (GLclampf red, GLclampf green, GLclampf blue, GLclampf alpha); +GL_APICALL void GL_APIENTRY glClearDepthf (GLclampf depth); +GL_APICALL void GL_APIENTRY glClearStencil (GLint s); +GL_APICALL void GL_APIENTRY glColorMask (GLboolean red, GLboolean green, GLboolean blue, GLboolean alpha); +GL_APICALL void GL_APIENTRY glCompileShader (GLuint shader); +GL_APICALL void GL_APIENTRY glCompressedTexImage2D (GLenum target, GLint level, GLenum internalformat, GLsizei width, GLsizei height, GLint border, GLsizei 
imageSize, const GLvoid* data); +GL_APICALL void GL_APIENTRY glCompressedTexSubImage2D (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLsizei width, GLsizei height, GLenum format, GLsizei imageSize, const GLvoid* data); +GL_APICALL void GL_APIENTRY glCopyTexImage2D (GLenum target, GLint level, GLenum internalformat, GLint x, GLint y, GLsizei width, GLsizei height, GLint border); +GL_APICALL void GL_APIENTRY glCopyTexSubImage2D (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint x, GLint y, GLsizei width, GLsizei height); +GL_APICALL GLuint GL_APIENTRY glCreateProgram (void); +GL_APICALL GLuint GL_APIENTRY glCreateShader (GLenum type); +GL_APICALL void GL_APIENTRY glCullFace (GLenum mode); +GL_APICALL void GL_APIENTRY glDeleteBuffers (GLsizei n, const GLuint* buffers); +GL_APICALL void GL_APIENTRY glDeleteFramebuffers (GLsizei n, const GLuint* framebuffers); +GL_APICALL void GL_APIENTRY glDeleteProgram (GLuint program); +GL_APICALL void GL_APIENTRY glDeleteRenderbuffers (GLsizei n, const GLuint* renderbuffers); +GL_APICALL void GL_APIENTRY glDeleteShader (GLuint shader); +GL_APICALL void GL_APIENTRY glDeleteTextures (GLsizei n, const GLuint* textures); +GL_APICALL void GL_APIENTRY glDepthFunc (GLenum func); +GL_APICALL void GL_APIENTRY glDepthMask (GLboolean flag); +GL_APICALL void GL_APIENTRY glDepthRangef (GLclampf zNear, GLclampf zFar); +GL_APICALL void GL_APIENTRY glDetachShader (GLuint program, GLuint shader); +GL_APICALL void GL_APIENTRY glDisable (GLenum cap); +GL_APICALL void GL_APIENTRY glDisableVertexAttribArray (GLuint index); +GL_APICALL void GL_APIENTRY glDrawArrays (GLenum mode, GLint first, GLsizei count); +GL_APICALL void GL_APIENTRY glDrawElements (GLenum mode, GLsizei count, GLenum type, const GLvoid* indices); +GL_APICALL void GL_APIENTRY glEnable (GLenum cap); +GL_APICALL void GL_APIENTRY glEnableVertexAttribArray (GLuint index); +GL_APICALL void GL_APIENTRY glFinish (void); +GL_APICALL void GL_APIENTRY glFlush (void); 
+GL_APICALL void GL_APIENTRY glFramebufferRenderbuffer (GLenum target, GLenum attachment, GLenum renderbuffertarget, GLuint renderbuffer); +GL_APICALL void GL_APIENTRY glFramebufferTexture2D (GLenum target, GLenum attachment, GLenum textarget, GLuint texture, GLint level); +GL_APICALL void GL_APIENTRY glFrontFace (GLenum mode); +GL_APICALL void GL_APIENTRY glGenBuffers (GLsizei n, GLuint* buffers); +GL_APICALL void GL_APIENTRY glGenerateMipmap (GLenum target); +GL_APICALL void GL_APIENTRY glGenFramebuffers (GLsizei n, GLuint* framebuffers); +GL_APICALL void GL_APIENTRY glGenRenderbuffers (GLsizei n, GLuint* renderbuffers); +GL_APICALL void GL_APIENTRY glGenTextures (GLsizei n, GLuint* textures); +GL_APICALL void GL_APIENTRY glGetActiveAttrib (GLuint program, GLuint index, GLsizei bufsize, GLsizei* length, GLint* size, GLenum* type, GLchar* name); +GL_APICALL void GL_APIENTRY glGetActiveUniform (GLuint program, GLuint index, GLsizei bufsize, GLsizei* length, GLint* size, GLenum* type, GLchar* name); +GL_APICALL void GL_APIENTRY glGetAttachedShaders (GLuint program, GLsizei maxcount, GLsizei* count, GLuint* shaders); +GL_APICALL int GL_APIENTRY glGetAttribLocation (GLuint program, const GLchar* name); +GL_APICALL void GL_APIENTRY glGetBooleanv (GLenum pname, GLboolean* params); +GL_APICALL void GL_APIENTRY glGetBufferParameteriv (GLenum target, GLenum pname, GLint* params); +GL_APICALL GLenum GL_APIENTRY glGetError (void); +GL_APICALL void GL_APIENTRY glGetFloatv (GLenum pname, GLfloat* params); +GL_APICALL void GL_APIENTRY glGetFramebufferAttachmentParameteriv (GLenum target, GLenum attachment, GLenum pname, GLint* params); +GL_APICALL void GL_APIENTRY glGetIntegerv (GLenum pname, GLint* params); +GL_APICALL void GL_APIENTRY glGetProgramiv (GLuint program, GLenum pname, GLint* params); +GL_APICALL void GL_APIENTRY glGetProgramInfoLog (GLuint program, GLsizei bufsize, GLsizei* length, GLchar* infolog); +GL_APICALL void GL_APIENTRY glGetRenderbufferParameteriv (GLenum 
target, GLenum pname, GLint* params); +GL_APICALL void GL_APIENTRY glGetShaderiv (GLuint shader, GLenum pname, GLint* params); +GL_APICALL void GL_APIENTRY glGetShaderInfoLog (GLuint shader, GLsizei bufsize, GLsizei* length, GLchar* infolog); +GL_APICALL void GL_APIENTRY glGetShaderPrecisionFormat (GLenum shadertype, GLenum precisiontype, GLint* range, GLint* precision); +GL_APICALL void GL_APIENTRY glGetShaderSource (GLuint shader, GLsizei bufsize, GLsizei* length, GLchar* source); +GL_APICALL const GLubyte* GL_APIENTRY glGetString (GLenum name); +GL_APICALL void GL_APIENTRY glGetTexParameterfv (GLenum target, GLenum pname, GLfloat* params); +GL_APICALL void GL_APIENTRY glGetTexParameteriv (GLenum target, GLenum pname, GLint* params); +GL_APICALL void GL_APIENTRY glGetUniformfv (GLuint program, GLint location, GLfloat* params); +GL_APICALL void GL_APIENTRY glGetUniformiv (GLuint program, GLint location, GLint* params); +GL_APICALL int GL_APIENTRY glGetUniformLocation (GLuint program, const GLchar* name); +GL_APICALL void GL_APIENTRY glGetVertexAttribfv (GLuint index, GLenum pname, GLfloat* params); +GL_APICALL void GL_APIENTRY glGetVertexAttribiv (GLuint index, GLenum pname, GLint* params); +GL_APICALL void GL_APIENTRY glGetVertexAttribPointerv (GLuint index, GLenum pname, GLvoid** pointer); +GL_APICALL void GL_APIENTRY glHint (GLenum target, GLenum mode); +GL_APICALL GLboolean GL_APIENTRY glIsBuffer (GLuint buffer); +GL_APICALL GLboolean GL_APIENTRY glIsEnabled (GLenum cap); +GL_APICALL GLboolean GL_APIENTRY glIsFramebuffer (GLuint framebuffer); +GL_APICALL GLboolean GL_APIENTRY glIsProgram (GLuint program); +GL_APICALL GLboolean GL_APIENTRY glIsRenderbuffer (GLuint renderbuffer); +GL_APICALL GLboolean GL_APIENTRY glIsShader (GLuint shader); +GL_APICALL GLboolean GL_APIENTRY glIsTexture (GLuint texture); +GL_APICALL void GL_APIENTRY glLineWidth (GLfloat width); +GL_APICALL void GL_APIENTRY glLinkProgram (GLuint program); +GL_APICALL void GL_APIENTRY glPixelStorei 
(GLenum pname, GLint param); +GL_APICALL void GL_APIENTRY glPolygonOffset (GLfloat factor, GLfloat units); +GL_APICALL void GL_APIENTRY glReadPixels (GLint x, GLint y, GLsizei width, GLsizei height, GLenum format, GLenum type, GLvoid* pixels); +GL_APICALL void GL_APIENTRY glReleaseShaderCompiler (void); +GL_APICALL void GL_APIENTRY glRenderbufferStorage (GLenum target, GLenum internalformat, GLsizei width, GLsizei height); +GL_APICALL void GL_APIENTRY glSampleCoverage (GLclampf value, GLboolean invert); +GL_APICALL void GL_APIENTRY glScissor (GLint x, GLint y, GLsizei width, GLsizei height); +GL_APICALL void GL_APIENTRY glShaderBinary (GLsizei n, const GLuint* shaders, GLenum binaryformat, const GLvoid* binary, GLsizei length); +GL_APICALL void GL_APIENTRY glShaderSource (GLuint shader, GLsizei count, const GLchar* const* string, const GLint* length); +GL_APICALL void GL_APIENTRY glStencilFunc (GLenum func, GLint ref, GLuint mask); +GL_APICALL void GL_APIENTRY glStencilFuncSeparate (GLenum face, GLenum func, GLint ref, GLuint mask); +GL_APICALL void GL_APIENTRY glStencilMask (GLuint mask); +GL_APICALL void GL_APIENTRY glStencilMaskSeparate (GLenum face, GLuint mask); +GL_APICALL void GL_APIENTRY glStencilOp (GLenum fail, GLenum zfail, GLenum zpass); +GL_APICALL void GL_APIENTRY glStencilOpSeparate (GLenum face, GLenum fail, GLenum zfail, GLenum zpass); +GL_APICALL void GL_APIENTRY glTexImage2D (GLenum target, GLint level, GLint internalformat, GLsizei width, GLsizei height, GLint border, GLenum format, GLenum type, const GLvoid* pixels); +GL_APICALL void GL_APIENTRY glTexParameterf (GLenum target, GLenum pname, GLfloat param); +GL_APICALL void GL_APIENTRY glTexParameterfv (GLenum target, GLenum pname, const GLfloat* params); +GL_APICALL void GL_APIENTRY glTexParameteri (GLenum target, GLenum pname, GLint param); +GL_APICALL void GL_APIENTRY glTexParameteriv (GLenum target, GLenum pname, const GLint* params); +GL_APICALL void GL_APIENTRY glTexSubImage2D (GLenum 
target, GLint level, GLint xoffset, GLint yoffset, GLsizei width, GLsizei height, GLenum format, GLenum type, const GLvoid* pixels); +GL_APICALL void GL_APIENTRY glUniform1f (GLint location, GLfloat x); +GL_APICALL void GL_APIENTRY glUniform1fv (GLint location, GLsizei count, const GLfloat* v); +GL_APICALL void GL_APIENTRY glUniform1i (GLint location, GLint x); +GL_APICALL void GL_APIENTRY glUniform1iv (GLint location, GLsizei count, const GLint* v); +GL_APICALL void GL_APIENTRY glUniform2f (GLint location, GLfloat x, GLfloat y); +GL_APICALL void GL_APIENTRY glUniform2fv (GLint location, GLsizei count, const GLfloat* v); +GL_APICALL void GL_APIENTRY glUniform2i (GLint location, GLint x, GLint y); +GL_APICALL void GL_APIENTRY glUniform2iv (GLint location, GLsizei count, const GLint* v); +GL_APICALL void GL_APIENTRY glUniform3f (GLint location, GLfloat x, GLfloat y, GLfloat z); +GL_APICALL void GL_APIENTRY glUniform3fv (GLint location, GLsizei count, const GLfloat* v); +GL_APICALL void GL_APIENTRY glUniform3i (GLint location, GLint x, GLint y, GLint z); +GL_APICALL void GL_APIENTRY glUniform3iv (GLint location, GLsizei count, const GLint* v); +GL_APICALL void GL_APIENTRY glUniform4f (GLint location, GLfloat x, GLfloat y, GLfloat z, GLfloat w); +GL_APICALL void GL_APIENTRY glUniform4fv (GLint location, GLsizei count, const GLfloat* v); +GL_APICALL void GL_APIENTRY glUniform4i (GLint location, GLint x, GLint y, GLint z, GLint w); +GL_APICALL void GL_APIENTRY glUniform4iv (GLint location, GLsizei count, const GLint* v); +GL_APICALL void GL_APIENTRY glUniformMatrix2fv (GLint location, GLsizei count, GLboolean transpose, const GLfloat* value); +GL_APICALL void GL_APIENTRY glUniformMatrix3fv (GLint location, GLsizei count, GLboolean transpose, const GLfloat* value); +GL_APICALL void GL_APIENTRY glUniformMatrix4fv (GLint location, GLsizei count, GLboolean transpose, const GLfloat* value); +GL_APICALL void GL_APIENTRY glUseProgram (GLuint program); +GL_APICALL void 
GL_APIENTRY glValidateProgram (GLuint program); +GL_APICALL void GL_APIENTRY glVertexAttrib1f (GLuint indx, GLfloat x); +GL_APICALL void GL_APIENTRY glVertexAttrib1fv (GLuint indx, const GLfloat* values); +GL_APICALL void GL_APIENTRY glVertexAttrib2f (GLuint indx, GLfloat x, GLfloat y); +GL_APICALL void GL_APIENTRY glVertexAttrib2fv (GLuint indx, const GLfloat* values); +GL_APICALL void GL_APIENTRY glVertexAttrib3f (GLuint indx, GLfloat x, GLfloat y, GLfloat z); +GL_APICALL void GL_APIENTRY glVertexAttrib3fv (GLuint indx, const GLfloat* values); +GL_APICALL void GL_APIENTRY glVertexAttrib4f (GLuint indx, GLfloat x, GLfloat y, GLfloat z, GLfloat w); +GL_APICALL void GL_APIENTRY glVertexAttrib4fv (GLuint indx, const GLfloat* values); +GL_APICALL void GL_APIENTRY glVertexAttribPointer (GLuint indx, GLint size, GLenum type, GLboolean normalized, GLsizei stride, const GLvoid* ptr); +GL_APICALL void GL_APIENTRY glViewport (GLint x, GLint y, GLsizei width, GLsizei height); + +#ifdef __cplusplus +} +#endif + +#endif /* __gl2_h_ */ diff --git a/opengles-3.1/include/GLES2/gl2ext.h b/opengles-3.1/include/GLES2/gl2ext.h new file mode 100644 index 0000000000..f7e7a61369 --- /dev/null +++ b/opengles-3.1/include/GLES2/gl2ext.h @@ -0,0 +1,1809 @@ +#ifndef __gl2ext_h_ +#define __gl2ext_h_ + +/* $Revision: 19436 $ on $Date:: 2012-10-10 10:37:04 -0700 #$ */ + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * This document is licensed under the SGI Free Software B License Version + * 2.0. For details, see http://oss.sgi.com/projects/FreeB/ . 
+ */ + +#ifndef GL_APIENTRYP +# define GL_APIENTRYP GL_APIENTRY* +#endif + +/*------------------------------------------------------------------------* + * OES extension tokens + *------------------------------------------------------------------------*/ + +/* GL_OES_compressed_ETC1_RGB8_texture */ +#ifndef GL_OES_compressed_ETC1_RGB8_texture +#define GL_ETC1_RGB8_OES 0x8D64 +#endif + +/* GL_OES_compressed_paletted_texture */ +#ifndef GL_OES_compressed_paletted_texture +#define GL_PALETTE4_RGB8_OES 0x8B90 +#define GL_PALETTE4_RGBA8_OES 0x8B91 +#define GL_PALETTE4_R5_G6_B5_OES 0x8B92 +#define GL_PALETTE4_RGBA4_OES 0x8B93 +#define GL_PALETTE4_RGB5_A1_OES 0x8B94 +#define GL_PALETTE8_RGB8_OES 0x8B95 +#define GL_PALETTE8_RGBA8_OES 0x8B96 +#define GL_PALETTE8_R5_G6_B5_OES 0x8B97 +#define GL_PALETTE8_RGBA4_OES 0x8B98 +#define GL_PALETTE8_RGB5_A1_OES 0x8B99 +#endif + +/* GL_OES_depth24 */ +#ifndef GL_OES_depth24 +#define GL_DEPTH_COMPONENT24_OES 0x81A6 +#endif + +/* GL_OES_depth32 */ +#ifndef GL_OES_depth32 +#define GL_DEPTH_COMPONENT32_OES 0x81A7 +#endif + +/* GL_OES_depth_texture */ +/* No new tokens introduced by this extension. */ + +/* GL_OES_EGL_image */ +#ifndef GL_OES_EGL_image +typedef void* GLeglImageOES; +#endif + +/* GL_OES_EGL_image_external */ +#ifndef GL_OES_EGL_image_external +/* GLeglImageOES defined in GL_OES_EGL_image already. 
*/ +#define GL_TEXTURE_EXTERNAL_OES 0x8D65 +#define GL_SAMPLER_EXTERNAL_OES 0x8D66 +#define GL_TEXTURE_BINDING_EXTERNAL_OES 0x8D67 +#define GL_REQUIRED_TEXTURE_IMAGE_UNITS_OES 0x8D68 +#endif + +/* GL_OES_element_index_uint */ +#ifndef GL_OES_element_index_uint +#define GL_UNSIGNED_INT 0x1405 +#endif + +/* GL_OES_get_program_binary */ +#ifndef GL_OES_get_program_binary +#define GL_PROGRAM_BINARY_LENGTH_OES 0x8741 +#define GL_NUM_PROGRAM_BINARY_FORMATS_OES 0x87FE +#define GL_PROGRAM_BINARY_FORMATS_OES 0x87FF +#endif + +/* GL_OES_mapbuffer */ +#ifndef GL_OES_mapbuffer +#define GL_WRITE_ONLY_OES 0x88B9 +#define GL_BUFFER_ACCESS_OES 0x88BB +#define GL_BUFFER_MAPPED_OES 0x88BC +#define GL_BUFFER_MAP_POINTER_OES 0x88BD +#endif + +/* GL_OES_packed_depth_stencil */ +#ifndef GL_OES_packed_depth_stencil +#define GL_DEPTH_STENCIL_OES 0x84F9 +#define GL_UNSIGNED_INT_24_8_OES 0x84FA +#define GL_DEPTH24_STENCIL8_OES 0x88F0 +#endif + +/* GL_OES_required_internalformat */ +#ifndef GL_OES_required_internalformat +#define GL_ALPHA8_OES 0x803C +#define GL_DEPTH_COMPONENT16_OES 0x81A5 +/* reuse GL_DEPTH_COMPONENT24_OES */ +/* reuse GL_DEPTH24_STENCIL8_OES */ +/* reuse GL_DEPTH_COMPONENT32_OES */ +#define GL_LUMINANCE4_ALPHA4_OES 0x8043 +#define GL_LUMINANCE8_ALPHA8_OES 0x8045 +#define GL_LUMINANCE8_OES 0x8040 +#define GL_RGBA4_OES 0x8056 +#define GL_RGB5_A1_OES 0x8057 +#define GL_RGB565_OES 0x8D62 +/* reuse GL_RGB8_OES */ +/* reuse GL_RGBA8_OES */ +/* reuse GL_RGB10_EXT */ +/* reuse GL_RGB10_A2_EXT */ +#endif + +/* GL_OES_rgb8_rgba8 */ +#ifndef GL_OES_rgb8_rgba8 +#define GL_RGB8_OES 0x8051 +#define GL_RGBA8_OES 0x8058 +#endif + +/* GL_OES_standard_derivatives */ +#ifndef GL_OES_standard_derivatives +#define GL_FRAGMENT_SHADER_DERIVATIVE_HINT_OES 0x8B8B +#endif + +/* GL_OES_stencil1 */ +#ifndef GL_OES_stencil1 +#define GL_STENCIL_INDEX1_OES 0x8D46 +#endif + +/* GL_OES_stencil4 */ +#ifndef GL_OES_stencil4 +#define GL_STENCIL_INDEX4_OES 0x8D47 +#endif + +#ifndef GL_OES_surfaceless_context 
+#define GL_FRAMEBUFFER_UNDEFINED_OES 0x8219 +#endif + +/* GL_OES_texture_3D */ +#ifndef GL_OES_texture_3D +#define GL_TEXTURE_WRAP_R_OES 0x8072 +#define GL_TEXTURE_3D_OES 0x806F +#define GL_TEXTURE_BINDING_3D_OES 0x806A +#define GL_MAX_3D_TEXTURE_SIZE_OES 0x8073 +#define GL_SAMPLER_3D_OES 0x8B5F +#define GL_FRAMEBUFFER_ATTACHMENT_TEXTURE_3D_ZOFFSET_OES 0x8CD4 +#endif + +/* GL_OES_texture_float */ +/* No new tokens introduced by this extension. */ + +/* GL_OES_texture_float_linear */ +/* No new tokens introduced by this extension. */ + +/* GL_OES_texture_half_float */ +#ifndef GL_OES_texture_half_float +#define GL_HALF_FLOAT_OES 0x8D61 +#endif + +/* GL_OES_texture_half_float_linear */ +/* No new tokens introduced by this extension. */ + +/* GL_OES_texture_npot */ +/* No new tokens introduced by this extension. */ + +/* GL_OES_vertex_array_object */ +#ifndef GL_OES_vertex_array_object +#define GL_VERTEX_ARRAY_BINDING_OES 0x85B5 +#endif + +/* GL_OES_vertex_half_float */ +/* GL_HALF_FLOAT_OES defined in GL_OES_texture_half_float already. 
*/ + +/* GL_OES_vertex_type_10_10_10_2 */ +#ifndef GL_OES_vertex_type_10_10_10_2 +#define GL_UNSIGNED_INT_10_10_10_2_OES 0x8DF6 +#define GL_INT_10_10_10_2_OES 0x8DF7 +#endif + +/*------------------------------------------------------------------------* + * KHR extension tokens + *------------------------------------------------------------------------*/ + +#ifndef GL_KHR_debug +typedef void (GL_APIENTRYP GLDEBUGPROC)(GLenum source,GLenum type,GLuint id,GLenum severity,GLsizei length,const GLchar *message,GLvoid *userParam); +#define GL_DEBUG_OUTPUT_SYNCHRONOUS 0x8242 +#define GL_DEBUG_NEXT_LOGGED_MESSAGE_LENGTH 0x8243 +#define GL_DEBUG_CALLBACK_FUNCTION 0x8244 +#define GL_DEBUG_CALLBACK_USER_PARAM 0x8245 +#define GL_DEBUG_SOURCE_API 0x8246 +#define GL_DEBUG_SOURCE_WINDOW_SYSTEM 0x8247 +#define GL_DEBUG_SOURCE_SHADER_COMPILER 0x8248 +#define GL_DEBUG_SOURCE_THIRD_PARTY 0x8249 +#define GL_DEBUG_SOURCE_APPLICATION 0x824A +#define GL_DEBUG_SOURCE_OTHER 0x824B +#define GL_DEBUG_TYPE_ERROR 0x824C +#define GL_DEBUG_TYPE_DEPRECATED_BEHAVIOR 0x824D +#define GL_DEBUG_TYPE_UNDEFINED_BEHAVIOR 0x824E +#define GL_DEBUG_TYPE_PORTABILITY 0x824F +#define GL_DEBUG_TYPE_PERFORMANCE 0x8250 +#define GL_DEBUG_TYPE_OTHER 0x8251 +#define GL_DEBUG_TYPE_MARKER 0x8268 +#define GL_DEBUG_TYPE_PUSH_GROUP 0x8269 +#define GL_DEBUG_TYPE_POP_GROUP 0x826A +#define GL_DEBUG_SEVERITY_NOTIFICATION 0x826B +#define GL_MAX_DEBUG_GROUP_STACK_DEPTH 0x826C +#define GL_DEBUG_GROUP_STACK_DEPTH 0x826D +#define GL_BUFFER 0x82E0 +#define GL_SHADER 0x82E1 +#define GL_PROGRAM 0x82E2 +#define GL_QUERY 0x82E3 +/* PROGRAM_PIPELINE only in GL */ +#define GL_SAMPLER 0x82E6 +/* DISPLAY_LIST only in GL */ +#define GL_MAX_LABEL_LENGTH 0x82E8 +#define GL_MAX_DEBUG_MESSAGE_LENGTH 0x9143 +#define GL_MAX_DEBUG_LOGGED_MESSAGES 0x9144 +#define GL_DEBUG_LOGGED_MESSAGES 0x9145 +#define GL_DEBUG_SEVERITY_HIGH 0x9146 +#define GL_DEBUG_SEVERITY_MEDIUM 0x9147 +#define GL_DEBUG_SEVERITY_LOW 0x9148 +#define GL_DEBUG_OUTPUT 0x92E0 
+#define GL_CONTEXT_FLAG_DEBUG_BIT 0x00000002 +#define GL_STACK_OVERFLOW 0x0503 +#define GL_STACK_UNDERFLOW 0x0504 +#endif + +#ifndef GL_KHR_texture_compression_astc_ldr +#define GL_COMPRESSED_RGBA_ASTC_4x4_KHR 0x93B0 +#define GL_COMPRESSED_RGBA_ASTC_5x4_KHR 0x93B1 +#define GL_COMPRESSED_RGBA_ASTC_5x5_KHR 0x93B2 +#define GL_COMPRESSED_RGBA_ASTC_6x5_KHR 0x93B3 +#define GL_COMPRESSED_RGBA_ASTC_6x6_KHR 0x93B4 +#define GL_COMPRESSED_RGBA_ASTC_8x5_KHR 0x93B5 +#define GL_COMPRESSED_RGBA_ASTC_8x6_KHR 0x93B6 +#define GL_COMPRESSED_RGBA_ASTC_8x8_KHR 0x93B7 +#define GL_COMPRESSED_RGBA_ASTC_10x5_KHR 0x93B8 +#define GL_COMPRESSED_RGBA_ASTC_10x6_KHR 0x93B9 +#define GL_COMPRESSED_RGBA_ASTC_10x8_KHR 0x93BA +#define GL_COMPRESSED_RGBA_ASTC_10x10_KHR 0x93BB +#define GL_COMPRESSED_RGBA_ASTC_12x10_KHR 0x93BC +#define GL_COMPRESSED_RGBA_ASTC_12x12_KHR 0x93BD +#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR 0x93D0 +#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR 0x93D1 +#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR 0x93D2 +#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR 0x93D3 +#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR 0x93D4 +#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR 0x93D5 +#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR 0x93D6 +#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR 0x93D7 +#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x5_KHR 0x93D8 +#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x6_KHR 0x93D9 +#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR 0x93DA +#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR 0x93DB +#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x10_KHR 0x93DC +#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR 0x93DD +#endif + +/*------------------------------------------------------------------------* + * AMD extension tokens + *------------------------------------------------------------------------*/ + +/* GL_AMD_compressed_3DC_texture */ +#ifndef GL_AMD_compressed_3DC_texture +#define GL_3DC_X_AMD 0x87F9 +#define GL_3DC_XY_AMD 0x87FA +#endif + +/* 
GL_AMD_compressed_ATC_texture */ +#ifndef GL_AMD_compressed_ATC_texture +#define GL_ATC_RGB_AMD 0x8C92 +#define GL_ATC_RGBA_EXPLICIT_ALPHA_AMD 0x8C93 +#define GL_ATC_RGBA_INTERPOLATED_ALPHA_AMD 0x87EE +#endif + +/* GL_AMD_performance_monitor */ +#ifndef GL_AMD_performance_monitor +#define GL_COUNTER_TYPE_AMD 0x8BC0 +#define GL_COUNTER_RANGE_AMD 0x8BC1 +#define GL_UNSIGNED_INT64_AMD 0x8BC2 +#define GL_PERCENTAGE_AMD 0x8BC3 +#define GL_PERFMON_RESULT_AVAILABLE_AMD 0x8BC4 +#define GL_PERFMON_RESULT_SIZE_AMD 0x8BC5 +#define GL_PERFMON_RESULT_AMD 0x8BC6 +#endif + +/* GL_AMD_program_binary_Z400 */ +#ifndef GL_AMD_program_binary_Z400 +#define GL_Z400_BINARY_AMD 0x8740 +#endif + +/*------------------------------------------------------------------------* + * ANGLE extension tokens + *------------------------------------------------------------------------*/ + +/* GL_ANGLE_framebuffer_blit */ +#ifndef GL_ANGLE_framebuffer_blit +#define GL_READ_FRAMEBUFFER_ANGLE 0x8CA8 +#define GL_DRAW_FRAMEBUFFER_ANGLE 0x8CA9 +#define GL_DRAW_FRAMEBUFFER_BINDING_ANGLE 0x8CA6 +#define GL_READ_FRAMEBUFFER_BINDING_ANGLE 0x8CAA +#endif + +/* GL_ANGLE_framebuffer_multisample */ +#ifndef GL_ANGLE_framebuffer_multisample +#define GL_RENDERBUFFER_SAMPLES_ANGLE 0x8CAB +#define GL_FRAMEBUFFER_INCOMPLETE_MULTISAMPLE_ANGLE 0x8D56 +#define GL_MAX_SAMPLES_ANGLE 0x8D57 +#endif + +/* GL_ANGLE_instanced_arrays */ +#ifndef GL_ANGLE_instanced_arrays +#define GL_VERTEX_ATTRIB_ARRAY_DIVISOR_ANGLE 0x88FE +#endif + +/* GL_ANGLE_pack_reverse_row_order */ +#ifndef GL_ANGLE_pack_reverse_row_order +#define GL_PACK_REVERSE_ROW_ORDER_ANGLE 0x93A4 +#endif + +/* GL_ANGLE_texture_compression_dxt3 */ +#ifndef GL_ANGLE_texture_compression_dxt3 +#define GL_COMPRESSED_RGBA_S3TC_DXT3_ANGLE 0x83F2 +#endif + +/* GL_ANGLE_texture_compression_dxt5 */ +#ifndef GL_ANGLE_texture_compression_dxt5 +#define GL_COMPRESSED_RGBA_S3TC_DXT5_ANGLE 0x83F3 +#endif + +/* GL_ANGLE_texture_usage */ +#ifndef GL_ANGLE_texture_usage +#define 
GL_TEXTURE_USAGE_ANGLE 0x93A2 +#define GL_FRAMEBUFFER_ATTACHMENT_ANGLE 0x93A3 +#endif + +/* GL_ANGLE_translated_shader_source */ +#ifndef GL_ANGLE_translated_shader_source +#define GL_TRANSLATED_SHADER_SOURCE_LENGTH_ANGLE 0x93A0 +#endif + +/*------------------------------------------------------------------------* + * APPLE extension tokens + *------------------------------------------------------------------------*/ + +/* GL_APPLE_copy_texture_levels */ +/* No new tokens introduced by this extension. */ + +/* GL_APPLE_framebuffer_multisample */ +#ifndef GL_APPLE_framebuffer_multisample +#define GL_RENDERBUFFER_SAMPLES_APPLE 0x8CAB +#define GL_FRAMEBUFFER_INCOMPLETE_MULTISAMPLE_APPLE 0x8D56 +#define GL_MAX_SAMPLES_APPLE 0x8D57 +#define GL_READ_FRAMEBUFFER_APPLE 0x8CA8 +#define GL_DRAW_FRAMEBUFFER_APPLE 0x8CA9 +#define GL_DRAW_FRAMEBUFFER_BINDING_APPLE 0x8CA6 +#define GL_READ_FRAMEBUFFER_BINDING_APPLE 0x8CAA +#endif + +/* GL_APPLE_rgb_422 */ +#ifndef GL_APPLE_rgb_422 +#define GL_RGB_422_APPLE 0x8A1F +#define GL_UNSIGNED_SHORT_8_8_APPLE 0x85BA +#define GL_UNSIGNED_SHORT_8_8_REV_APPLE 0x85BB +#endif + +/* GL_APPLE_sync */ +#ifndef GL_APPLE_sync + +#ifndef __gl3_h_ +/* These types are defined with reference to + * in the Apple extension spec, but here we use the Khronos + * portable types in khrplatform.h, and assume those types + * are always defined. + * If any other extensions using these types are defined, + * the typedefs must move out of this block and be shared. 
+ */ +typedef khronos_int64_t GLint64; +typedef khronos_uint64_t GLuint64; +typedef struct __GLsync *GLsync; +#endif + +#define GL_SYNC_OBJECT_APPLE 0x8A53 +#define GL_MAX_SERVER_WAIT_TIMEOUT_APPLE 0x9111 +#define GL_OBJECT_TYPE_APPLE 0x9112 +#define GL_SYNC_CONDITION_APPLE 0x9113 +#define GL_SYNC_STATUS_APPLE 0x9114 +#define GL_SYNC_FLAGS_APPLE 0x9115 +#define GL_SYNC_FENCE_APPLE 0x9116 +#define GL_SYNC_GPU_COMMANDS_COMPLETE_APPLE 0x9117 +#define GL_UNSIGNALED_APPLE 0x9118 +#define GL_SIGNALED_APPLE 0x9119 +#define GL_ALREADY_SIGNALED_APPLE 0x911A +#define GL_TIMEOUT_EXPIRED_APPLE 0x911B +#define GL_CONDITION_SATISFIED_APPLE 0x911C +#define GL_WAIT_FAILED_APPLE 0x911D +#define GL_SYNC_FLUSH_COMMANDS_BIT_APPLE 0x00000001 +#define GL_TIMEOUT_IGNORED_APPLE 0xFFFFFFFFFFFFFFFFull +#endif + +/* GL_APPLE_texture_format_BGRA8888 */ +#ifndef GL_APPLE_texture_format_BGRA8888 +#define GL_BGRA_EXT 0x80E1 +#endif + +/* GL_APPLE_texture_max_level */ +#ifndef GL_APPLE_texture_max_level +#define GL_TEXTURE_MAX_LEVEL_APPLE 0x813D +#endif + +/*------------------------------------------------------------------------* + * ARM extension tokens + *------------------------------------------------------------------------*/ + +/* GL_ARM_mali_program_binary */ +#ifndef GL_ARM_mali_program_binary +#define GL_MALI_PROGRAM_BINARY_ARM 0x8F61 +#endif + +/* GL_ARM_mali_shader_binary */ +#ifndef GL_ARM_mali_shader_binary +#define GL_MALI_SHADER_BINARY_ARM 0x8F60 +#endif + +/* GL_ARM_rgba8 */ +/* No new tokens introduced by this extension. 
*/ + +/*------------------------------------------------------------------------* + * EXT extension tokens + *------------------------------------------------------------------------*/ + +/* GL_EXT_blend_minmax */ +#ifndef GL_EXT_blend_minmax +#define GL_MIN_EXT 0x8007 +#define GL_MAX_EXT 0x8008 +#endif + +/* GL_EXT_color_buffer_half_float */ +#ifndef GL_EXT_color_buffer_half_float +#define GL_RGBA16F_EXT 0x881A +#define GL_RGB16F_EXT 0x881B +#define GL_RG16F_EXT 0x822F +#define GL_R16F_EXT 0x822D +#define GL_FRAMEBUFFER_ATTACHMENT_COMPONENT_TYPE_EXT 0x8211 +#define GL_UNSIGNED_NORMALIZED_EXT 0x8C17 +#endif + +/* GL_EXT_debug_label */ +#ifndef GL_EXT_debug_label +#define GL_PROGRAM_PIPELINE_OBJECT_EXT 0x8A4F +#define GL_PROGRAM_OBJECT_EXT 0x8B40 +#define GL_SHADER_OBJECT_EXT 0x8B48 +#define GL_BUFFER_OBJECT_EXT 0x9151 +#define GL_QUERY_OBJECT_EXT 0x9153 +#define GL_VERTEX_ARRAY_OBJECT_EXT 0x9154 +#endif + +/* GL_EXT_debug_marker */ +/* No new tokens introduced by this extension. */ + +/* GL_EXT_discard_framebuffer */ +#ifndef GL_EXT_discard_framebuffer +#define GL_COLOR_EXT 0x1800 +#define GL_DEPTH_EXT 0x1801 +#define GL_STENCIL_EXT 0x1802 +#endif + +/* GL_EXT_map_buffer_range */ +#ifndef GL_EXT_map_buffer_range +#define GL_MAP_READ_BIT_EXT 0x0001 +#define GL_MAP_WRITE_BIT_EXT 0x0002 +#define GL_MAP_INVALIDATE_RANGE_BIT_EXT 0x0004 +#define GL_MAP_INVALIDATE_BUFFER_BIT_EXT 0x0008 +#define GL_MAP_FLUSH_EXPLICIT_BIT_EXT 0x0010 +#define GL_MAP_UNSYNCHRONIZED_BIT_EXT 0x0020 +#endif + +/* GL_EXT_multisampled_render_to_texture */ +#ifndef GL_EXT_multisampled_render_to_texture +#define GL_FRAMEBUFFER_ATTACHMENT_TEXTURE_SAMPLES_EXT 0x8D6C +/* reuse values from GL_EXT_framebuffer_multisample (desktop extension) */ +#define GL_RENDERBUFFER_SAMPLES_EXT 0x8CAB +#define GL_FRAMEBUFFER_INCOMPLETE_MULTISAMPLE_EXT 0x8D56 +#define GL_MAX_SAMPLES_EXT 0x8D57 +#endif + +/* GL_EXT_multiview_draw_buffers */ +#ifndef GL_EXT_multiview_draw_buffers +#define GL_COLOR_ATTACHMENT_EXT 0x90F0 
+#define GL_MULTIVIEW_EXT 0x90F1 +#define GL_DRAW_BUFFER_EXT 0x0C01 +#define GL_READ_BUFFER_EXT 0x0C02 +#define GL_MAX_MULTIVIEW_BUFFERS_EXT 0x90F2 +#endif + +/* GL_EXT_multi_draw_arrays */ +/* No new tokens introduced by this extension. */ + +/* GL_EXT_occlusion_query_boolean */ +#ifndef GL_EXT_occlusion_query_boolean +#define GL_ANY_SAMPLES_PASSED_EXT 0x8C2F +#define GL_ANY_SAMPLES_PASSED_CONSERVATIVE_EXT 0x8D6A +#define GL_CURRENT_QUERY_EXT 0x8865 +#define GL_QUERY_RESULT_EXT 0x8866 +#define GL_QUERY_RESULT_AVAILABLE_EXT 0x8867 +#endif + +/* GL_EXT_read_format_bgra */ +#ifndef GL_EXT_read_format_bgra +#define GL_BGRA_EXT 0x80E1 +#define GL_UNSIGNED_SHORT_4_4_4_4_REV_EXT 0x8365 +#define GL_UNSIGNED_SHORT_1_5_5_5_REV_EXT 0x8366 +#endif + +/* GL_EXT_robustness */ +#ifndef GL_EXT_robustness +/* reuse GL_NO_ERROR */ +#define GL_GUILTY_CONTEXT_RESET_EXT 0x8253 +#define GL_INNOCENT_CONTEXT_RESET_EXT 0x8254 +#define GL_UNKNOWN_CONTEXT_RESET_EXT 0x8255 +#define GL_CONTEXT_ROBUST_ACCESS_EXT 0x90F3 +#define GL_RESET_NOTIFICATION_STRATEGY_EXT 0x8256 +#define GL_LOSE_CONTEXT_ON_RESET_EXT 0x8252 +#define GL_NO_RESET_NOTIFICATION_EXT 0x8261 +#endif + +/* GL_EXT_separate_shader_objects */ +#ifndef GL_EXT_separate_shader_objects +#define GL_VERTEX_SHADER_BIT_EXT 0x00000001 +#define GL_FRAGMENT_SHADER_BIT_EXT 0x00000002 +#define GL_ALL_SHADER_BITS_EXT 0xFFFFFFFF +#define GL_PROGRAM_SEPARABLE_EXT 0x8258 +#define GL_ACTIVE_PROGRAM_EXT 0x8259 +#define GL_PROGRAM_PIPELINE_BINDING_EXT 0x825A +#endif + +/* GL_EXT_shader_framebuffer_fetch */ +#ifndef GL_EXT_shader_framebuffer_fetch +#define GL_FRAGMENT_SHADER_DISCARDS_SAMPLES_EXT 0x8A52 +#endif + +/* GL_EXT_shader_texture_lod */ +/* No new tokens introduced by this extension. 
*/ + +/* GL_EXT_shadow_samplers */ +#ifndef GL_EXT_shadow_samplers +#define GL_TEXTURE_COMPARE_MODE_EXT 0x884C +#define GL_TEXTURE_COMPARE_FUNC_EXT 0x884D +#define GL_COMPARE_REF_TO_TEXTURE_EXT 0x884E +#define GL_SAMPLER_2D_SHADOW_EXT 0x8B62 +#endif + +/* GL_EXT_sRGB */ +#ifndef GL_EXT_sRGB +#define GL_SRGB_EXT 0x8C40 +#define GL_SRGB_ALPHA_EXT 0x8C42 +#define GL_SRGB8_ALPHA8_EXT 0x8C43 +#define GL_FRAMEBUFFER_ATTACHMENT_COLOR_ENCODING_EXT 0x8210 +#endif + +/* GL_EXT_texture_compression_dxt1 */ +#ifndef GL_EXT_texture_compression_dxt1 +#define GL_COMPRESSED_RGB_S3TC_DXT1_EXT 0x83F0 +#define GL_COMPRESSED_RGBA_S3TC_DXT1_EXT 0x83F1 +#endif + +/* GL_EXT_texture_filter_anisotropic */ +#ifndef GL_EXT_texture_filter_anisotropic +#define GL_TEXTURE_MAX_ANISOTROPY_EXT 0x84FE +#define GL_MAX_TEXTURE_MAX_ANISOTROPY_EXT 0x84FF +#endif + +/* GL_EXT_texture_format_BGRA8888 */ +#ifndef GL_EXT_texture_format_BGRA8888 +#define GL_BGRA_EXT 0x80E1 +#endif + +/* GL_EXT_texture_rg */ +#ifndef GL_EXT_texture_rg +#define GL_RED_EXT 0x1903 +#define GL_RG_EXT 0x8227 +#define GL_R8_EXT 0x8229 +#define GL_RG8_EXT 0x822B +#endif + +/* GL_EXT_texture_storage */ +#ifndef GL_EXT_texture_storage +#define GL_TEXTURE_IMMUTABLE_FORMAT_EXT 0x912F +#define GL_ALPHA8_EXT 0x803C +#define GL_LUMINANCE8_EXT 0x8040 +#define GL_LUMINANCE8_ALPHA8_EXT 0x8045 +#define GL_RGBA32F_EXT 0x8814 +#define GL_RGB32F_EXT 0x8815 +#define GL_ALPHA32F_EXT 0x8816 +#define GL_LUMINANCE32F_EXT 0x8818 +#define GL_LUMINANCE_ALPHA32F_EXT 0x8819 +/* reuse GL_RGBA16F_EXT */ +/* reuse GL_RGB16F_EXT */ +#define GL_ALPHA16F_EXT 0x881C +#define GL_LUMINANCE16F_EXT 0x881E +#define GL_LUMINANCE_ALPHA16F_EXT 0x881F +#define GL_RGB10_A2_EXT 0x8059 +#define GL_RGB10_EXT 0x8052 +#define GL_BGRA8_EXT 0x93A1 +#define GL_R8_EXT 0x8229 +#define GL_RG8_EXT 0x822B +#define GL_R32F_EXT 0x822E +#define GL_RG32F_EXT 0x8230 +#define GL_R16F_EXT 0x822D +#define GL_RG16F_EXT 0x822F +#endif + +/* GL_EXT_texture_type_2_10_10_10_REV */ +#ifndef 
GL_EXT_texture_type_2_10_10_10_REV +#define GL_UNSIGNED_INT_2_10_10_10_REV_EXT 0x8368 +#endif + +/* GL_EXT_unpack_subimage */ +#ifndef GL_EXT_unpack_subimage +#define GL_UNPACK_ROW_LENGTH 0x0CF2 +#define GL_UNPACK_SKIP_ROWS 0x0CF3 +#define GL_UNPACK_SKIP_PIXELS 0x0CF4 +#endif + +/*------------------------------------------------------------------------* + * DMP extension tokens + *------------------------------------------------------------------------*/ + +/* GL_DMP_shader_binary */ +#ifndef GL_DMP_shader_binary +#define GL_SHADER_BINARY_DMP 0x9250 +#endif + +/*------------------------------------------------------------------------* + * FJ extension tokens + *------------------------------------------------------------------------*/ + +/* GL_FJ_shader_binary_GCCSO */ +#ifndef GL_FJ_shader_binary_GCCSO +#define GCCSO_SHADER_BINARY_FJ 0x9260 +#endif + +/*------------------------------------------------------------------------* + * IMG extension tokens + *------------------------------------------------------------------------*/ + +/* GL_IMG_program_binary */ +#ifndef GL_IMG_program_binary +#define GL_SGX_PROGRAM_BINARY_IMG 0x9130 +#endif + +/* GL_IMG_read_format */ +#ifndef GL_IMG_read_format +#define GL_BGRA_IMG 0x80E1 +#define GL_UNSIGNED_SHORT_4_4_4_4_REV_IMG 0x8365 +#endif + +/* GL_IMG_shader_binary */ +#ifndef GL_IMG_shader_binary +#define GL_SGX_BINARY_IMG 0x8C0A +#endif + +/* GL_IMG_texture_compression_pvrtc */ +#ifndef GL_IMG_texture_compression_pvrtc +#define GL_COMPRESSED_RGB_PVRTC_4BPPV1_IMG 0x8C00 +#define GL_COMPRESSED_RGB_PVRTC_2BPPV1_IMG 0x8C01 +#define GL_COMPRESSED_RGBA_PVRTC_4BPPV1_IMG 0x8C02 +#define GL_COMPRESSED_RGBA_PVRTC_2BPPV1_IMG 0x8C03 +#endif + +/* GL_IMG_multisampled_render_to_texture */ +#ifndef GL_IMG_multisampled_render_to_texture +#define GL_RENDERBUFFER_SAMPLES_IMG 0x9133 +#define GL_FRAMEBUFFER_INCOMPLETE_MULTISAMPLE_IMG 0x9134 +#define GL_MAX_SAMPLES_IMG 0x9135 +#define GL_TEXTURE_SAMPLES_IMG 0x9136 +#endif + 
+/*------------------------------------------------------------------------* + * NV extension tokens + *------------------------------------------------------------------------*/ + +/* GL_NV_coverage_sample */ +#ifndef GL_NV_coverage_sample +#define GL_COVERAGE_COMPONENT_NV 0x8ED0 +#define GL_COVERAGE_COMPONENT4_NV 0x8ED1 +#define GL_COVERAGE_ATTACHMENT_NV 0x8ED2 +#define GL_COVERAGE_BUFFERS_NV 0x8ED3 +#define GL_COVERAGE_SAMPLES_NV 0x8ED4 +#define GL_COVERAGE_ALL_FRAGMENTS_NV 0x8ED5 +#define GL_COVERAGE_EDGE_FRAGMENTS_NV 0x8ED6 +#define GL_COVERAGE_AUTOMATIC_NV 0x8ED7 +#define GL_COVERAGE_BUFFER_BIT_NV 0x8000 +#endif + +/* GL_NV_depth_nonlinear */ +#ifndef GL_NV_depth_nonlinear +#define GL_DEPTH_COMPONENT16_NONLINEAR_NV 0x8E2C +#endif + +/* GL_NV_draw_buffers */ +#ifndef GL_NV_draw_buffers +#define GL_MAX_DRAW_BUFFERS_NV 0x8824 +#define GL_DRAW_BUFFER0_NV 0x8825 +#define GL_DRAW_BUFFER1_NV 0x8826 +#define GL_DRAW_BUFFER2_NV 0x8827 +#define GL_DRAW_BUFFER3_NV 0x8828 +#define GL_DRAW_BUFFER4_NV 0x8829 +#define GL_DRAW_BUFFER5_NV 0x882A +#define GL_DRAW_BUFFER6_NV 0x882B +#define GL_DRAW_BUFFER7_NV 0x882C +#define GL_DRAW_BUFFER8_NV 0x882D +#define GL_DRAW_BUFFER9_NV 0x882E +#define GL_DRAW_BUFFER10_NV 0x882F +#define GL_DRAW_BUFFER11_NV 0x8830 +#define GL_DRAW_BUFFER12_NV 0x8831 +#define GL_DRAW_BUFFER13_NV 0x8832 +#define GL_DRAW_BUFFER14_NV 0x8833 +#define GL_DRAW_BUFFER15_NV 0x8834 +#define GL_COLOR_ATTACHMENT0_NV 0x8CE0 +#define GL_COLOR_ATTACHMENT1_NV 0x8CE1 +#define GL_COLOR_ATTACHMENT2_NV 0x8CE2 +#define GL_COLOR_ATTACHMENT3_NV 0x8CE3 +#define GL_COLOR_ATTACHMENT4_NV 0x8CE4 +#define GL_COLOR_ATTACHMENT5_NV 0x8CE5 +#define GL_COLOR_ATTACHMENT6_NV 0x8CE6 +#define GL_COLOR_ATTACHMENT7_NV 0x8CE7 +#define GL_COLOR_ATTACHMENT8_NV 0x8CE8 +#define GL_COLOR_ATTACHMENT9_NV 0x8CE9 +#define GL_COLOR_ATTACHMENT10_NV 0x8CEA +#define GL_COLOR_ATTACHMENT11_NV 0x8CEB +#define GL_COLOR_ATTACHMENT12_NV 0x8CEC +#define GL_COLOR_ATTACHMENT13_NV 0x8CED +#define 
GL_COLOR_ATTACHMENT14_NV 0x8CEE +#define GL_COLOR_ATTACHMENT15_NV 0x8CEF +#endif + +/* GL_NV_fbo_color_attachments */ +#ifndef GL_NV_fbo_color_attachments +#define GL_MAX_COLOR_ATTACHMENTS_NV 0x8CDF +/* GL_COLOR_ATTACHMENT{0-15}_NV defined in GL_NV_draw_buffers already. */ +#endif + +/* GL_NV_fence */ +#ifndef GL_NV_fence +#define GL_ALL_COMPLETED_NV 0x84F2 +#define GL_FENCE_STATUS_NV 0x84F3 +#define GL_FENCE_CONDITION_NV 0x84F4 +#endif + +/* GL_NV_read_buffer */ +#ifndef GL_NV_read_buffer +#define GL_READ_BUFFER_NV 0x0C02 +#endif + +/* GL_NV_read_buffer_front */ +/* No new tokens introduced by this extension. */ + +/* GL_NV_read_depth */ +/* No new tokens introduced by this extension. */ + +/* GL_NV_read_depth_stencil */ +/* No new tokens introduced by this extension. */ + +/* GL_NV_read_stencil */ +/* No new tokens introduced by this extension. */ + +/* GL_NV_texture_compression_s3tc_update */ +/* No new tokens introduced by this extension. */ + +/* GL_NV_texture_npot_2D_mipmap */ +/* No new tokens introduced by this extension. */ + +/*------------------------------------------------------------------------* + * QCOM extension tokens + *------------------------------------------------------------------------*/ + +/* GL_QCOM_alpha_test */ +#ifndef GL_QCOM_alpha_test +#define GL_ALPHA_TEST_QCOM 0x0BC0 +#define GL_ALPHA_TEST_FUNC_QCOM 0x0BC1 +#define GL_ALPHA_TEST_REF_QCOM 0x0BC2 +#endif + +/* GL_QCOM_binning_control */ +#ifndef GL_QCOM_binning_control +#define GL_BINNING_CONTROL_HINT_QCOM 0x8FB0 +#define GL_CPU_OPTIMIZED_QCOM 0x8FB1 +#define GL_GPU_OPTIMIZED_QCOM 0x8FB2 +#define GL_RENDER_DIRECT_TO_FRAMEBUFFER_QCOM 0x8FB3 +#endif + +/* GL_QCOM_driver_control */ +/* No new tokens introduced by this extension. 
*/ + +/* GL_QCOM_extended_get */ +#ifndef GL_QCOM_extended_get +#define GL_TEXTURE_WIDTH_QCOM 0x8BD2 +#define GL_TEXTURE_HEIGHT_QCOM 0x8BD3 +#define GL_TEXTURE_DEPTH_QCOM 0x8BD4 +#define GL_TEXTURE_INTERNAL_FORMAT_QCOM 0x8BD5 +#define GL_TEXTURE_FORMAT_QCOM 0x8BD6 +#define GL_TEXTURE_TYPE_QCOM 0x8BD7 +#define GL_TEXTURE_IMAGE_VALID_QCOM 0x8BD8 +#define GL_TEXTURE_NUM_LEVELS_QCOM 0x8BD9 +#define GL_TEXTURE_TARGET_QCOM 0x8BDA +#define GL_TEXTURE_OBJECT_VALID_QCOM 0x8BDB +#define GL_STATE_RESTORE 0x8BDC +#endif + +/* GL_QCOM_extended_get2 */ +/* No new tokens introduced by this extension. */ + +/* GL_QCOM_perfmon_global_mode */ +#ifndef GL_QCOM_perfmon_global_mode +#define GL_PERFMON_GLOBAL_MODE_QCOM 0x8FA0 +#endif + +/* GL_QCOM_writeonly_rendering */ +#ifndef GL_QCOM_writeonly_rendering +#define GL_WRITEONLY_RENDERING_QCOM 0x8823 +#endif + +/* GL_QCOM_tiled_rendering */ +#ifndef GL_QCOM_tiled_rendering +#define GL_COLOR_BUFFER_BIT0_QCOM 0x00000001 +#define GL_COLOR_BUFFER_BIT1_QCOM 0x00000002 +#define GL_COLOR_BUFFER_BIT2_QCOM 0x00000004 +#define GL_COLOR_BUFFER_BIT3_QCOM 0x00000008 +#define GL_COLOR_BUFFER_BIT4_QCOM 0x00000010 +#define GL_COLOR_BUFFER_BIT5_QCOM 0x00000020 +#define GL_COLOR_BUFFER_BIT6_QCOM 0x00000040 +#define GL_COLOR_BUFFER_BIT7_QCOM 0x00000080 +#define GL_DEPTH_BUFFER_BIT0_QCOM 0x00000100 +#define GL_DEPTH_BUFFER_BIT1_QCOM 0x00000200 +#define GL_DEPTH_BUFFER_BIT2_QCOM 0x00000400 +#define GL_DEPTH_BUFFER_BIT3_QCOM 0x00000800 +#define GL_DEPTH_BUFFER_BIT4_QCOM 0x00001000 +#define GL_DEPTH_BUFFER_BIT5_QCOM 0x00002000 +#define GL_DEPTH_BUFFER_BIT6_QCOM 0x00004000 +#define GL_DEPTH_BUFFER_BIT7_QCOM 0x00008000 +#define GL_STENCIL_BUFFER_BIT0_QCOM 0x00010000 +#define GL_STENCIL_BUFFER_BIT1_QCOM 0x00020000 +#define GL_STENCIL_BUFFER_BIT2_QCOM 0x00040000 +#define GL_STENCIL_BUFFER_BIT3_QCOM 0x00080000 +#define GL_STENCIL_BUFFER_BIT4_QCOM 0x00100000 +#define GL_STENCIL_BUFFER_BIT5_QCOM 0x00200000 +#define GL_STENCIL_BUFFER_BIT6_QCOM 0x00400000 +#define 
GL_STENCIL_BUFFER_BIT7_QCOM 0x00800000 +#define GL_MULTISAMPLE_BUFFER_BIT0_QCOM 0x01000000 +#define GL_MULTISAMPLE_BUFFER_BIT1_QCOM 0x02000000 +#define GL_MULTISAMPLE_BUFFER_BIT2_QCOM 0x04000000 +#define GL_MULTISAMPLE_BUFFER_BIT3_QCOM 0x08000000 +#define GL_MULTISAMPLE_BUFFER_BIT4_QCOM 0x10000000 +#define GL_MULTISAMPLE_BUFFER_BIT5_QCOM 0x20000000 +#define GL_MULTISAMPLE_BUFFER_BIT6_QCOM 0x40000000 +#define GL_MULTISAMPLE_BUFFER_BIT7_QCOM 0x80000000 +#endif + +/*------------------------------------------------------------------------* + * VIV extension tokens + *------------------------------------------------------------------------*/ + +/* GL_VIV_shader_binary */ +#ifndef GL_VIV_shader_binary +#define GL_SHADER_BINARY_VIV 0x8FC4 +#endif + +/*------------------------------------------------------------------------* + * End of extension tokens, start of corresponding extension functions + *------------------------------------------------------------------------*/ + +/*------------------------------------------------------------------------* + * OES extension functions + *------------------------------------------------------------------------*/ + +/* GL_OES_compressed_ETC1_RGB8_texture */ +#ifndef GL_OES_compressed_ETC1_RGB8_texture +#define GL_OES_compressed_ETC1_RGB8_texture 1 +#endif + +/* GL_OES_compressed_paletted_texture */ +#ifndef GL_OES_compressed_paletted_texture +#define GL_OES_compressed_paletted_texture 1 +#endif + +/* GL_OES_depth24 */ +#ifndef GL_OES_depth24 +#define GL_OES_depth24 1 +#endif + +/* GL_OES_depth32 */ +#ifndef GL_OES_depth32 +#define GL_OES_depth32 1 +#endif + +/* GL_OES_depth_texture */ +#ifndef GL_OES_depth_texture +#define GL_OES_depth_texture 1 +#endif + +/* GL_OES_EGL_image */ +#ifndef GL_OES_EGL_image +#define GL_OES_EGL_image 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_APICALL void GL_APIENTRY glEGLImageTargetTexture2DOES (GLenum target, GLeglImageOES image); +GL_APICALL void GL_APIENTRY glEGLImageTargetRenderbufferStorageOES (GLenum 
target, GLeglImageOES image); +#endif +typedef void (GL_APIENTRYP PFNGLEGLIMAGETARGETTEXTURE2DOESPROC) (GLenum target, GLeglImageOES image); +typedef void (GL_APIENTRYP PFNGLEGLIMAGETARGETRENDERBUFFERSTORAGEOESPROC) (GLenum target, GLeglImageOES image); +#endif + +/* GL_OES_EGL_image_external */ +#ifndef GL_OES_EGL_image_external +#define GL_OES_EGL_image_external 1 +/* glEGLImageTargetTexture2DOES defined in GL_OES_EGL_image already. */ +#endif + +/* GL_OES_element_index_uint */ +#ifndef GL_OES_element_index_uint +#define GL_OES_element_index_uint 1 +#endif + +/* GL_OES_fbo_render_mipmap */ +#ifndef GL_OES_fbo_render_mipmap +#define GL_OES_fbo_render_mipmap 1 +#endif + +/* GL_OES_fragment_precision_high */ +#ifndef GL_OES_fragment_precision_high +#define GL_OES_fragment_precision_high 1 +#endif + +/* GL_OES_get_program_binary */ +#ifndef GL_OES_get_program_binary +#define GL_OES_get_program_binary 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_APICALL void GL_APIENTRY glGetProgramBinaryOES (GLuint program, GLsizei bufSize, GLsizei *length, GLenum *binaryFormat, GLvoid *binary); +GL_APICALL void GL_APIENTRY glProgramBinaryOES (GLuint program, GLenum binaryFormat, const GLvoid *binary, GLint length); +#endif +typedef void (GL_APIENTRYP PFNGLGETPROGRAMBINARYOESPROC) (GLuint program, GLsizei bufSize, GLsizei *length, GLenum *binaryFormat, GLvoid *binary); +typedef void (GL_APIENTRYP PFNGLPROGRAMBINARYOESPROC) (GLuint program, GLenum binaryFormat, const GLvoid *binary, GLint length); +#endif + +/* GL_OES_mapbuffer */ +#ifndef GL_OES_mapbuffer +#define GL_OES_mapbuffer 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_APICALL void* GL_APIENTRY glMapBufferOES (GLenum target, GLenum access); +GL_APICALL GLboolean GL_APIENTRY glUnmapBufferOES (GLenum target); +GL_APICALL void GL_APIENTRY glGetBufferPointervOES (GLenum target, GLenum pname, GLvoid** params); +#endif +typedef void* (GL_APIENTRYP PFNGLMAPBUFFEROESPROC) (GLenum target, GLenum access); +typedef GLboolean (GL_APIENTRYP 
PFNGLUNMAPBUFFEROESPROC) (GLenum target); +typedef void (GL_APIENTRYP PFNGLGETBUFFERPOINTERVOESPROC) (GLenum target, GLenum pname, GLvoid** params); +#endif + +/* GL_OES_packed_depth_stencil */ +#ifndef GL_OES_packed_depth_stencil +#define GL_OES_packed_depth_stencil 1 +#endif + +/* GL_OES_required_internalformat */ +#ifndef GL_OES_required_internalformat +#define GL_OES_required_internalformat 1 +#endif + +/* GL_OES_rgb8_rgba8 */ +#ifndef GL_OES_rgb8_rgba8 +#define GL_OES_rgb8_rgba8 1 +#endif + +/* GL_OES_standard_derivatives */ +#ifndef GL_OES_standard_derivatives +#define GL_OES_standard_derivatives 1 +#endif + +/* GL_OES_stencil1 */ +#ifndef GL_OES_stencil1 +#define GL_OES_stencil1 1 +#endif + +/* GL_OES_stencil4 */ +#ifndef GL_OES_stencil4 +#define GL_OES_stencil4 1 +#endif + +#ifndef GL_OES_surfaceless_context +#define GL_OES_surfaceless_context 1 +#endif + +/* GL_OES_texture_3D */ +#ifndef GL_OES_texture_3D +#define GL_OES_texture_3D 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_APICALL void GL_APIENTRY glTexImage3DOES (GLenum target, GLint level, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth, GLint border, GLenum format, GLenum type, const GLvoid* pixels); +GL_APICALL void GL_APIENTRY glTexSubImage3DOES (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLenum format, GLenum type, const GLvoid* pixels); +GL_APICALL void GL_APIENTRY glCopyTexSubImage3DOES (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLint x, GLint y, GLsizei width, GLsizei height); +GL_APICALL void GL_APIENTRY glCompressedTexImage3DOES (GLenum target, GLint level, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth, GLint border, GLsizei imageSize, const GLvoid* data); +GL_APICALL void GL_APIENTRY glCompressedTexSubImage3DOES (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLenum format, GLsizei 
imageSize, const GLvoid* data); +GL_APICALL void GL_APIENTRY glFramebufferTexture3DOES (GLenum target, GLenum attachment, GLenum textarget, GLuint texture, GLint level, GLint zoffset); +#endif +typedef void (GL_APIENTRYP PFNGLTEXIMAGE3DOESPROC) (GLenum target, GLint level, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth, GLint border, GLenum format, GLenum type, const GLvoid* pixels); +typedef void (GL_APIENTRYP PFNGLTEXSUBIMAGE3DOESPROC) (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLenum format, GLenum type, const GLvoid* pixels); +typedef void (GL_APIENTRYP PFNGLCOPYTEXSUBIMAGE3DOESPROC) (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLint x, GLint y, GLsizei width, GLsizei height); +typedef void (GL_APIENTRYP PFNGLCOMPRESSEDTEXIMAGE3DOESPROC) (GLenum target, GLint level, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth, GLint border, GLsizei imageSize, const GLvoid* data); +typedef void (GL_APIENTRYP PFNGLCOMPRESSEDTEXSUBIMAGE3DOESPROC) (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLenum format, GLsizei imageSize, const GLvoid* data); +typedef void (GL_APIENTRYP PFNGLFRAMEBUFFERTEXTURE3DOES) (GLenum target, GLenum attachment, GLenum textarget, GLuint texture, GLint level, GLint zoffset); +#endif + +/* GL_OES_texture_float */ +#ifndef GL_OES_texture_float +#define GL_OES_texture_float 1 +#endif + +/* GL_OES_texture_float_linear */ +#ifndef GL_OES_texture_float_linear +#define GL_OES_texture_float_linear 1 +#endif + +/* GL_OES_texture_half_float */ +#ifndef GL_OES_texture_half_float +#define GL_OES_texture_half_float 1 +#endif + +/* GL_OES_texture_half_float_linear */ +#ifndef GL_OES_texture_half_float_linear +#define GL_OES_texture_half_float_linear 1 +#endif + +/* GL_OES_texture_npot */ +#ifndef GL_OES_texture_npot +#define GL_OES_texture_npot 1 
+#endif + +/* GL_OES_vertex_array_object */ +#ifndef GL_OES_vertex_array_object +#define GL_OES_vertex_array_object 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_APICALL void GL_APIENTRY glBindVertexArrayOES (GLuint array); +GL_APICALL void GL_APIENTRY glDeleteVertexArraysOES (GLsizei n, const GLuint *arrays); +GL_APICALL void GL_APIENTRY glGenVertexArraysOES (GLsizei n, GLuint *arrays); +GL_APICALL GLboolean GL_APIENTRY glIsVertexArrayOES (GLuint array); +#endif +typedef void (GL_APIENTRYP PFNGLBINDVERTEXARRAYOESPROC) (GLuint array); +typedef void (GL_APIENTRYP PFNGLDELETEVERTEXARRAYSOESPROC) (GLsizei n, const GLuint *arrays); +typedef void (GL_APIENTRYP PFNGLGENVERTEXARRAYSOESPROC) (GLsizei n, GLuint *arrays); +typedef GLboolean (GL_APIENTRYP PFNGLISVERTEXARRAYOESPROC) (GLuint array); +#endif + +/* GL_OES_vertex_half_float */ +#ifndef GL_OES_vertex_half_float +#define GL_OES_vertex_half_float 1 +#endif + +/* GL_OES_vertex_type_10_10_10_2 */ +#ifndef GL_OES_vertex_type_10_10_10_2 +#define GL_OES_vertex_type_10_10_10_2 1 +#endif + +/*------------------------------------------------------------------------* + * KHR extension functions + *------------------------------------------------------------------------*/ + +#ifndef GL_KHR_debug +#define GL_KHR_debug 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_APICALL void GL_APIENTRY glDebugMessageControl (GLenum source, GLenum type, GLenum severity, GLsizei count, const GLuint *ids, GLboolean enabled); +GL_APICALL void GL_APIENTRY glDebugMessageInsert (GLenum source, GLenum type, GLuint id, GLenum severity, GLsizei length, const GLchar *buf); +GL_APICALL void GL_APIENTRY glDebugMessageCallback (GLDEBUGPROC callback, const void *userParam); +GL_APICALL GLuint GL_APIENTRY glGetDebugMessageLog (GLuint count, GLsizei bufsize, GLenum *sources, GLenum *types, GLuint *ids, GLenum *severities, GLsizei *lengths, GLchar *messageLog); +GL_APICALL void GL_APIENTRY glPushDebugGroup (GLenum source, GLuint id, GLsizei length, const GLchar *message); +GL_APICALL 
void GL_APIENTRY glPopDebugGroup (void); +GL_APICALL void GL_APIENTRY glObjectLabel (GLenum identifier, GLuint name, GLsizei length, const GLchar *label); +GL_APICALL void GL_APIENTRY glGetObjectLabel (GLenum identifier, GLuint name, GLsizei bufSize, GLsizei *length, GLchar *label); +GL_APICALL void GL_APIENTRY glObjectPtrLabel (const void *ptr, GLsizei length, const GLchar *label); +GL_APICALL void GL_APIENTRY glGetObjectPtrLabel (const void *ptr, GLsizei bufSize, GLsizei *length, GLchar *label); +GL_APICALL void GL_APIENTRY glGetPointerv (GLenum pname, void **params); +#endif +typedef void (GL_APIENTRYP PFNGLDEBUGMESSAGECONTROLPROC) (GLenum source, GLenum type, GLenum severity, GLsizei count, const GLuint *ids, GLboolean enabled); +typedef void (GL_APIENTRYP PFNGLDEBUGMESSAGEINSERTPROC) (GLenum source, GLenum type, GLuint id, GLenum severity, GLsizei length, const GLchar *buf); +typedef void (GL_APIENTRYP PFNGLDEBUGMESSAGECALLBACKPROC) (GLDEBUGPROC callback, const void *userParam); +typedef GLuint (GL_APIENTRYP PFNGLGETDEBUGMESSAGELOGPROC) (GLuint count, GLsizei bufsize, GLenum *sources, GLenum *types, GLuint *ids, GLenum *severities, GLsizei *lengths, GLchar *messageLog); +typedef void (GL_APIENTRYP PFNGLPUSHDEBUGGROUPPROC) (GLenum source, GLuint id, GLsizei length, const GLchar *message); +typedef void (GL_APIENTRYP PFNGLPOPDEBUGGROUPPROC) (void); +typedef void (GL_APIENTRYP PFNGLOBJECTLABELPROC) (GLenum identifier, GLuint name, GLsizei length, const GLchar *label); +typedef void (GL_APIENTRYP PFNGLGETOBJECTLABELPROC) (GLenum identifier, GLuint name, GLsizei bufSize, GLsizei *length, GLchar *label); +typedef void (GL_APIENTRYP PFNGLOBJECTPTRLABELPROC) (const void *ptr, GLsizei length, const GLchar *label); +typedef void (GL_APIENTRYP PFNGLGETOBJECTPTRLABELPROC) (const void *ptr, GLsizei bufSize, GLsizei *length, GLchar *label); +typedef void (GL_APIENTRYP PFNGLGETPOINTERVPROC) (GLenum pname, void **params); +#endif + +#ifndef GL_KHR_texture_compression_astc_ldr 
+#define GL_KHR_texture_compression_astc_ldr 1 +#endif + + +/*------------------------------------------------------------------------* + * AMD extension functions + *------------------------------------------------------------------------*/ + +/* GL_AMD_compressed_3DC_texture */ +#ifndef GL_AMD_compressed_3DC_texture +#define GL_AMD_compressed_3DC_texture 1 +#endif + +/* GL_AMD_compressed_ATC_texture */ +#ifndef GL_AMD_compressed_ATC_texture +#define GL_AMD_compressed_ATC_texture 1 +#endif + +/* AMD_performance_monitor */ +#ifndef GL_AMD_performance_monitor +#define GL_AMD_performance_monitor 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_APICALL void GL_APIENTRY glGetPerfMonitorGroupsAMD (GLint *numGroups, GLsizei groupsSize, GLuint *groups); +GL_APICALL void GL_APIENTRY glGetPerfMonitorCountersAMD (GLuint group, GLint *numCounters, GLint *maxActiveCounters, GLsizei counterSize, GLuint *counters); +GL_APICALL void GL_APIENTRY glGetPerfMonitorGroupStringAMD (GLuint group, GLsizei bufSize, GLsizei *length, GLchar *groupString); +GL_APICALL void GL_APIENTRY glGetPerfMonitorCounterStringAMD (GLuint group, GLuint counter, GLsizei bufSize, GLsizei *length, GLchar *counterString); +GL_APICALL void GL_APIENTRY glGetPerfMonitorCounterInfoAMD (GLuint group, GLuint counter, GLenum pname, GLvoid *data); +GL_APICALL void GL_APIENTRY glGenPerfMonitorsAMD (GLsizei n, GLuint *monitors); +GL_APICALL void GL_APIENTRY glDeletePerfMonitorsAMD (GLsizei n, GLuint *monitors); +GL_APICALL void GL_APIENTRY glSelectPerfMonitorCountersAMD (GLuint monitor, GLboolean enable, GLuint group, GLint numCounters, GLuint *countersList); +GL_APICALL void GL_APIENTRY glBeginPerfMonitorAMD (GLuint monitor); +GL_APICALL void GL_APIENTRY glEndPerfMonitorAMD (GLuint monitor); +GL_APICALL void GL_APIENTRY glGetPerfMonitorCounterDataAMD (GLuint monitor, GLenum pname, GLsizei dataSize, GLuint *data, GLint *bytesWritten); +#endif +typedef void (GL_APIENTRYP PFNGLGETPERFMONITORGROUPSAMDPROC) (GLint *numGroups, GLsizei 
groupsSize, GLuint *groups); +typedef void (GL_APIENTRYP PFNGLGETPERFMONITORCOUNTERSAMDPROC) (GLuint group, GLint *numCounters, GLint *maxActiveCounters, GLsizei counterSize, GLuint *counters); +typedef void (GL_APIENTRYP PFNGLGETPERFMONITORGROUPSTRINGAMDPROC) (GLuint group, GLsizei bufSize, GLsizei *length, GLchar *groupString); +typedef void (GL_APIENTRYP PFNGLGETPERFMONITORCOUNTERSTRINGAMDPROC) (GLuint group, GLuint counter, GLsizei bufSize, GLsizei *length, GLchar *counterString); +typedef void (GL_APIENTRYP PFNGLGETPERFMONITORCOUNTERINFOAMDPROC) (GLuint group, GLuint counter, GLenum pname, GLvoid *data); +typedef void (GL_APIENTRYP PFNGLGENPERFMONITORSAMDPROC) (GLsizei n, GLuint *monitors); +typedef void (GL_APIENTRYP PFNGLDELETEPERFMONITORSAMDPROC) (GLsizei n, GLuint *monitors); +typedef void (GL_APIENTRYP PFNGLSELECTPERFMONITORCOUNTERSAMDPROC) (GLuint monitor, GLboolean enable, GLuint group, GLint numCounters, GLuint *countersList); +typedef void (GL_APIENTRYP PFNGLBEGINPERFMONITORAMDPROC) (GLuint monitor); +typedef void (GL_APIENTRYP PFNGLENDPERFMONITORAMDPROC) (GLuint monitor); +typedef void (GL_APIENTRYP PFNGLGETPERFMONITORCOUNTERDATAAMDPROC) (GLuint monitor, GLenum pname, GLsizei dataSize, GLuint *data, GLint *bytesWritten); +#endif + +/* GL_AMD_program_binary_Z400 */ +#ifndef GL_AMD_program_binary_Z400 +#define GL_AMD_program_binary_Z400 1 +#endif + +/*------------------------------------------------------------------------* + * ANGLE extension functions + *------------------------------------------------------------------------*/ + +/* GL_ANGLE_framebuffer_blit */ +#ifndef GL_ANGLE_framebuffer_blit +#define GL_ANGLE_framebuffer_blit 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_APICALL void GL_APIENTRY glBlitFramebufferANGLE (GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1, GLbitfield mask, GLenum filter); +#endif +typedef void (GL_APIENTRYP PFNGLBLITFRAMEBUFFERANGLEPROC) (GLint srcX0, GLint srcY0, GLint 
srcX1, GLint srcY1, GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1, GLbitfield mask, GLenum filter); +#endif + +/* GL_ANGLE_framebuffer_multisample */ +#ifndef GL_ANGLE_framebuffer_multisample +#define GL_ANGLE_framebuffer_multisample 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_APICALL void GL_APIENTRY glRenderbufferStorageMultisampleANGLE (GLenum target, GLsizei samples, GLenum internalformat, GLsizei width, GLsizei height); +#endif +typedef void (GL_APIENTRYP PFNGLRENDERBUFFERSTORAGEMULTISAMPLEANGLEPROC) (GLenum target, GLsizei samples, GLenum internalformat, GLsizei width, GLsizei height); +#endif + +#ifndef GL_ANGLE_instanced_arrays +#define GL_ANGLE_instanced_arrays 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_APICALL void GL_APIENTRY glDrawArraysInstancedANGLE (GLenum mode, GLint first, GLsizei count, GLsizei primcount); +GL_APICALL void GL_APIENTRY glDrawElementsInstancedANGLE (GLenum mode, GLsizei count, GLenum type, const void *indices, GLsizei primcount); +GL_APICALL void GL_APIENTRY glVertexAttribDivisorANGLE (GLuint index, GLuint divisor); +#endif +typedef void (GL_APIENTRYP PFLGLDRAWARRAYSINSTANCEDANGLEPROC) (GLenum mode, GLint first, GLsizei count, GLsizei primcount); +typedef void (GL_APIENTRYP PFLGLDRAWELEMENTSINSTANCEDANGLEPROC) (GLenum mode, GLsizei count, GLenum type, const void *indices, GLsizei primcount); +typedef void (GL_APIENTRYP PFLGLVERTEXATTRIBDIVISORANGLEPROC) (GLuint index, GLuint divisor); +#endif + +/* GL_ANGLE_pack_reverse_row_order */ +#ifndef GL_ANGLE_pack_reverse_row_order +#define GL_ANGLE_pack_reverse_row_order 1 +#endif + +/* GL_ANGLE_texture_compression_dxt3 */ +#ifndef GL_ANGLE_texture_compression_dxt3 +#define GL_ANGLE_texture_compression_dxt3 1 +#endif + +/* GL_ANGLE_texture_compression_dxt5 */ +#ifndef GL_ANGLE_texture_compression_dxt5 +#define GL_ANGLE_texture_compression_dxt5 1 +#endif + +/* GL_ANGLE_texture_usage */ +#ifndef GL_ANGLE_texture_usage +#define GL_ANGLE_texture_usage 1 +#endif + +#ifndef GL_ANGLE_translated_shader_source 
+#define GL_ANGLE_translated_shader_source 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_APICALL void GL_APIENTRY glGetTranslatedShaderSourceANGLE (GLuint shader, GLsizei bufsize, GLsizei *length, GLchar *source); +#endif +typedef void (GL_APIENTRYP PFLGLGETTRANSLATEDSHADERSOURCEANGLEPROC) (GLuint shader, GLsizei bufsize, GLsizei *length, GLchar *source); +#endif + +/*------------------------------------------------------------------------* + * APPLE extension functions + *------------------------------------------------------------------------*/ + +/* GL_APPLE_copy_texture_levels */ +#ifndef GL_APPLE_copy_texture_levels +#define GL_APPLE_copy_texture_levels 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_APICALL void GL_APIENTRY glCopyTextureLevelsAPPLE (GLuint destinationTexture, GLuint sourceTexture, GLint sourceBaseLevel, GLsizei sourceLevelCount); +#endif +typedef void (GL_APIENTRYP PFNGLCOPYTEXTURELEVELSAPPLEPROC) (GLuint destinationTexture, GLuint sourceTexture, GLint sourceBaseLevel, GLsizei sourceLevelCount); +#endif + +/* GL_APPLE_framebuffer_multisample */ +#ifndef GL_APPLE_framebuffer_multisample +#define GL_APPLE_framebuffer_multisample 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_APICALL void GL_APIENTRY glRenderbufferStorageMultisampleAPPLE (GLenum, GLsizei, GLenum, GLsizei, GLsizei); +GL_APICALL void GL_APIENTRY glResolveMultisampleFramebufferAPPLE (void); +#endif /* GL_GLEXT_PROTOTYPES */ +typedef void (GL_APIENTRYP PFNGLRENDERBUFFERSTORAGEMULTISAMPLEAPPLEPROC) (GLenum target, GLsizei samples, GLenum internalformat, GLsizei width, GLsizei height); +typedef void (GL_APIENTRYP PFNGLRESOLVEMULTISAMPLEFRAMEBUFFERAPPLEPROC) (void); +#endif + +/* GL_APPLE_rgb_422 */ +#ifndef GL_APPLE_rgb_422 +#define GL_APPLE_rgb_422 1 +#endif + +/* GL_APPLE_sync */ +#ifndef GL_APPLE_sync +#define GL_APPLE_sync 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_APICALL GLsync GL_APIENTRY glFenceSyncAPPLE (GLenum condition, GLbitfield flags); +GL_APICALL GLboolean GL_APIENTRY glIsSyncAPPLE (GLsync sync); +GL_APICALL void 
GL_APIENTRY glDeleteSyncAPPLE (GLsync sync); +GL_APICALL GLenum GL_APIENTRY glClientWaitSyncAPPLE (GLsync sync, GLbitfield flags, GLuint64 timeout); +GL_APICALL void GL_APIENTRY glWaitSyncAPPLE (GLsync sync, GLbitfield flags, GLuint64 timeout); +GL_APICALL void GL_APIENTRY glGetInteger64vAPPLE (GLenum pname, GLint64 *params); +GL_APICALL void GL_APIENTRY glGetSyncivAPPLE (GLsync sync, GLenum pname, GLsizei bufSize, GLsizei *length, GLint *values); +#endif +typedef GLsync (GL_APIENTRYP PFNGLFENCESYNCAPPLEPROC) (GLenum condition, GLbitfield flags); +typedef GLboolean (GL_APIENTRYP PFNGLISSYNCAPPLEPROC) (GLsync sync); +typedef void (GL_APIENTRYP PFNGLDELETESYNCAPPLEPROC) (GLsync sync); +typedef GLenum (GL_APIENTRYP PFNGLCLIENTWAITSYNCAPPLEPROC) (GLsync sync, GLbitfield flags, GLuint64 timeout); +typedef void (GL_APIENTRYP PFNGLWAITSYNCAPPLEPROC) (GLsync sync, GLbitfield flags, GLuint64 timeout); +typedef void (GL_APIENTRYP PFNGLGETINTEGER64VAPPLEPROC) (GLenum pname, GLint64 *params); +typedef void (GL_APIENTRYP PFNGLGETSYNCIVAPPLEPROC) (GLsync sync, GLenum pname, GLsizei bufSize, GLsizei *length, GLint *values); +#endif + +/* GL_APPLE_texture_format_BGRA8888 */ +#ifndef GL_APPLE_texture_format_BGRA8888 +#define GL_APPLE_texture_format_BGRA8888 1 +#endif + +/* GL_APPLE_texture_max_level */ +#ifndef GL_APPLE_texture_max_level +#define GL_APPLE_texture_max_level 1 +#endif + +/*------------------------------------------------------------------------* + * ARM extension functions + *------------------------------------------------------------------------*/ + +/* GL_ARM_mali_program_binary */ +#ifndef GL_ARM_mali_program_binary +#define GL_ARM_mali_program_binary 1 +#endif + +/* GL_ARM_mali_shader_binary */ +#ifndef GL_ARM_mali_shader_binary +#define GL_ARM_mali_shader_binary 1 +#endif + +/* GL_ARM_rgba8 */ +#ifndef GL_ARM_rgba8 +#define GL_ARM_rgba8 1 +#endif + +/*------------------------------------------------------------------------* + * EXT extension functions + 
*------------------------------------------------------------------------*/ + +/* GL_EXT_blend_minmax */ +#ifndef GL_EXT_blend_minmax +#define GL_EXT_blend_minmax 1 +#endif + +/* GL_EXT_color_buffer_half_float */ +#ifndef GL_EXT_color_buffer_half_float +#define GL_EXT_color_buffer_half_float 1 +#endif + +/* GL_EXT_debug_label */ +#ifndef GL_EXT_debug_label +#define GL_EXT_debug_label 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_APICALL void GL_APIENTRY glLabelObjectEXT (GLenum type, GLuint object, GLsizei length, const GLchar *label); +GL_APICALL void GL_APIENTRY glGetObjectLabelEXT (GLenum type, GLuint object, GLsizei bufSize, GLsizei *length, GLchar *label); +#endif +typedef void (GL_APIENTRYP PFNGLLABELOBJECTEXTPROC) (GLenum type, GLuint object, GLsizei length, const GLchar *label); +typedef void (GL_APIENTRYP PFNGLGETOBJECTLABELEXTPROC) (GLenum type, GLuint object, GLsizei bufSize, GLsizei *length, GLchar *label); +#endif + +/* GL_EXT_debug_marker */ +#ifndef GL_EXT_debug_marker +#define GL_EXT_debug_marker 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_APICALL void GL_APIENTRY glInsertEventMarkerEXT (GLsizei length, const GLchar *marker); +GL_APICALL void GL_APIENTRY glPushGroupMarkerEXT (GLsizei length, const GLchar *marker); +GL_APICALL void GL_APIENTRY glPopGroupMarkerEXT (void); +#endif +typedef void (GL_APIENTRYP PFNGLINSERTEVENTMARKEREXTPROC) (GLsizei length, const GLchar *marker); +typedef void (GL_APIENTRYP PFNGLPUSHGROUPMARKEREXTPROC) (GLsizei length, const GLchar *marker); +typedef void (GL_APIENTRYP PFNGLPOPGROUPMARKEREXTPROC) (void); +#endif + +/* GL_EXT_discard_framebuffer */ +#ifndef GL_EXT_discard_framebuffer +#define GL_EXT_discard_framebuffer 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_APICALL void GL_APIENTRY glDiscardFramebufferEXT (GLenum target, GLsizei numAttachments, const GLenum *attachments); +#endif +typedef void (GL_APIENTRYP PFNGLDISCARDFRAMEBUFFEREXTPROC) (GLenum target, GLsizei numAttachments, const GLenum *attachments); +#endif + +/* GL_EXT_map_buffer_range */ 
+#ifndef GL_EXT_map_buffer_range +#define GL_EXT_map_buffer_range 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_APICALL void* GL_APIENTRY glMapBufferRangeEXT (GLenum target, GLintptr offset, GLsizeiptr length, GLbitfield access); +GL_APICALL void GL_APIENTRY glFlushMappedBufferRangeEXT (GLenum target, GLintptr offset, GLsizeiptr length); +#endif +typedef void* (GL_APIENTRYP PFNGLMAPBUFFERRANGEEXTPROC) (GLenum target, GLintptr offset, GLsizeiptr length, GLbitfield access); +typedef void (GL_APIENTRYP PFNGLFLUSHMAPPEDBUFFERRANGEEXTPROC) (GLenum target, GLintptr offset, GLsizeiptr length); +#endif + +/* GL_EXT_multisampled_render_to_texture */ +#ifndef GL_EXT_multisampled_render_to_texture +#define GL_EXT_multisampled_render_to_texture 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_APICALL void GL_APIENTRY glRenderbufferStorageMultisampleEXT (GLenum, GLsizei, GLenum, GLsizei, GLsizei); +GL_APICALL void GL_APIENTRY glFramebufferTexture2DMultisampleEXT (GLenum, GLenum, GLenum, GLuint, GLint, GLsizei); +#endif +typedef void (GL_APIENTRYP PFNGLRENDERBUFFERSTORAGEMULTISAMPLEEXTPROC) (GLenum target, GLsizei samples, GLenum internalformat, GLsizei width, GLsizei height); +typedef void (GL_APIENTRYP PFNGLFRAMEBUFFERTEXTURE2DMULTISAMPLEEXTPROC) (GLenum target, GLenum attachment, GLenum textarget, GLuint texture, GLint level, GLsizei samples); +#endif + +/* GL_EXT_multiview_draw_buffers */ +#ifndef GL_EXT_multiview_draw_buffers +#define GL_EXT_multiview_draw_buffers 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_APICALL void GL_APIENTRY glReadBufferIndexedEXT (GLenum src, GLint index); +GL_APICALL void GL_APIENTRY glDrawBuffersIndexedEXT (GLint n, const GLenum *location, const GLint *indices); +GL_APICALL void GL_APIENTRY glGetIntegeri_vEXT (GLenum target, GLuint index, GLint *data); +#endif +typedef void (GL_APIENTRYP PFNGLREADBUFFERINDEXEDEXTPROC) (GLenum src, GLint index); +typedef void (GL_APIENTRYP PFNGLDRAWBUFFERSINDEXEDEXTPROC) (GLint n, const GLenum *location, const GLint *indices); +typedef void 
(GL_APIENTRYP PFNGLGETINTEGERI_VEXTPROC) (GLenum target, GLuint index, GLint *data); +#endif + +#ifndef GL_EXT_multi_draw_arrays +#define GL_EXT_multi_draw_arrays 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_APICALL void GL_APIENTRY glMultiDrawArraysEXT (GLenum, GLint *, GLsizei *, GLsizei); +GL_APICALL void GL_APIENTRY glMultiDrawElementsEXT (GLenum, const GLsizei *, GLenum, const GLvoid* *, GLsizei); +#endif /* GL_GLEXT_PROTOTYPES */ +typedef void (GL_APIENTRYP PFNGLMULTIDRAWARRAYSEXTPROC) (GLenum mode, GLint *first, GLsizei *count, GLsizei primcount); +typedef void (GL_APIENTRYP PFNGLMULTIDRAWELEMENTSEXTPROC) (GLenum mode, const GLsizei *count, GLenum type, const GLvoid* *indices, GLsizei primcount); +#endif + +/* GL_EXT_occlusion_query_boolean */ +#ifndef GL_EXT_occlusion_query_boolean +#define GL_EXT_occlusion_query_boolean 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_APICALL void GL_APIENTRY glGenQueriesEXT (GLsizei n, GLuint *ids); +GL_APICALL void GL_APIENTRY glDeleteQueriesEXT (GLsizei n, const GLuint *ids); +GL_APICALL GLboolean GL_APIENTRY glIsQueryEXT (GLuint id); +GL_APICALL void GL_APIENTRY glBeginQueryEXT (GLenum target, GLuint id); +GL_APICALL void GL_APIENTRY glEndQueryEXT (GLenum target); +GL_APICALL void GL_APIENTRY glGetQueryivEXT (GLenum target, GLenum pname, GLint *params); +GL_APICALL void GL_APIENTRY glGetQueryObjectuivEXT (GLuint id, GLenum pname, GLuint *params); +#endif +typedef void (GL_APIENTRYP PFNGLGENQUERIESEXTPROC) (GLsizei n, GLuint *ids); +typedef void (GL_APIENTRYP PFNGLDELETEQUERIESEXTPROC) (GLsizei n, const GLuint *ids); +typedef GLboolean (GL_APIENTRYP PFNGLISQUERYEXTPROC) (GLuint id); +typedef void (GL_APIENTRYP PFNGLBEGINQUERYEXTPROC) (GLenum target, GLuint id); +typedef void (GL_APIENTRYP PFNGLENDQUERYEXTPROC) (GLenum target); +typedef void (GL_APIENTRYP PFNGLGETQUERYIVEXTPROC) (GLenum target, GLenum pname, GLint *params); +typedef void (GL_APIENTRYP PFNGLGETQUERYOBJECTUIVEXTPROC) (GLuint id, GLenum pname, GLuint *params); +#endif + +/* 
GL_EXT_read_format_bgra */ +#ifndef GL_EXT_read_format_bgra +#define GL_EXT_read_format_bgra 1 +#endif + +/* GL_EXT_robustness */ +#ifndef GL_EXT_robustness +#define GL_EXT_robustness 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_APICALL GLenum GL_APIENTRY glGetGraphicsResetStatusEXT (void); +GL_APICALL void GL_APIENTRY glReadnPixelsEXT (GLint x, GLint y, GLsizei width, GLsizei height, GLenum format, GLenum type, GLsizei bufSize, void *data); +GL_APICALL void GL_APIENTRY glGetnUniformfvEXT (GLuint program, GLint location, GLsizei bufSize, float *params); +GL_APICALL void GL_APIENTRY glGetnUniformivEXT (GLuint program, GLint location, GLsizei bufSize, GLint *params); +#endif +typedef GLenum (GL_APIENTRYP PFNGLGETGRAPHICSRESETSTATUSEXTPROC) (void); +typedef void (GL_APIENTRYP PFNGLREADNPIXELSEXTPROC) (GLint x, GLint y, GLsizei width, GLsizei height, GLenum format, GLenum type, GLsizei bufSize, void *data); +typedef void (GL_APIENTRYP PFNGLGETNUNIFORMFVEXTPROC) (GLuint program, GLint location, GLsizei bufSize, float *params); +typedef void (GL_APIENTRYP PFNGLGETNUNIFORMIVEXTPROC) (GLuint program, GLint location, GLsizei bufSize, GLint *params); +#endif + +/* GL_EXT_separate_shader_objects */ +#ifndef GL_EXT_separate_shader_objects +#define GL_EXT_separate_shader_objects 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_APICALL void GL_APIENTRY glUseProgramStagesEXT (GLuint pipeline, GLbitfield stages, GLuint program); +GL_APICALL void GL_APIENTRY glActiveShaderProgramEXT (GLuint pipeline, GLuint program); +GL_APICALL GLuint GL_APIENTRY glCreateShaderProgramvEXT (GLenum type, GLsizei count, const GLchar **strings); +GL_APICALL void GL_APIENTRY glBindProgramPipelineEXT (GLuint pipeline); +GL_APICALL void GL_APIENTRY glDeleteProgramPipelinesEXT (GLsizei n, const GLuint *pipelines); +GL_APICALL void GL_APIENTRY glGenProgramPipelinesEXT (GLsizei n, GLuint *pipelines); +GL_APICALL GLboolean GL_APIENTRY glIsProgramPipelineEXT (GLuint pipeline); +GL_APICALL void GL_APIENTRY glProgramParameteriEXT 
(GLuint program, GLenum pname, GLint value); +GL_APICALL void GL_APIENTRY glGetProgramPipelineivEXT (GLuint pipeline, GLenum pname, GLint *params); +GL_APICALL void GL_APIENTRY glProgramUniform1iEXT (GLuint program, GLint location, GLint x); +GL_APICALL void GL_APIENTRY glProgramUniform2iEXT (GLuint program, GLint location, GLint x, GLint y); +GL_APICALL void GL_APIENTRY glProgramUniform3iEXT (GLuint program, GLint location, GLint x, GLint y, GLint z); +GL_APICALL void GL_APIENTRY glProgramUniform4iEXT (GLuint program, GLint location, GLint x, GLint y, GLint z, GLint w); +GL_APICALL void GL_APIENTRY glProgramUniform1fEXT (GLuint program, GLint location, GLfloat x); +GL_APICALL void GL_APIENTRY glProgramUniform2fEXT (GLuint program, GLint location, GLfloat x, GLfloat y); +GL_APICALL void GL_APIENTRY glProgramUniform3fEXT (GLuint program, GLint location, GLfloat x, GLfloat y, GLfloat z); +GL_APICALL void GL_APIENTRY glProgramUniform4fEXT (GLuint program, GLint location, GLfloat x, GLfloat y, GLfloat z, GLfloat w); +GL_APICALL void GL_APIENTRY glProgramUniform1ivEXT (GLuint program, GLint location, GLsizei count, const GLint *value); +GL_APICALL void GL_APIENTRY glProgramUniform2ivEXT (GLuint program, GLint location, GLsizei count, const GLint *value); +GL_APICALL void GL_APIENTRY glProgramUniform3ivEXT (GLuint program, GLint location, GLsizei count, const GLint *value); +GL_APICALL void GL_APIENTRY glProgramUniform4ivEXT (GLuint program, GLint location, GLsizei count, const GLint *value); +GL_APICALL void GL_APIENTRY glProgramUniform1fvEXT (GLuint program, GLint location, GLsizei count, const GLfloat *value); +GL_APICALL void GL_APIENTRY glProgramUniform2fvEXT (GLuint program, GLint location, GLsizei count, const GLfloat *value); +GL_APICALL void GL_APIENTRY glProgramUniform3fvEXT (GLuint program, GLint location, GLsizei count, const GLfloat *value); +GL_APICALL void GL_APIENTRY glProgramUniform4fvEXT (GLuint program, GLint location, GLsizei count, const GLfloat 
*value); +GL_APICALL void GL_APIENTRY glProgramUniformMatrix2fvEXT (GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLfloat *value); +GL_APICALL void GL_APIENTRY glProgramUniformMatrix3fvEXT (GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLfloat *value); +GL_APICALL void GL_APIENTRY glProgramUniformMatrix4fvEXT (GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLfloat *value); +GL_APICALL void GL_APIENTRY glValidateProgramPipelineEXT (GLuint pipeline); +GL_APICALL void GL_APIENTRY glGetProgramPipelineInfoLogEXT (GLuint pipeline, GLsizei bufSize, GLsizei *length, GLchar *infoLog); +#endif +typedef void (GL_APIENTRYP PFNGLUSEPROGRAMSTAGESEXTPROC) (GLuint pipeline, GLbitfield stages, GLuint program); +typedef void (GL_APIENTRYP PFNGLACTIVESHADERPROGRAMEXTPROC) (GLuint pipeline, GLuint program); +typedef GLuint (GL_APIENTRYP PFNGLCREATESHADERPROGRAMVEXTPROC) (GLenum type, GLsizei count, const GLchar **strings); +typedef void (GL_APIENTRYP PFNGLBINDPROGRAMPIPELINEEXTPROC) (GLuint pipeline); +typedef void (GL_APIENTRYP PFNGLDELETEPROGRAMPIPELINESEXTPROC) (GLsizei n, const GLuint *pipelines); +typedef void (GL_APIENTRYP PFNGLGENPROGRAMPIPELINESEXTPROC) (GLsizei n, GLuint *pipelines); +typedef GLboolean (GL_APIENTRYP PFNGLISPROGRAMPIPELINEEXTPROC) (GLuint pipeline); +typedef void (GL_APIENTRYP PFNGLPROGRAMPARAMETERIEXTPROC) (GLuint program, GLenum pname, GLint value); +typedef void (GL_APIENTRYP PFNGLGETPROGRAMPIPELINEIVEXTPROC) (GLuint pipeline, GLenum pname, GLint *params); +typedef void (GL_APIENTRYP PFNGLPROGRAMUNIFORM1IEXTPROC) (GLuint program, GLint location, GLint x); +typedef void (GL_APIENTRYP PFNGLPROGRAMUNIFORM2IEXTPROC) (GLuint program, GLint location, GLint x, GLint y); +typedef void (GL_APIENTRYP PFNGLPROGRAMUNIFORM3IEXTPROC) (GLuint program, GLint location, GLint x, GLint y, GLint z); +typedef void (GL_APIENTRYP PFNGLPROGRAMUNIFORM4IEXTPROC) (GLuint program, GLint location, GLint 
x, GLint y, GLint z, GLint w); +typedef void (GL_APIENTRYP PFNGLPROGRAMUNIFORM1FEXTPROC) (GLuint program, GLint location, GLfloat x); +typedef void (GL_APIENTRYP PFNGLPROGRAMUNIFORM2FEXTPROC) (GLuint program, GLint location, GLfloat x, GLfloat y); +typedef void (GL_APIENTRYP PFNGLPROGRAMUNIFORM3FEXTPROC) (GLuint program, GLint location, GLfloat x, GLfloat y, GLfloat z); +typedef void (GL_APIENTRYP PFNGLPROGRAMUNIFORM4FEXTPROC) (GLuint program, GLint location, GLfloat x, GLfloat y, GLfloat z, GLfloat w); +typedef void (GL_APIENTRYP PFNGLPROGRAMUNIFORM1IVEXTPROC) (GLuint program, GLint location, GLsizei count, const GLint *value); +typedef void (GL_APIENTRYP PFNGLPROGRAMUNIFORM2IVEXTPROC) (GLuint program, GLint location, GLsizei count, const GLint *value); +typedef void (GL_APIENTRYP PFNGLPROGRAMUNIFORM3IVEXTPROC) (GLuint program, GLint location, GLsizei count, const GLint *value); +typedef void (GL_APIENTRYP PFNGLPROGRAMUNIFORM4IVEXTPROC) (GLuint program, GLint location, GLsizei count, const GLint *value); +typedef void (GL_APIENTRYP PFNGLPROGRAMUNIFORM1FVEXTPROC) (GLuint program, GLint location, GLsizei count, const GLfloat *value); +typedef void (GL_APIENTRYP PFNGLPROGRAMUNIFORM2FVEXTPROC) (GLuint program, GLint location, GLsizei count, const GLfloat *value); +typedef void (GL_APIENTRYP PFNGLPROGRAMUNIFORM3FVEXTPROC) (GLuint program, GLint location, GLsizei count, const GLfloat *value); +typedef void (GL_APIENTRYP PFNGLPROGRAMUNIFORM4FVEXTPROC) (GLuint program, GLint location, GLsizei count, const GLfloat *value); +typedef void (GL_APIENTRYP PFNGLPROGRAMUNIFORMMATRIX2FVEXTPROC) (GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLfloat *value); +typedef void (GL_APIENTRYP PFNGLPROGRAMUNIFORMMATRIX3FVEXTPROC) (GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLfloat *value); +typedef void (GL_APIENTRYP PFNGLPROGRAMUNIFORMMATRIX4FVEXTPROC) (GLuint program, GLint location, GLsizei count, GLboolean transpose, const 
GLfloat *value); +typedef void (GL_APIENTRYP PFNGLVALIDATEPROGRAMPIPELINEEXTPROC) (GLuint pipeline); +typedef void (GL_APIENTRYP PFNGLGETPROGRAMPIPELINEINFOLOGEXTPROC) (GLuint pipeline, GLsizei bufSize, GLsizei *length, GLchar *infoLog); +#endif + +/* GL_EXT_shader_framebuffer_fetch */ +#ifndef GL_EXT_shader_framebuffer_fetch +#define GL_EXT_shader_framebuffer_fetch 1 +#endif + +/* GL_EXT_shader_texture_lod */ +#ifndef GL_EXT_shader_texture_lod +#define GL_EXT_shader_texture_lod 1 +#endif + +/* GL_EXT_shadow_samplers */ +#ifndef GL_EXT_shadow_samplers +#define GL_EXT_shadow_samplers 1 +#endif + +/* GL_EXT_sRGB */ +#ifndef GL_EXT_sRGB +#define GL_EXT_sRGB 1 +#endif + +/* GL_EXT_texture_compression_dxt1 */ +#ifndef GL_EXT_texture_compression_dxt1 +#define GL_EXT_texture_compression_dxt1 1 +#endif + +/* GL_EXT_texture_filter_anisotropic */ +#ifndef GL_EXT_texture_filter_anisotropic +#define GL_EXT_texture_filter_anisotropic 1 +#endif + +/* GL_EXT_texture_format_BGRA8888 */ +#ifndef GL_EXT_texture_format_BGRA8888 +#define GL_EXT_texture_format_BGRA8888 1 +#endif + +/* GL_EXT_texture_rg */ +#ifndef GL_EXT_texture_rg +#define GL_EXT_texture_rg 1 +#endif + +/* GL_EXT_texture_storage */ +#ifndef GL_EXT_texture_storage +#define GL_EXT_texture_storage 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_APICALL void GL_APIENTRY glTexStorage1DEXT (GLenum target, GLsizei levels, GLenum internalformat, GLsizei width); +GL_APICALL void GL_APIENTRY glTexStorage2DEXT (GLenum target, GLsizei levels, GLenum internalformat, GLsizei width, GLsizei height); +GL_APICALL void GL_APIENTRY glTexStorage3DEXT (GLenum target, GLsizei levels, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth); +GL_APICALL void GL_APIENTRY glTextureStorage1DEXT (GLuint texture, GLenum target, GLsizei levels, GLenum internalformat, GLsizei width); +GL_APICALL void GL_APIENTRY glTextureStorage2DEXT (GLuint texture, GLenum target, GLsizei levels, GLenum internalformat, GLsizei width, GLsizei height); +GL_APICALL 
void GL_APIENTRY glTextureStorage3DEXT (GLuint texture, GLenum target, GLsizei levels, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth); +#endif +typedef void (GL_APIENTRYP PFNGLTEXSTORAGE1DEXTPROC) (GLenum target, GLsizei levels, GLenum internalformat, GLsizei width); +typedef void (GL_APIENTRYP PFNGLTEXSTORAGE2DEXTPROC) (GLenum target, GLsizei levels, GLenum internalformat, GLsizei width, GLsizei height); +typedef void (GL_APIENTRYP PFNGLTEXSTORAGE3DEXTPROC) (GLenum target, GLsizei levels, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth); +typedef void (GL_APIENTRYP PFNGLTEXTURESTORAGE1DEXTPROC) (GLuint texture, GLenum target, GLsizei levels, GLenum internalformat, GLsizei width); +typedef void (GL_APIENTRYP PFNGLTEXTURESTORAGE2DEXTPROC) (GLuint texture, GLenum target, GLsizei levels, GLenum internalformat, GLsizei width, GLsizei height); +typedef void (GL_APIENTRYP PFNGLTEXTURESTORAGE3DEXTPROC) (GLuint texture, GLenum target, GLsizei levels, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth); +#endif + +/* GL_EXT_texture_type_2_10_10_10_REV */ +#ifndef GL_EXT_texture_type_2_10_10_10_REV +#define GL_EXT_texture_type_2_10_10_10_REV 1 +#endif + +/* GL_EXT_unpack_subimage */ +#ifndef GL_EXT_unpack_subimage +#define GL_EXT_unpack_subimage 1 +#endif + +/*------------------------------------------------------------------------* + * DMP extension functions + *------------------------------------------------------------------------*/ + +/* GL_DMP_shader_binary */ +#ifndef GL_DMP_shader_binary +#define GL_DMP_shader_binary 1 +#endif + +/*------------------------------------------------------------------------* + * FJ extension functions + *------------------------------------------------------------------------*/ + +/* GL_FJ_shader_binary_GCCSO */ +#ifndef GL_FJ_shader_binary_GCCSO +#define GL_FJ_shader_binary_GCCSO 1 +#endif + +/*------------------------------------------------------------------------* + * IMG 
extension functions + *------------------------------------------------------------------------*/ + +/* GL_IMG_program_binary */ +#ifndef GL_IMG_program_binary +#define GL_IMG_program_binary 1 +#endif + +/* GL_IMG_read_format */ +#ifndef GL_IMG_read_format +#define GL_IMG_read_format 1 +#endif + +/* GL_IMG_shader_binary */ +#ifndef GL_IMG_shader_binary +#define GL_IMG_shader_binary 1 +#endif + +/* GL_IMG_texture_compression_pvrtc */ +#ifndef GL_IMG_texture_compression_pvrtc +#define GL_IMG_texture_compression_pvrtc 1 +#endif + +/* GL_IMG_multisampled_render_to_texture */ +#ifndef GL_IMG_multisampled_render_to_texture +#define GL_IMG_multisampled_render_to_texture 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_APICALL void GL_APIENTRY glRenderbufferStorageMultisampleIMG (GLenum, GLsizei, GLenum, GLsizei, GLsizei); +GL_APICALL void GL_APIENTRY glFramebufferTexture2DMultisampleIMG (GLenum, GLenum, GLenum, GLuint, GLint, GLsizei); +#endif +typedef void (GL_APIENTRYP PFNGLRENDERBUFFERSTORAGEMULTISAMPLEIMGPROC) (GLenum target, GLsizei samples, GLenum internalformat, GLsizei width, GLsizei height); +typedef void (GL_APIENTRYP PFNGLFRAMEBUFFERTEXTURE2DMULTISAMPLEIMGPROC) (GLenum target, GLenum attachment, GLenum textarget, GLuint texture, GLint level, GLsizei samples); +#endif + +/*------------------------------------------------------------------------* + * NV extension functions + *------------------------------------------------------------------------*/ + +/* GL_NV_coverage_sample */ +#ifndef GL_NV_coverage_sample +#define GL_NV_coverage_sample 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_APICALL void GL_APIENTRY glCoverageMaskNV (GLboolean mask); +GL_APICALL void GL_APIENTRY glCoverageOperationNV (GLenum operation); +#endif +typedef void (GL_APIENTRYP PFNGLCOVERAGEMASKNVPROC) (GLboolean mask); +typedef void (GL_APIENTRYP PFNGLCOVERAGEOPERATIONNVPROC) (GLenum operation); +#endif + +/* GL_NV_depth_nonlinear */ +#ifndef GL_NV_depth_nonlinear +#define GL_NV_depth_nonlinear 1 +#endif + +/* 
GL_NV_draw_buffers */ +#ifndef GL_NV_draw_buffers +#define GL_NV_draw_buffers 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_APICALL void GL_APIENTRY glDrawBuffersNV (GLsizei n, const GLenum *bufs); +#endif +typedef void (GL_APIENTRYP PFNGLDRAWBUFFERSNVPROC) (GLsizei n, const GLenum *bufs); +#endif + +/* GL_NV_fbo_color_attachments */ +#ifndef GL_NV_fbo_color_attachments +#define GL_NV_fbo_color_attachments 1 +#endif + +/* GL_NV_fence */ +#ifndef GL_NV_fence +#define GL_NV_fence 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_APICALL void GL_APIENTRY glDeleteFencesNV (GLsizei, const GLuint *); +GL_APICALL void GL_APIENTRY glGenFencesNV (GLsizei, GLuint *); +GL_APICALL GLboolean GL_APIENTRY glIsFenceNV (GLuint); +GL_APICALL GLboolean GL_APIENTRY glTestFenceNV (GLuint); +GL_APICALL void GL_APIENTRY glGetFenceivNV (GLuint, GLenum, GLint *); +GL_APICALL void GL_APIENTRY glFinishFenceNV (GLuint); +GL_APICALL void GL_APIENTRY glSetFenceNV (GLuint, GLenum); +#endif +typedef void (GL_APIENTRYP PFNGLDELETEFENCESNVPROC) (GLsizei n, const GLuint *fences); +typedef void (GL_APIENTRYP PFNGLGENFENCESNVPROC) (GLsizei n, GLuint *fences); +typedef GLboolean (GL_APIENTRYP PFNGLISFENCENVPROC) (GLuint fence); +typedef GLboolean (GL_APIENTRYP PFNGLTESTFENCENVPROC) (GLuint fence); +typedef void (GL_APIENTRYP PFNGLGETFENCEIVNVPROC) (GLuint fence, GLenum pname, GLint *params); +typedef void (GL_APIENTRYP PFNGLFINISHFENCENVPROC) (GLuint fence); +typedef void (GL_APIENTRYP PFNGLSETFENCENVPROC) (GLuint fence, GLenum condition); +#endif + +/* GL_NV_read_buffer */ +#ifndef GL_NV_read_buffer +#define GL_NV_read_buffer 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_APICALL void GL_APIENTRY glReadBufferNV (GLenum mode); +#endif +typedef void (GL_APIENTRYP PFNGLREADBUFFERNVPROC) (GLenum mode); +#endif + +/* GL_NV_read_buffer_front */ +#ifndef GL_NV_read_buffer_front +#define GL_NV_read_buffer_front 1 +#endif + +/* GL_NV_read_depth */ +#ifndef GL_NV_read_depth +#define GL_NV_read_depth 1 +#endif + +/* GL_NV_read_depth_stencil */ +#ifndef 
GL_NV_read_depth_stencil +#define GL_NV_read_depth_stencil 1 +#endif + +/* GL_NV_read_stencil */ +#ifndef GL_NV_read_stencil +#define GL_NV_read_stencil 1 +#endif + +/* GL_NV_texture_compression_s3tc_update */ +#ifndef GL_NV_texture_compression_s3tc_update +#define GL_NV_texture_compression_s3tc_update 1 +#endif + +/* GL_NV_texture_npot_2D_mipmap */ +#ifndef GL_NV_texture_npot_2D_mipmap +#define GL_NV_texture_npot_2D_mipmap 1 +#endif + +/*------------------------------------------------------------------------* + * QCOM extension functions + *------------------------------------------------------------------------*/ + +/* GL_QCOM_alpha_test */ +#ifndef GL_QCOM_alpha_test +#define GL_QCOM_alpha_test 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_APICALL void GL_APIENTRY glAlphaFuncQCOM (GLenum func, GLclampf ref); +#endif +typedef void (GL_APIENTRYP PFNGLALPHAFUNCQCOMPROC) (GLenum func, GLclampf ref); +#endif + +/* GL_QCOM_binning_control */ +#ifndef GL_QCOM_binning_control +#define GL_QCOM_binning_control 1 +#endif + +/* GL_QCOM_driver_control */ +#ifndef GL_QCOM_driver_control +#define GL_QCOM_driver_control 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_APICALL void GL_APIENTRY glGetDriverControlsQCOM (GLint *num, GLsizei size, GLuint *driverControls); +GL_APICALL void GL_APIENTRY glGetDriverControlStringQCOM (GLuint driverControl, GLsizei bufSize, GLsizei *length, GLchar *driverControlString); +GL_APICALL void GL_APIENTRY glEnableDriverControlQCOM (GLuint driverControl); +GL_APICALL void GL_APIENTRY glDisableDriverControlQCOM (GLuint driverControl); +#endif +typedef void (GL_APIENTRYP PFNGLGETDRIVERCONTROLSQCOMPROC) (GLint *num, GLsizei size, GLuint *driverControls); +typedef void (GL_APIENTRYP PFNGLGETDRIVERCONTROLSTRINGQCOMPROC) (GLuint driverControl, GLsizei bufSize, GLsizei *length, GLchar *driverControlString); +typedef void (GL_APIENTRYP PFNGLENABLEDRIVERCONTROLQCOMPROC) (GLuint driverControl); +typedef void (GL_APIENTRYP PFNGLDISABLEDRIVERCONTROLQCOMPROC) (GLuint driverControl); 
+#endif + +/* GL_QCOM_extended_get */ +#ifndef GL_QCOM_extended_get +#define GL_QCOM_extended_get 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_APICALL void GL_APIENTRY glExtGetTexturesQCOM (GLuint *textures, GLint maxTextures, GLint *numTextures); +GL_APICALL void GL_APIENTRY glExtGetBuffersQCOM (GLuint *buffers, GLint maxBuffers, GLint *numBuffers); +GL_APICALL void GL_APIENTRY glExtGetRenderbuffersQCOM (GLuint *renderbuffers, GLint maxRenderbuffers, GLint *numRenderbuffers); +GL_APICALL void GL_APIENTRY glExtGetFramebuffersQCOM (GLuint *framebuffers, GLint maxFramebuffers, GLint *numFramebuffers); +GL_APICALL void GL_APIENTRY glExtGetTexLevelParameterivQCOM (GLuint texture, GLenum face, GLint level, GLenum pname, GLint *params); +GL_APICALL void GL_APIENTRY glExtTexObjectStateOverrideiQCOM (GLenum target, GLenum pname, GLint param); +GL_APICALL void GL_APIENTRY glExtGetTexSubImageQCOM (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLenum format, GLenum type, GLvoid *texels); +GL_APICALL void GL_APIENTRY glExtGetBufferPointervQCOM (GLenum target, GLvoid **params); +#endif +typedef void (GL_APIENTRYP PFNGLEXTGETTEXTURESQCOMPROC) (GLuint *textures, GLint maxTextures, GLint *numTextures); +typedef void (GL_APIENTRYP PFNGLEXTGETBUFFERSQCOMPROC) (GLuint *buffers, GLint maxBuffers, GLint *numBuffers); +typedef void (GL_APIENTRYP PFNGLEXTGETRENDERBUFFERSQCOMPROC) (GLuint *renderbuffers, GLint maxRenderbuffers, GLint *numRenderbuffers); +typedef void (GL_APIENTRYP PFNGLEXTGETFRAMEBUFFERSQCOMPROC) (GLuint *framebuffers, GLint maxFramebuffers, GLint *numFramebuffers); +typedef void (GL_APIENTRYP PFNGLEXTGETTEXLEVELPARAMETERIVQCOMPROC) (GLuint texture, GLenum face, GLint level, GLenum pname, GLint *params); +typedef void (GL_APIENTRYP PFNGLEXTTEXOBJECTSTATEOVERRIDEIQCOMPROC) (GLenum target, GLenum pname, GLint param); +typedef void (GL_APIENTRYP PFNGLEXTGETTEXSUBIMAGEQCOMPROC) (GLenum target, GLint level, GLint 
xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLenum format, GLenum type, GLvoid *texels); +typedef void (GL_APIENTRYP PFNGLEXTGETBUFFERPOINTERVQCOMPROC) (GLenum target, GLvoid **params); +#endif + +/* GL_QCOM_extended_get2 */ +#ifndef GL_QCOM_extended_get2 +#define GL_QCOM_extended_get2 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_APICALL void GL_APIENTRY glExtGetShadersQCOM (GLuint *shaders, GLint maxShaders, GLint *numShaders); +GL_APICALL void GL_APIENTRY glExtGetProgramsQCOM (GLuint *programs, GLint maxPrograms, GLint *numPrograms); +GL_APICALL GLboolean GL_APIENTRY glExtIsProgramBinaryQCOM (GLuint program); +GL_APICALL void GL_APIENTRY glExtGetProgramBinarySourceQCOM (GLuint program, GLenum shadertype, GLchar *source, GLint *length); +#endif +typedef void (GL_APIENTRYP PFNGLEXTGETSHADERSQCOMPROC) (GLuint *shaders, GLint maxShaders, GLint *numShaders); +typedef void (GL_APIENTRYP PFNGLEXTGETPROGRAMSQCOMPROC) (GLuint *programs, GLint maxPrograms, GLint *numPrograms); +typedef GLboolean (GL_APIENTRYP PFNGLEXTISPROGRAMBINARYQCOMPROC) (GLuint program); +typedef void (GL_APIENTRYP PFNGLEXTGETPROGRAMBINARYSOURCEQCOMPROC) (GLuint program, GLenum shadertype, GLchar *source, GLint *length); +#endif + +/* GL_QCOM_perfmon_global_mode */ +#ifndef GL_QCOM_perfmon_global_mode +#define GL_QCOM_perfmon_global_mode 1 +#endif + +/* GL_QCOM_writeonly_rendering */ +#ifndef GL_QCOM_writeonly_rendering +#define GL_QCOM_writeonly_rendering 1 +#endif + +/* GL_QCOM_tiled_rendering */ +#ifndef GL_QCOM_tiled_rendering +#define GL_QCOM_tiled_rendering 1 +#ifdef GL_GLEXT_PROTOTYPES +GL_APICALL void GL_APIENTRY glStartTilingQCOM (GLuint x, GLuint y, GLuint width, GLuint height, GLbitfield preserveMask); +GL_APICALL void GL_APIENTRY glEndTilingQCOM (GLbitfield preserveMask); +#endif +typedef void (GL_APIENTRYP PFNGLSTARTTILINGQCOMPROC) (GLuint x, GLuint y, GLuint width, GLuint height, GLbitfield preserveMask); +typedef void (GL_APIENTRYP PFNGLENDTILINGQCOMPROC) 
(GLbitfield preserveMask); +#endif + +/*------------------------------------------------------------------------* + * VIV extension tokens + *------------------------------------------------------------------------*/ + +/* GL_VIV_shader_binary */ +#ifndef GL_VIV_shader_binary +#define GL_VIV_shader_binary 1 +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* __gl2ext_h_ */ diff --git a/opengles-3.1/include/GLES2/gl2platform.h b/opengles-3.1/include/GLES2/gl2platform.h new file mode 100644 index 0000000000..c9fa3c4d64 --- /dev/null +++ b/opengles-3.1/include/GLES2/gl2platform.h @@ -0,0 +1,30 @@ +#ifndef __gl2platform_h_ +#define __gl2platform_h_ + +/* $Revision: 10602 $ on $Date:: 2010-03-04 22:35:34 -0800 #$ */ + +/* + * This document is licensed under the SGI Free Software B License Version + * 2.0. For details, see http://oss.sgi.com/projects/FreeB/ . + */ + +/* Platform-specific types and definitions for OpenGL ES 2.X gl2.h + * + * Adopters may modify khrplatform.h and this file to suit their platform. + * You are encouraged to submit all modifications to the Khronos group so that + * they can be included in future versions of this file. Please submit changes + * by sending them to the public Khronos Bugzilla (http://khronos.org/bugzilla) + * by filing a bug against product "OpenGL-ES" component "Registry". + */ + +#include + +#ifndef GL_APICALL +#define GL_APICALL KHRONOS_APICALL +#endif + +#ifndef GL_APIENTRY +#define GL_APIENTRY KHRONOS_APIENTRY +#endif + +#endif /* __gl2platform_h_ */ diff --git a/opengles-3.1/include/GLES3/gl3.h b/opengles-3.1/include/GLES3/gl3.h new file mode 100644 index 0000000000..9c79862c0d --- /dev/null +++ b/opengles-3.1/include/GLES3/gl3.h @@ -0,0 +1,1061 @@ +#ifndef __gl3_h_ +#define __gl3_h_ + +/* + * gl3.h last updated on $Date: 2013-02-12 14:37:24 -0800 (Tue, 12 Feb 2013) $ + */ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/* +** Copyright (c) 2007-2013 The Khronos Group Inc. 
+** +** Permission is hereby granted, free of charge, to any person obtaining a +** copy of this software and/or associated documentation files (the +** "Materials"), to deal in the Materials without restriction, including +** without limitation the rights to use, copy, modify, merge, publish, +** distribute, sublicense, and/or sell copies of the Materials, and to +** permit persons to whom the Materials are furnished to do so, subject to +** the following conditions: +** +** The above copyright notice and this permission notice shall be included +** in all copies or substantial portions of the Materials. +** +** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +** MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. 
+*/ + +/*------------------------------------------------------------------------- + * Data type definitions + *-----------------------------------------------------------------------*/ + +/* OpenGL ES 2.0 */ + +typedef void GLvoid; +typedef char GLchar; +typedef unsigned int GLenum; +typedef unsigned char GLboolean; +typedef unsigned int GLbitfield; +typedef khronos_int8_t GLbyte; +typedef short GLshort; +typedef int GLint; +typedef int GLsizei; +typedef khronos_uint8_t GLubyte; +typedef unsigned short GLushort; +typedef unsigned int GLuint; +typedef khronos_float_t GLfloat; +typedef khronos_float_t GLclampf; +typedef khronos_int32_t GLfixed; +typedef khronos_intptr_t GLintptr; +typedef khronos_ssize_t GLsizeiptr; + +/* OpenGL ES 3.0 */ + +typedef unsigned short GLhalf; +typedef khronos_int64_t GLint64; +typedef khronos_uint64_t GLuint64; +typedef struct __GLsync *GLsync; + +/*------------------------------------------------------------------------- + * Token definitions + *-----------------------------------------------------------------------*/ + +/* OpenGL ES core versions */ +#define GL_ES_VERSION_3_0 1 +#define GL_ES_VERSION_2_0 1 + +/* OpenGL ES 2.0 */ + +/* ClearBufferMask */ +#define GL_DEPTH_BUFFER_BIT 0x00000100 +#define GL_STENCIL_BUFFER_BIT 0x00000400 +#define GL_COLOR_BUFFER_BIT 0x00004000 + +/* Boolean */ +#define GL_FALSE 0 +#define GL_TRUE 1 + +/* BeginMode */ +#define GL_POINTS 0x0000 +#define GL_LINES 0x0001 +#define GL_LINE_LOOP 0x0002 +#define GL_LINE_STRIP 0x0003 +#define GL_TRIANGLES 0x0004 +#define GL_TRIANGLE_STRIP 0x0005 +#define GL_TRIANGLE_FAN 0x0006 + +/* BlendingFactorDest */ +#define GL_ZERO 0 +#define GL_ONE 1 +#define GL_SRC_COLOR 0x0300 +#define GL_ONE_MINUS_SRC_COLOR 0x0301 +#define GL_SRC_ALPHA 0x0302 +#define GL_ONE_MINUS_SRC_ALPHA 0x0303 +#define GL_DST_ALPHA 0x0304 +#define GL_ONE_MINUS_DST_ALPHA 0x0305 + +/* BlendingFactorSrc */ +/* GL_ZERO */ +/* GL_ONE */ +#define GL_DST_COLOR 0x0306 +#define GL_ONE_MINUS_DST_COLOR 0x0307 
+#define GL_SRC_ALPHA_SATURATE 0x0308 +/* GL_SRC_ALPHA */ +/* GL_ONE_MINUS_SRC_ALPHA */ +/* GL_DST_ALPHA */ +/* GL_ONE_MINUS_DST_ALPHA */ + +/* BlendEquationSeparate */ +#define GL_FUNC_ADD 0x8006 +#define GL_BLEND_EQUATION 0x8009 +#define GL_BLEND_EQUATION_RGB 0x8009 /* same as BLEND_EQUATION */ +#define GL_BLEND_EQUATION_ALPHA 0x883D + +/* BlendSubtract */ +#define GL_FUNC_SUBTRACT 0x800A +#define GL_FUNC_REVERSE_SUBTRACT 0x800B + +/* Separate Blend Functions */ +#define GL_BLEND_DST_RGB 0x80C8 +#define GL_BLEND_SRC_RGB 0x80C9 +#define GL_BLEND_DST_ALPHA 0x80CA +#define GL_BLEND_SRC_ALPHA 0x80CB +#define GL_CONSTANT_COLOR 0x8001 +#define GL_ONE_MINUS_CONSTANT_COLOR 0x8002 +#define GL_CONSTANT_ALPHA 0x8003 +#define GL_ONE_MINUS_CONSTANT_ALPHA 0x8004 +#define GL_BLEND_COLOR 0x8005 + +/* Buffer Objects */ +#define GL_ARRAY_BUFFER 0x8892 +#define GL_ELEMENT_ARRAY_BUFFER 0x8893 +#define GL_ARRAY_BUFFER_BINDING 0x8894 +#define GL_ELEMENT_ARRAY_BUFFER_BINDING 0x8895 + +#define GL_STREAM_DRAW 0x88E0 +#define GL_STATIC_DRAW 0x88E4 +#define GL_DYNAMIC_DRAW 0x88E8 + +#define GL_BUFFER_SIZE 0x8764 +#define GL_BUFFER_USAGE 0x8765 + +#define GL_CURRENT_VERTEX_ATTRIB 0x8626 + +/* CullFaceMode */ +#define GL_FRONT 0x0404 +#define GL_BACK 0x0405 +#define GL_FRONT_AND_BACK 0x0408 + +/* DepthFunction */ +/* GL_NEVER */ +/* GL_LESS */ +/* GL_EQUAL */ +/* GL_LEQUAL */ +/* GL_GREATER */ +/* GL_NOTEQUAL */ +/* GL_GEQUAL */ +/* GL_ALWAYS */ + +/* EnableCap */ +#define GL_TEXTURE_2D 0x0DE1 +#define GL_CULL_FACE 0x0B44 +#define GL_BLEND 0x0BE2 +#define GL_DITHER 0x0BD0 +#define GL_STENCIL_TEST 0x0B90 +#define GL_DEPTH_TEST 0x0B71 +#define GL_SCISSOR_TEST 0x0C11 +#define GL_POLYGON_OFFSET_FILL 0x8037 +#define GL_SAMPLE_ALPHA_TO_COVERAGE 0x809E +#define GL_SAMPLE_COVERAGE 0x80A0 + +/* ErrorCode */ +#define GL_NO_ERROR 0 +#define GL_INVALID_ENUM 0x0500 +#define GL_INVALID_VALUE 0x0501 +#define GL_INVALID_OPERATION 0x0502 +#define GL_OUT_OF_MEMORY 0x0505 + +/* FrontFaceDirection */ +#define 
GL_CW 0x0900 +#define GL_CCW 0x0901 + +/* GetPName */ +#define GL_LINE_WIDTH 0x0B21 +#define GL_ALIASED_POINT_SIZE_RANGE 0x846D +#define GL_ALIASED_LINE_WIDTH_RANGE 0x846E +#define GL_CULL_FACE_MODE 0x0B45 +#define GL_FRONT_FACE 0x0B46 +#define GL_DEPTH_RANGE 0x0B70 +#define GL_DEPTH_WRITEMASK 0x0B72 +#define GL_DEPTH_CLEAR_VALUE 0x0B73 +#define GL_DEPTH_FUNC 0x0B74 +#define GL_STENCIL_CLEAR_VALUE 0x0B91 +#define GL_STENCIL_FUNC 0x0B92 +#define GL_STENCIL_FAIL 0x0B94 +#define GL_STENCIL_PASS_DEPTH_FAIL 0x0B95 +#define GL_STENCIL_PASS_DEPTH_PASS 0x0B96 +#define GL_STENCIL_REF 0x0B97 +#define GL_STENCIL_VALUE_MASK 0x0B93 +#define GL_STENCIL_WRITEMASK 0x0B98 +#define GL_STENCIL_BACK_FUNC 0x8800 +#define GL_STENCIL_BACK_FAIL 0x8801 +#define GL_STENCIL_BACK_PASS_DEPTH_FAIL 0x8802 +#define GL_STENCIL_BACK_PASS_DEPTH_PASS 0x8803 +#define GL_STENCIL_BACK_REF 0x8CA3 +#define GL_STENCIL_BACK_VALUE_MASK 0x8CA4 +#define GL_STENCIL_BACK_WRITEMASK 0x8CA5 +#define GL_VIEWPORT 0x0BA2 +#define GL_SCISSOR_BOX 0x0C10 +/* GL_SCISSOR_TEST */ +#define GL_COLOR_CLEAR_VALUE 0x0C22 +#define GL_COLOR_WRITEMASK 0x0C23 +#define GL_UNPACK_ALIGNMENT 0x0CF5 +#define GL_PACK_ALIGNMENT 0x0D05 +#define GL_MAX_TEXTURE_SIZE 0x0D33 +#define GL_MAX_VIEWPORT_DIMS 0x0D3A +#define GL_SUBPIXEL_BITS 0x0D50 +#define GL_RED_BITS 0x0D52 +#define GL_GREEN_BITS 0x0D53 +#define GL_BLUE_BITS 0x0D54 +#define GL_ALPHA_BITS 0x0D55 +#define GL_DEPTH_BITS 0x0D56 +#define GL_STENCIL_BITS 0x0D57 +#define GL_POLYGON_OFFSET_UNITS 0x2A00 +/* GL_POLYGON_OFFSET_FILL */ +#define GL_POLYGON_OFFSET_FACTOR 0x8038 +#define GL_TEXTURE_BINDING_2D 0x8069 +#define GL_SAMPLE_BUFFERS 0x80A8 +#define GL_SAMPLES 0x80A9 +#define GL_SAMPLE_COVERAGE_VALUE 0x80AA +#define GL_SAMPLE_COVERAGE_INVERT 0x80AB + +/* GetTextureParameter */ +/* GL_TEXTURE_MAG_FILTER */ +/* GL_TEXTURE_MIN_FILTER */ +/* GL_TEXTURE_WRAP_S */ +/* GL_TEXTURE_WRAP_T */ + +#define GL_NUM_COMPRESSED_TEXTURE_FORMATS 0x86A2 +#define GL_COMPRESSED_TEXTURE_FORMATS 0x86A3 + +/* 
HintMode */ +#define GL_DONT_CARE 0x1100 +#define GL_FASTEST 0x1101 +#define GL_NICEST 0x1102 + +/* HintTarget */ +#define GL_GENERATE_MIPMAP_HINT 0x8192 + +/* DataType */ +#define GL_BYTE 0x1400 +#define GL_UNSIGNED_BYTE 0x1401 +#define GL_SHORT 0x1402 +#define GL_UNSIGNED_SHORT 0x1403 +#define GL_INT 0x1404 +#define GL_UNSIGNED_INT 0x1405 +#define GL_FLOAT 0x1406 +#define GL_FIXED 0x140C + +/* PixelFormat */ +#define GL_DEPTH_COMPONENT 0x1902 +#define GL_ALPHA 0x1906 +#define GL_RGB 0x1907 +#define GL_RGBA 0x1908 +#define GL_LUMINANCE 0x1909 +#define GL_LUMINANCE_ALPHA 0x190A + +/* PixelType */ +/* GL_UNSIGNED_BYTE */ +#define GL_UNSIGNED_SHORT_4_4_4_4 0x8033 +#define GL_UNSIGNED_SHORT_5_5_5_1 0x8034 +#define GL_UNSIGNED_SHORT_5_6_5 0x8363 + +/* Shaders */ +#define GL_FRAGMENT_SHADER 0x8B30 +#define GL_VERTEX_SHADER 0x8B31 +#define GL_MAX_VERTEX_ATTRIBS 0x8869 +#define GL_MAX_VERTEX_UNIFORM_VECTORS 0x8DFB +#define GL_MAX_VARYING_VECTORS 0x8DFC +#define GL_MAX_COMBINED_TEXTURE_IMAGE_UNITS 0x8B4D +#define GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS 0x8B4C +#define GL_MAX_TEXTURE_IMAGE_UNITS 0x8872 +#define GL_MAX_FRAGMENT_UNIFORM_VECTORS 0x8DFD +#define GL_SHADER_TYPE 0x8B4F +#define GL_DELETE_STATUS 0x8B80 +#define GL_LINK_STATUS 0x8B82 +#define GL_VALIDATE_STATUS 0x8B83 +#define GL_ATTACHED_SHADERS 0x8B85 +#define GL_ACTIVE_UNIFORMS 0x8B86 +#define GL_ACTIVE_UNIFORM_MAX_LENGTH 0x8B87 +#define GL_ACTIVE_ATTRIBUTES 0x8B89 +#define GL_ACTIVE_ATTRIBUTE_MAX_LENGTH 0x8B8A +#define GL_SHADING_LANGUAGE_VERSION 0x8B8C +#define GL_CURRENT_PROGRAM 0x8B8D + +/* StencilFunction */ +#define GL_NEVER 0x0200 +#define GL_LESS 0x0201 +#define GL_EQUAL 0x0202 +#define GL_LEQUAL 0x0203 +#define GL_GREATER 0x0204 +#define GL_NOTEQUAL 0x0205 +#define GL_GEQUAL 0x0206 +#define GL_ALWAYS 0x0207 + +/* StencilOp */ +/* GL_ZERO */ +#define GL_KEEP 0x1E00 +#define GL_REPLACE 0x1E01 +#define GL_INCR 0x1E02 +#define GL_DECR 0x1E03 +#define GL_INVERT 0x150A +#define GL_INCR_WRAP 0x8507 +#define 
GL_DECR_WRAP 0x8508 + +/* StringName */ +#define GL_VENDOR 0x1F00 +#define GL_RENDERER 0x1F01 +#define GL_VERSION 0x1F02 +#define GL_EXTENSIONS 0x1F03 + +/* TextureMagFilter */ +#define GL_NEAREST 0x2600 +#define GL_LINEAR 0x2601 + +/* TextureMinFilter */ +/* GL_NEAREST */ +/* GL_LINEAR */ +#define GL_NEAREST_MIPMAP_NEAREST 0x2700 +#define GL_LINEAR_MIPMAP_NEAREST 0x2701 +#define GL_NEAREST_MIPMAP_LINEAR 0x2702 +#define GL_LINEAR_MIPMAP_LINEAR 0x2703 + +/* TextureParameterName */ +#define GL_TEXTURE_MAG_FILTER 0x2800 +#define GL_TEXTURE_MIN_FILTER 0x2801 +#define GL_TEXTURE_WRAP_S 0x2802 +#define GL_TEXTURE_WRAP_T 0x2803 + +/* TextureTarget */ +/* GL_TEXTURE_2D */ +#define GL_TEXTURE 0x1702 + +#define GL_TEXTURE_CUBE_MAP 0x8513 +#define GL_TEXTURE_BINDING_CUBE_MAP 0x8514 +#define GL_TEXTURE_CUBE_MAP_POSITIVE_X 0x8515 +#define GL_TEXTURE_CUBE_MAP_NEGATIVE_X 0x8516 +#define GL_TEXTURE_CUBE_MAP_POSITIVE_Y 0x8517 +#define GL_TEXTURE_CUBE_MAP_NEGATIVE_Y 0x8518 +#define GL_TEXTURE_CUBE_MAP_POSITIVE_Z 0x8519 +#define GL_TEXTURE_CUBE_MAP_NEGATIVE_Z 0x851A +#define GL_MAX_CUBE_MAP_TEXTURE_SIZE 0x851C + +/* TextureUnit */ +#define GL_TEXTURE0 0x84C0 +#define GL_TEXTURE1 0x84C1 +#define GL_TEXTURE2 0x84C2 +#define GL_TEXTURE3 0x84C3 +#define GL_TEXTURE4 0x84C4 +#define GL_TEXTURE5 0x84C5 +#define GL_TEXTURE6 0x84C6 +#define GL_TEXTURE7 0x84C7 +#define GL_TEXTURE8 0x84C8 +#define GL_TEXTURE9 0x84C9 +#define GL_TEXTURE10 0x84CA +#define GL_TEXTURE11 0x84CB +#define GL_TEXTURE12 0x84CC +#define GL_TEXTURE13 0x84CD +#define GL_TEXTURE14 0x84CE +#define GL_TEXTURE15 0x84CF +#define GL_TEXTURE16 0x84D0 +#define GL_TEXTURE17 0x84D1 +#define GL_TEXTURE18 0x84D2 +#define GL_TEXTURE19 0x84D3 +#define GL_TEXTURE20 0x84D4 +#define GL_TEXTURE21 0x84D5 +#define GL_TEXTURE22 0x84D6 +#define GL_TEXTURE23 0x84D7 +#define GL_TEXTURE24 0x84D8 +#define GL_TEXTURE25 0x84D9 +#define GL_TEXTURE26 0x84DA +#define GL_TEXTURE27 0x84DB +#define GL_TEXTURE28 0x84DC +#define GL_TEXTURE29 0x84DD +#define 
GL_TEXTURE30 0x84DE +#define GL_TEXTURE31 0x84DF +#define GL_ACTIVE_TEXTURE 0x84E0 + +/* TextureWrapMode */ +#define GL_REPEAT 0x2901 +#define GL_CLAMP_TO_EDGE 0x812F +#define GL_MIRRORED_REPEAT 0x8370 + +/* Uniform Types */ +#define GL_FLOAT_VEC2 0x8B50 +#define GL_FLOAT_VEC3 0x8B51 +#define GL_FLOAT_VEC4 0x8B52 +#define GL_INT_VEC2 0x8B53 +#define GL_INT_VEC3 0x8B54 +#define GL_INT_VEC4 0x8B55 +#define GL_BOOL 0x8B56 +#define GL_BOOL_VEC2 0x8B57 +#define GL_BOOL_VEC3 0x8B58 +#define GL_BOOL_VEC4 0x8B59 +#define GL_FLOAT_MAT2 0x8B5A +#define GL_FLOAT_MAT3 0x8B5B +#define GL_FLOAT_MAT4 0x8B5C +#define GL_SAMPLER_2D 0x8B5E +#define GL_SAMPLER_CUBE 0x8B60 + +/* Vertex Arrays */ +#define GL_VERTEX_ATTRIB_ARRAY_ENABLED 0x8622 +#define GL_VERTEX_ATTRIB_ARRAY_SIZE 0x8623 +#define GL_VERTEX_ATTRIB_ARRAY_STRIDE 0x8624 +#define GL_VERTEX_ATTRIB_ARRAY_TYPE 0x8625 +#define GL_VERTEX_ATTRIB_ARRAY_NORMALIZED 0x886A +#define GL_VERTEX_ATTRIB_ARRAY_POINTER 0x8645 +#define GL_VERTEX_ATTRIB_ARRAY_BUFFER_BINDING 0x889F + +/* Read Format */ +#define GL_IMPLEMENTATION_COLOR_READ_TYPE 0x8B9A +#define GL_IMPLEMENTATION_COLOR_READ_FORMAT 0x8B9B + +/* Shader Source */ +#define GL_COMPILE_STATUS 0x8B81 +#define GL_INFO_LOG_LENGTH 0x8B84 +#define GL_SHADER_SOURCE_LENGTH 0x8B88 +#define GL_SHADER_COMPILER 0x8DFA + +/* Shader Binary */ +#define GL_SHADER_BINARY_FORMATS 0x8DF8 +#define GL_NUM_SHADER_BINARY_FORMATS 0x8DF9 + +/* Shader Precision-Specified Types */ +#define GL_LOW_FLOAT 0x8DF0 +#define GL_MEDIUM_FLOAT 0x8DF1 +#define GL_HIGH_FLOAT 0x8DF2 +#define GL_LOW_INT 0x8DF3 +#define GL_MEDIUM_INT 0x8DF4 +#define GL_HIGH_INT 0x8DF5 + +/* Framebuffer Object. 
*/ +#define GL_FRAMEBUFFER 0x8D40 +#define GL_RENDERBUFFER 0x8D41 + +#define GL_RGBA4 0x8056 +#define GL_RGB5_A1 0x8057 +#define GL_RGB565 0x8D62 +#define GL_DEPTH_COMPONENT16 0x81A5 +#define GL_STENCIL_INDEX8 0x8D48 + +#define GL_RENDERBUFFER_WIDTH 0x8D42 +#define GL_RENDERBUFFER_HEIGHT 0x8D43 +#define GL_RENDERBUFFER_INTERNAL_FORMAT 0x8D44 +#define GL_RENDERBUFFER_RED_SIZE 0x8D50 +#define GL_RENDERBUFFER_GREEN_SIZE 0x8D51 +#define GL_RENDERBUFFER_BLUE_SIZE 0x8D52 +#define GL_RENDERBUFFER_ALPHA_SIZE 0x8D53 +#define GL_RENDERBUFFER_DEPTH_SIZE 0x8D54 +#define GL_RENDERBUFFER_STENCIL_SIZE 0x8D55 + +#define GL_FRAMEBUFFER_ATTACHMENT_OBJECT_TYPE 0x8CD0 +#define GL_FRAMEBUFFER_ATTACHMENT_OBJECT_NAME 0x8CD1 +#define GL_FRAMEBUFFER_ATTACHMENT_TEXTURE_LEVEL 0x8CD2 +#define GL_FRAMEBUFFER_ATTACHMENT_TEXTURE_CUBE_MAP_FACE 0x8CD3 + +#define GL_COLOR_ATTACHMENT0 0x8CE0 +#define GL_DEPTH_ATTACHMENT 0x8D00 +#define GL_STENCIL_ATTACHMENT 0x8D20 + +#define GL_NONE 0 + +#define GL_FRAMEBUFFER_COMPLETE 0x8CD5 +#define GL_FRAMEBUFFER_INCOMPLETE_ATTACHMENT 0x8CD6 +#define GL_FRAMEBUFFER_INCOMPLETE_MISSING_ATTACHMENT 0x8CD7 +#define GL_FRAMEBUFFER_INCOMPLETE_DIMENSIONS 0x8CD9 +#define GL_FRAMEBUFFER_UNSUPPORTED 0x8CDD + +#define GL_FRAMEBUFFER_BINDING 0x8CA6 +#define GL_RENDERBUFFER_BINDING 0x8CA7 +#define GL_MAX_RENDERBUFFER_SIZE 0x84E8 + +#define GL_INVALID_FRAMEBUFFER_OPERATION 0x0506 + +/* OpenGL ES 3.0 */ + +#define GL_READ_BUFFER 0x0C02 +#define GL_UNPACK_ROW_LENGTH 0x0CF2 +#define GL_UNPACK_SKIP_ROWS 0x0CF3 +#define GL_UNPACK_SKIP_PIXELS 0x0CF4 +#define GL_PACK_ROW_LENGTH 0x0D02 +#define GL_PACK_SKIP_ROWS 0x0D03 +#define GL_PACK_SKIP_PIXELS 0x0D04 +#define GL_COLOR 0x1800 +#define GL_DEPTH 0x1801 +#define GL_STENCIL 0x1802 +#define GL_RED 0x1903 +#define GL_RGB8 0x8051 +#define GL_RGBA8 0x8058 +#define GL_RGB10_A2 0x8059 +#define GL_TEXTURE_BINDING_3D 0x806A +#define GL_UNPACK_SKIP_IMAGES 0x806D +#define GL_UNPACK_IMAGE_HEIGHT 0x806E +#define GL_TEXTURE_3D 0x806F +#define 
GL_TEXTURE_WRAP_R 0x8072 +#define GL_MAX_3D_TEXTURE_SIZE 0x8073 +#define GL_UNSIGNED_INT_2_10_10_10_REV 0x8368 +#define GL_MAX_ELEMENTS_VERTICES 0x80E8 +#define GL_MAX_ELEMENTS_INDICES 0x80E9 +#define GL_TEXTURE_MIN_LOD 0x813A +#define GL_TEXTURE_MAX_LOD 0x813B +#define GL_TEXTURE_BASE_LEVEL 0x813C +#define GL_TEXTURE_MAX_LEVEL 0x813D +#define GL_MIN 0x8007 +#define GL_MAX 0x8008 +#define GL_DEPTH_COMPONENT24 0x81A6 +#define GL_MAX_TEXTURE_LOD_BIAS 0x84FD +#define GL_TEXTURE_COMPARE_MODE 0x884C +#define GL_TEXTURE_COMPARE_FUNC 0x884D +#define GL_CURRENT_QUERY 0x8865 +#define GL_QUERY_RESULT 0x8866 +#define GL_QUERY_RESULT_AVAILABLE 0x8867 +#define GL_BUFFER_MAPPED 0x88BC +#define GL_BUFFER_MAP_POINTER 0x88BD +#define GL_STREAM_READ 0x88E1 +#define GL_STREAM_COPY 0x88E2 +#define GL_STATIC_READ 0x88E5 +#define GL_STATIC_COPY 0x88E6 +#define GL_DYNAMIC_READ 0x88E9 +#define GL_DYNAMIC_COPY 0x88EA +#define GL_MAX_DRAW_BUFFERS 0x8824 +#define GL_DRAW_BUFFER0 0x8825 +#define GL_DRAW_BUFFER1 0x8826 +#define GL_DRAW_BUFFER2 0x8827 +#define GL_DRAW_BUFFER3 0x8828 +#define GL_DRAW_BUFFER4 0x8829 +#define GL_DRAW_BUFFER5 0x882A +#define GL_DRAW_BUFFER6 0x882B +#define GL_DRAW_BUFFER7 0x882C +#define GL_DRAW_BUFFER8 0x882D +#define GL_DRAW_BUFFER9 0x882E +#define GL_DRAW_BUFFER10 0x882F +#define GL_DRAW_BUFFER11 0x8830 +#define GL_DRAW_BUFFER12 0x8831 +#define GL_DRAW_BUFFER13 0x8832 +#define GL_DRAW_BUFFER14 0x8833 +#define GL_DRAW_BUFFER15 0x8834 +#define GL_MAX_FRAGMENT_UNIFORM_COMPONENTS 0x8B49 +#define GL_MAX_VERTEX_UNIFORM_COMPONENTS 0x8B4A +#define GL_SAMPLER_3D 0x8B5F +#define GL_SAMPLER_2D_SHADOW 0x8B62 +#define GL_FRAGMENT_SHADER_DERIVATIVE_HINT 0x8B8B +#define GL_PIXEL_PACK_BUFFER 0x88EB +#define GL_PIXEL_UNPACK_BUFFER 0x88EC +#define GL_PIXEL_PACK_BUFFER_BINDING 0x88ED +#define GL_PIXEL_UNPACK_BUFFER_BINDING 0x88EF +#define GL_FLOAT_MAT2x3 0x8B65 +#define GL_FLOAT_MAT2x4 0x8B66 +#define GL_FLOAT_MAT3x2 0x8B67 +#define GL_FLOAT_MAT3x4 0x8B68 +#define GL_FLOAT_MAT4x2 
0x8B69 +#define GL_FLOAT_MAT4x3 0x8B6A +#define GL_SRGB 0x8C40 +#define GL_SRGB8 0x8C41 +#define GL_SRGB8_ALPHA8 0x8C43 +#define GL_COMPARE_REF_TO_TEXTURE 0x884E +#define GL_MAJOR_VERSION 0x821B +#define GL_MINOR_VERSION 0x821C +#define GL_NUM_EXTENSIONS 0x821D +#define GL_RGBA32F 0x8814 +#define GL_RGB32F 0x8815 +#define GL_RGBA16F 0x881A +#define GL_RGB16F 0x881B +#define GL_VERTEX_ATTRIB_ARRAY_INTEGER 0x88FD +#define GL_MAX_ARRAY_TEXTURE_LAYERS 0x88FF +#define GL_MIN_PROGRAM_TEXEL_OFFSET 0x8904 +#define GL_MAX_PROGRAM_TEXEL_OFFSET 0x8905 +#define GL_MAX_VARYING_COMPONENTS 0x8B4B +#define GL_TEXTURE_2D_ARRAY 0x8C1A +#define GL_TEXTURE_BINDING_2D_ARRAY 0x8C1D +#define GL_R11F_G11F_B10F 0x8C3A +#define GL_UNSIGNED_INT_10F_11F_11F_REV 0x8C3B +#define GL_RGB9_E5 0x8C3D +#define GL_UNSIGNED_INT_5_9_9_9_REV 0x8C3E +#define GL_TRANSFORM_FEEDBACK_VARYING_MAX_LENGTH 0x8C76 +#define GL_TRANSFORM_FEEDBACK_BUFFER_MODE 0x8C7F +#define GL_MAX_TRANSFORM_FEEDBACK_SEPARATE_COMPONENTS 0x8C80 +#define GL_TRANSFORM_FEEDBACK_VARYINGS 0x8C83 +#define GL_TRANSFORM_FEEDBACK_BUFFER_START 0x8C84 +#define GL_TRANSFORM_FEEDBACK_BUFFER_SIZE 0x8C85 +#define GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN 0x8C88 +#define GL_RASTERIZER_DISCARD 0x8C89 +#define GL_MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS 0x8C8A +#define GL_MAX_TRANSFORM_FEEDBACK_SEPARATE_ATTRIBS 0x8C8B +#define GL_INTERLEAVED_ATTRIBS 0x8C8C +#define GL_SEPARATE_ATTRIBS 0x8C8D +#define GL_TRANSFORM_FEEDBACK_BUFFER 0x8C8E +#define GL_TRANSFORM_FEEDBACK_BUFFER_BINDING 0x8C8F +#define GL_RGBA32UI 0x8D70 +#define GL_RGB32UI 0x8D71 +#define GL_RGBA16UI 0x8D76 +#define GL_RGB16UI 0x8D77 +#define GL_RGBA8UI 0x8D7C +#define GL_RGB8UI 0x8D7D +#define GL_RGBA32I 0x8D82 +#define GL_RGB32I 0x8D83 +#define GL_RGBA16I 0x8D88 +#define GL_RGB16I 0x8D89 +#define GL_RGBA8I 0x8D8E +#define GL_RGB8I 0x8D8F +#define GL_RED_INTEGER 0x8D94 +#define GL_RGB_INTEGER 0x8D98 +#define GL_RGBA_INTEGER 0x8D99 +#define GL_SAMPLER_2D_ARRAY 0x8DC1 +#define 
GL_SAMPLER_2D_ARRAY_SHADOW 0x8DC4 +#define GL_SAMPLER_CUBE_SHADOW 0x8DC5 +#define GL_UNSIGNED_INT_VEC2 0x8DC6 +#define GL_UNSIGNED_INT_VEC3 0x8DC7 +#define GL_UNSIGNED_INT_VEC4 0x8DC8 +#define GL_INT_SAMPLER_2D 0x8DCA +#define GL_INT_SAMPLER_3D 0x8DCB +#define GL_INT_SAMPLER_CUBE 0x8DCC +#define GL_INT_SAMPLER_2D_ARRAY 0x8DCF +#define GL_UNSIGNED_INT_SAMPLER_2D 0x8DD2 +#define GL_UNSIGNED_INT_SAMPLER_3D 0x8DD3 +#define GL_UNSIGNED_INT_SAMPLER_CUBE 0x8DD4 +#define GL_UNSIGNED_INT_SAMPLER_2D_ARRAY 0x8DD7 +#define GL_BUFFER_ACCESS_FLAGS 0x911F +#define GL_BUFFER_MAP_LENGTH 0x9120 +#define GL_BUFFER_MAP_OFFSET 0x9121 +#define GL_DEPTH_COMPONENT32F 0x8CAC +#define GL_DEPTH32F_STENCIL8 0x8CAD +#define GL_FLOAT_32_UNSIGNED_INT_24_8_REV 0x8DAD +#define GL_FRAMEBUFFER_ATTACHMENT_COLOR_ENCODING 0x8210 +#define GL_FRAMEBUFFER_ATTACHMENT_COMPONENT_TYPE 0x8211 +#define GL_FRAMEBUFFER_ATTACHMENT_RED_SIZE 0x8212 +#define GL_FRAMEBUFFER_ATTACHMENT_GREEN_SIZE 0x8213 +#define GL_FRAMEBUFFER_ATTACHMENT_BLUE_SIZE 0x8214 +#define GL_FRAMEBUFFER_ATTACHMENT_ALPHA_SIZE 0x8215 +#define GL_FRAMEBUFFER_ATTACHMENT_DEPTH_SIZE 0x8216 +#define GL_FRAMEBUFFER_ATTACHMENT_STENCIL_SIZE 0x8217 +#define GL_FRAMEBUFFER_DEFAULT 0x8218 +#define GL_FRAMEBUFFER_UNDEFINED 0x8219 +#define GL_DEPTH_STENCIL_ATTACHMENT 0x821A +#define GL_DEPTH_STENCIL 0x84F9 +#define GL_UNSIGNED_INT_24_8 0x84FA +#define GL_DEPTH24_STENCIL8 0x88F0 +#define GL_UNSIGNED_NORMALIZED 0x8C17 +#define GL_DRAW_FRAMEBUFFER_BINDING GL_FRAMEBUFFER_BINDING +#define GL_READ_FRAMEBUFFER 0x8CA8 +#define GL_DRAW_FRAMEBUFFER 0x8CA9 +#define GL_READ_FRAMEBUFFER_BINDING 0x8CAA +#define GL_RENDERBUFFER_SAMPLES 0x8CAB +#define GL_FRAMEBUFFER_ATTACHMENT_TEXTURE_LAYER 0x8CD4 +#define GL_MAX_COLOR_ATTACHMENTS 0x8CDF +#define GL_COLOR_ATTACHMENT1 0x8CE1 +#define GL_COLOR_ATTACHMENT2 0x8CE2 +#define GL_COLOR_ATTACHMENT3 0x8CE3 +#define GL_COLOR_ATTACHMENT4 0x8CE4 +#define GL_COLOR_ATTACHMENT5 0x8CE5 +#define GL_COLOR_ATTACHMENT6 0x8CE6 +#define 
GL_COLOR_ATTACHMENT7 0x8CE7 +#define GL_COLOR_ATTACHMENT8 0x8CE8 +#define GL_COLOR_ATTACHMENT9 0x8CE9 +#define GL_COLOR_ATTACHMENT10 0x8CEA +#define GL_COLOR_ATTACHMENT11 0x8CEB +#define GL_COLOR_ATTACHMENT12 0x8CEC +#define GL_COLOR_ATTACHMENT13 0x8CED +#define GL_COLOR_ATTACHMENT14 0x8CEE +#define GL_COLOR_ATTACHMENT15 0x8CEF +#define GL_FRAMEBUFFER_INCOMPLETE_MULTISAMPLE 0x8D56 +#define GL_MAX_SAMPLES 0x8D57 +#define GL_HALF_FLOAT 0x140B +#define GL_MAP_READ_BIT 0x0001 +#define GL_MAP_WRITE_BIT 0x0002 +#define GL_MAP_INVALIDATE_RANGE_BIT 0x0004 +#define GL_MAP_INVALIDATE_BUFFER_BIT 0x0008 +#define GL_MAP_FLUSH_EXPLICIT_BIT 0x0010 +#define GL_MAP_UNSYNCHRONIZED_BIT 0x0020 +#define GL_RG 0x8227 +#define GL_RG_INTEGER 0x8228 +#define GL_R8 0x8229 +#define GL_RG8 0x822B +#define GL_R16F 0x822D +#define GL_R32F 0x822E +#define GL_RG16F 0x822F +#define GL_RG32F 0x8230 +#define GL_R8I 0x8231 +#define GL_R8UI 0x8232 +#define GL_R16I 0x8233 +#define GL_R16UI 0x8234 +#define GL_R32I 0x8235 +#define GL_R32UI 0x8236 +#define GL_RG8I 0x8237 +#define GL_RG8UI 0x8238 +#define GL_RG16I 0x8239 +#define GL_RG16UI 0x823A +#define GL_RG32I 0x823B +#define GL_RG32UI 0x823C +#define GL_VERTEX_ARRAY_BINDING 0x85B5 +#define GL_R8_SNORM 0x8F94 +#define GL_RG8_SNORM 0x8F95 +#define GL_RGB8_SNORM 0x8F96 +#define GL_RGBA8_SNORM 0x8F97 +#define GL_SIGNED_NORMALIZED 0x8F9C +#define GL_PRIMITIVE_RESTART_FIXED_INDEX 0x8D69 +#define GL_COPY_READ_BUFFER 0x8F36 +#define GL_COPY_WRITE_BUFFER 0x8F37 +#define GL_COPY_READ_BUFFER_BINDING GL_COPY_READ_BUFFER +#define GL_COPY_WRITE_BUFFER_BINDING GL_COPY_WRITE_BUFFER +#define GL_UNIFORM_BUFFER 0x8A11 +#define GL_UNIFORM_BUFFER_BINDING 0x8A28 +#define GL_UNIFORM_BUFFER_START 0x8A29 +#define GL_UNIFORM_BUFFER_SIZE 0x8A2A +#define GL_MAX_VERTEX_UNIFORM_BLOCKS 0x8A2B +#define GL_MAX_FRAGMENT_UNIFORM_BLOCKS 0x8A2D +#define GL_MAX_COMBINED_UNIFORM_BLOCKS 0x8A2E +#define GL_MAX_UNIFORM_BUFFER_BINDINGS 0x8A2F +#define GL_MAX_UNIFORM_BLOCK_SIZE 0x8A30 +#define 
GL_MAX_COMBINED_VERTEX_UNIFORM_COMPONENTS 0x8A31 +#define GL_MAX_COMBINED_FRAGMENT_UNIFORM_COMPONENTS 0x8A33 +#define GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT 0x8A34 +#define GL_ACTIVE_UNIFORM_BLOCK_MAX_NAME_LENGTH 0x8A35 +#define GL_ACTIVE_UNIFORM_BLOCKS 0x8A36 +#define GL_UNIFORM_TYPE 0x8A37 +#define GL_UNIFORM_SIZE 0x8A38 +#define GL_UNIFORM_NAME_LENGTH 0x8A39 +#define GL_UNIFORM_BLOCK_INDEX 0x8A3A +#define GL_UNIFORM_OFFSET 0x8A3B +#define GL_UNIFORM_ARRAY_STRIDE 0x8A3C +#define GL_UNIFORM_MATRIX_STRIDE 0x8A3D +#define GL_UNIFORM_IS_ROW_MAJOR 0x8A3E +#define GL_UNIFORM_BLOCK_BINDING 0x8A3F +#define GL_UNIFORM_BLOCK_DATA_SIZE 0x8A40 +#define GL_UNIFORM_BLOCK_NAME_LENGTH 0x8A41 +#define GL_UNIFORM_BLOCK_ACTIVE_UNIFORMS 0x8A42 +#define GL_UNIFORM_BLOCK_ACTIVE_UNIFORM_INDICES 0x8A43 +#define GL_UNIFORM_BLOCK_REFERENCED_BY_VERTEX_SHADER 0x8A44 +#define GL_UNIFORM_BLOCK_REFERENCED_BY_FRAGMENT_SHADER 0x8A46 +#define GL_INVALID_INDEX 0xFFFFFFFFu +#define GL_MAX_VERTEX_OUTPUT_COMPONENTS 0x9122 +#define GL_MAX_FRAGMENT_INPUT_COMPONENTS 0x9125 +#define GL_MAX_SERVER_WAIT_TIMEOUT 0x9111 +#define GL_OBJECT_TYPE 0x9112 +#define GL_SYNC_CONDITION 0x9113 +#define GL_SYNC_STATUS 0x9114 +#define GL_SYNC_FLAGS 0x9115 +#define GL_SYNC_FENCE 0x9116 +#define GL_SYNC_GPU_COMMANDS_COMPLETE 0x9117 +#define GL_UNSIGNALED 0x9118 +#define GL_SIGNALED 0x9119 +#define GL_ALREADY_SIGNALED 0x911A +#define GL_TIMEOUT_EXPIRED 0x911B +#define GL_CONDITION_SATISFIED 0x911C +#define GL_WAIT_FAILED 0x911D +#define GL_SYNC_FLUSH_COMMANDS_BIT 0x00000001 +#define GL_TIMEOUT_IGNORED 0xFFFFFFFFFFFFFFFFull +#define GL_VERTEX_ATTRIB_ARRAY_DIVISOR 0x88FE +#define GL_ANY_SAMPLES_PASSED 0x8C2F +#define GL_ANY_SAMPLES_PASSED_CONSERVATIVE 0x8D6A +#define GL_SAMPLER_BINDING 0x8919 +#define GL_RGB10_A2UI 0x906F +#define GL_TEXTURE_SWIZZLE_R 0x8E42 +#define GL_TEXTURE_SWIZZLE_G 0x8E43 +#define GL_TEXTURE_SWIZZLE_B 0x8E44 +#define GL_TEXTURE_SWIZZLE_A 0x8E45 +#define GL_GREEN 0x1904 +#define GL_BLUE 0x1905 +#define 
GL_INT_2_10_10_10_REV 0x8D9F +#define GL_TRANSFORM_FEEDBACK 0x8E22 +#define GL_TRANSFORM_FEEDBACK_PAUSED 0x8E23 +#define GL_TRANSFORM_FEEDBACK_ACTIVE 0x8E24 +#define GL_TRANSFORM_FEEDBACK_BINDING 0x8E25 +#define GL_PROGRAM_BINARY_RETRIEVABLE_HINT 0x8257 +#define GL_PROGRAM_BINARY_LENGTH 0x8741 +#define GL_NUM_PROGRAM_BINARY_FORMATS 0x87FE +#define GL_PROGRAM_BINARY_FORMATS 0x87FF +#define GL_COMPRESSED_R11_EAC 0x9270 +#define GL_COMPRESSED_SIGNED_R11_EAC 0x9271 +#define GL_COMPRESSED_RG11_EAC 0x9272 +#define GL_COMPRESSED_SIGNED_RG11_EAC 0x9273 +#define GL_COMPRESSED_RGB8_ETC2 0x9274 +#define GL_COMPRESSED_SRGB8_ETC2 0x9275 +#define GL_COMPRESSED_RGB8_PUNCHTHROUGH_ALPHA1_ETC2 0x9276 +#define GL_COMPRESSED_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2 0x9277 +#define GL_COMPRESSED_RGBA8_ETC2_EAC 0x9278 +#define GL_COMPRESSED_SRGB8_ALPHA8_ETC2_EAC 0x9279 +#define GL_TEXTURE_IMMUTABLE_FORMAT 0x912F +#define GL_MAX_ELEMENT_INDEX 0x8D6B +#define GL_NUM_SAMPLE_COUNTS 0x9380 +#define GL_TEXTURE_IMMUTABLE_LEVELS 0x82DF + +/*------------------------------------------------------------------------- + * Entrypoint definitions + *-----------------------------------------------------------------------*/ + +/* OpenGL ES 2.0 */ + +GL_APICALL void GL_APIENTRY glActiveTexture (GLenum texture); +GL_APICALL void GL_APIENTRY glAttachShader (GLuint program, GLuint shader); +GL_APICALL void GL_APIENTRY glBindAttribLocation (GLuint program, GLuint index, const GLchar* name); +GL_APICALL void GL_APIENTRY glBindBuffer (GLenum target, GLuint buffer); +GL_APICALL void GL_APIENTRY glBindFramebuffer (GLenum target, GLuint framebuffer); +GL_APICALL void GL_APIENTRY glBindRenderbuffer (GLenum target, GLuint renderbuffer); +GL_APICALL void GL_APIENTRY glBindTexture (GLenum target, GLuint texture); +GL_APICALL void GL_APIENTRY glBlendColor (GLfloat red, GLfloat green, GLfloat blue, GLfloat alpha); +GL_APICALL void GL_APIENTRY glBlendEquation (GLenum mode); +GL_APICALL void GL_APIENTRY glBlendEquationSeparate 
(GLenum modeRGB, GLenum modeAlpha); +GL_APICALL void GL_APIENTRY glBlendFunc (GLenum sfactor, GLenum dfactor); +GL_APICALL void GL_APIENTRY glBlendFuncSeparate (GLenum srcRGB, GLenum dstRGB, GLenum srcAlpha, GLenum dstAlpha); +GL_APICALL void GL_APIENTRY glBufferData (GLenum target, GLsizeiptr size, const GLvoid* data, GLenum usage); +GL_APICALL void GL_APIENTRY glBufferSubData (GLenum target, GLintptr offset, GLsizeiptr size, const GLvoid* data); +GL_APICALL GLenum GL_APIENTRY glCheckFramebufferStatus (GLenum target); +GL_APICALL void GL_APIENTRY glClear (GLbitfield mask); +GL_APICALL void GL_APIENTRY glClearColor (GLfloat red, GLfloat green, GLfloat blue, GLfloat alpha); +GL_APICALL void GL_APIENTRY glClearDepthf (GLfloat depth); +GL_APICALL void GL_APIENTRY glClearStencil (GLint s); +GL_APICALL void GL_APIENTRY glColorMask (GLboolean red, GLboolean green, GLboolean blue, GLboolean alpha); +GL_APICALL void GL_APIENTRY glCompileShader (GLuint shader); +GL_APICALL void GL_APIENTRY glCompressedTexImage2D (GLenum target, GLint level, GLenum internalformat, GLsizei width, GLsizei height, GLint border, GLsizei imageSize, const GLvoid* data); +GL_APICALL void GL_APIENTRY glCompressedTexSubImage2D (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLsizei width, GLsizei height, GLenum format, GLsizei imageSize, const GLvoid* data); +GL_APICALL void GL_APIENTRY glCopyTexImage2D (GLenum target, GLint level, GLenum internalformat, GLint x, GLint y, GLsizei width, GLsizei height, GLint border); +GL_APICALL void GL_APIENTRY glCopyTexSubImage2D (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint x, GLint y, GLsizei width, GLsizei height); +GL_APICALL GLuint GL_APIENTRY glCreateProgram (void); +GL_APICALL GLuint GL_APIENTRY glCreateShader (GLenum type); +GL_APICALL void GL_APIENTRY glCullFace (GLenum mode); +GL_APICALL void GL_APIENTRY glDeleteBuffers (GLsizei n, const GLuint* buffers); +GL_APICALL void GL_APIENTRY glDeleteFramebuffers (GLsizei n, const 
GLuint* framebuffers); +GL_APICALL void GL_APIENTRY glDeleteProgram (GLuint program); +GL_APICALL void GL_APIENTRY glDeleteRenderbuffers (GLsizei n, const GLuint* renderbuffers); +GL_APICALL void GL_APIENTRY glDeleteShader (GLuint shader); +GL_APICALL void GL_APIENTRY glDeleteTextures (GLsizei n, const GLuint* textures); +GL_APICALL void GL_APIENTRY glDepthFunc (GLenum func); +GL_APICALL void GL_APIENTRY glDepthMask (GLboolean flag); +GL_APICALL void GL_APIENTRY glDepthRangef (GLfloat n, GLfloat f); +GL_APICALL void GL_APIENTRY glDetachShader (GLuint program, GLuint shader); +GL_APICALL void GL_APIENTRY glDisable (GLenum cap); +GL_APICALL void GL_APIENTRY glDisableVertexAttribArray (GLuint index); +GL_APICALL void GL_APIENTRY glDrawArrays (GLenum mode, GLint first, GLsizei count); +GL_APICALL void GL_APIENTRY glDrawElements (GLenum mode, GLsizei count, GLenum type, const GLvoid* indices); +GL_APICALL void GL_APIENTRY glEnable (GLenum cap); +GL_APICALL void GL_APIENTRY glEnableVertexAttribArray (GLuint index); +GL_APICALL void GL_APIENTRY glFinish (void); +GL_APICALL void GL_APIENTRY glFlush (void); +GL_APICALL void GL_APIENTRY glFramebufferRenderbuffer (GLenum target, GLenum attachment, GLenum renderbuffertarget, GLuint renderbuffer); +GL_APICALL void GL_APIENTRY glFramebufferTexture2D (GLenum target, GLenum attachment, GLenum textarget, GLuint texture, GLint level); +GL_APICALL void GL_APIENTRY glFrontFace (GLenum mode); +GL_APICALL void GL_APIENTRY glGenBuffers (GLsizei n, GLuint* buffers); +GL_APICALL void GL_APIENTRY glGenerateMipmap (GLenum target); +GL_APICALL void GL_APIENTRY glGenFramebuffers (GLsizei n, GLuint* framebuffers); +GL_APICALL void GL_APIENTRY glGenRenderbuffers (GLsizei n, GLuint* renderbuffers); +GL_APICALL void GL_APIENTRY glGenTextures (GLsizei n, GLuint* textures); +GL_APICALL void GL_APIENTRY glGetActiveAttrib (GLuint program, GLuint index, GLsizei bufsize, GLsizei* length, GLint* size, GLenum* type, GLchar* name); +GL_APICALL void 
GL_APIENTRY glGetActiveUniform (GLuint program, GLuint index, GLsizei bufsize, GLsizei* length, GLint* size, GLenum* type, GLchar* name); +GL_APICALL void GL_APIENTRY glGetAttachedShaders (GLuint program, GLsizei maxcount, GLsizei* count, GLuint* shaders); +GL_APICALL GLint GL_APIENTRY glGetAttribLocation (GLuint program, const GLchar* name); +GL_APICALL void GL_APIENTRY glGetBooleanv (GLenum pname, GLboolean* params); +GL_APICALL void GL_APIENTRY glGetBufferParameteriv (GLenum target, GLenum pname, GLint* params); +GL_APICALL GLenum GL_APIENTRY glGetError (void); +GL_APICALL void GL_APIENTRY glGetFloatv (GLenum pname, GLfloat* params); +GL_APICALL void GL_APIENTRY glGetFramebufferAttachmentParameteriv (GLenum target, GLenum attachment, GLenum pname, GLint* params); +GL_APICALL void GL_APIENTRY glGetIntegerv (GLenum pname, GLint* params); +GL_APICALL void GL_APIENTRY glGetProgramiv (GLuint program, GLenum pname, GLint* params); +GL_APICALL void GL_APIENTRY glGetProgramInfoLog (GLuint program, GLsizei bufsize, GLsizei* length, GLchar* infolog); +GL_APICALL void GL_APIENTRY glGetRenderbufferParameteriv (GLenum target, GLenum pname, GLint* params); +GL_APICALL void GL_APIENTRY glGetShaderiv (GLuint shader, GLenum pname, GLint* params); +GL_APICALL void GL_APIENTRY glGetShaderInfoLog (GLuint shader, GLsizei bufsize, GLsizei* length, GLchar* infolog); +GL_APICALL void GL_APIENTRY glGetShaderPrecisionFormat (GLenum shadertype, GLenum precisiontype, GLint* range, GLint* precision); +GL_APICALL void GL_APIENTRY glGetShaderSource (GLuint shader, GLsizei bufsize, GLsizei* length, GLchar* source); +GL_APICALL const GLubyte* GL_APIENTRY glGetString (GLenum name); +GL_APICALL void GL_APIENTRY glGetTexParameterfv (GLenum target, GLenum pname, GLfloat* params); +GL_APICALL void GL_APIENTRY glGetTexParameteriv (GLenum target, GLenum pname, GLint* params); +GL_APICALL void GL_APIENTRY glGetUniformfv (GLuint program, GLint location, GLfloat* params); +GL_APICALL void GL_APIENTRY 
glGetUniformiv (GLuint program, GLint location, GLint* params); +GL_APICALL GLint GL_APIENTRY glGetUniformLocation (GLuint program, const GLchar* name); +GL_APICALL void GL_APIENTRY glGetVertexAttribfv (GLuint index, GLenum pname, GLfloat* params); +GL_APICALL void GL_APIENTRY glGetVertexAttribiv (GLuint index, GLenum pname, GLint* params); +GL_APICALL void GL_APIENTRY glGetVertexAttribPointerv (GLuint index, GLenum pname, GLvoid** pointer); +GL_APICALL void GL_APIENTRY glHint (GLenum target, GLenum mode); +GL_APICALL GLboolean GL_APIENTRY glIsBuffer (GLuint buffer); +GL_APICALL GLboolean GL_APIENTRY glIsEnabled (GLenum cap); +GL_APICALL GLboolean GL_APIENTRY glIsFramebuffer (GLuint framebuffer); +GL_APICALL GLboolean GL_APIENTRY glIsProgram (GLuint program); +GL_APICALL GLboolean GL_APIENTRY glIsRenderbuffer (GLuint renderbuffer); +GL_APICALL GLboolean GL_APIENTRY glIsShader (GLuint shader); +GL_APICALL GLboolean GL_APIENTRY glIsTexture (GLuint texture); +GL_APICALL void GL_APIENTRY glLineWidth (GLfloat width); +GL_APICALL void GL_APIENTRY glLinkProgram (GLuint program); +GL_APICALL void GL_APIENTRY glPixelStorei (GLenum pname, GLint param); +GL_APICALL void GL_APIENTRY glPolygonOffset (GLfloat factor, GLfloat units); +GL_APICALL void GL_APIENTRY glReadPixels (GLint x, GLint y, GLsizei width, GLsizei height, GLenum format, GLenum type, GLvoid* pixels); +GL_APICALL void GL_APIENTRY glReleaseShaderCompiler (void); +GL_APICALL void GL_APIENTRY glRenderbufferStorage (GLenum target, GLenum internalformat, GLsizei width, GLsizei height); +GL_APICALL void GL_APIENTRY glSampleCoverage (GLfloat value, GLboolean invert); +GL_APICALL void GL_APIENTRY glScissor (GLint x, GLint y, GLsizei width, GLsizei height); +GL_APICALL void GL_APIENTRY glShaderBinary (GLsizei n, const GLuint* shaders, GLenum binaryformat, const GLvoid* binary, GLsizei length); +GL_APICALL void GL_APIENTRY glShaderSource (GLuint shader, GLsizei count, const GLchar* const* string, const GLint* length); 
+GL_APICALL void GL_APIENTRY glStencilFunc (GLenum func, GLint ref, GLuint mask); +GL_APICALL void GL_APIENTRY glStencilFuncSeparate (GLenum face, GLenum func, GLint ref, GLuint mask); +GL_APICALL void GL_APIENTRY glStencilMask (GLuint mask); +GL_APICALL void GL_APIENTRY glStencilMaskSeparate (GLenum face, GLuint mask); +GL_APICALL void GL_APIENTRY glStencilOp (GLenum fail, GLenum zfail, GLenum zpass); +GL_APICALL void GL_APIENTRY glStencilOpSeparate (GLenum face, GLenum fail, GLenum zfail, GLenum zpass); +GL_APICALL void GL_APIENTRY glTexImage2D (GLenum target, GLint level, GLint internalformat, GLsizei width, GLsizei height, GLint border, GLenum format, GLenum type, const GLvoid* pixels); +GL_APICALL void GL_APIENTRY glTexParameterf (GLenum target, GLenum pname, GLfloat param); +GL_APICALL void GL_APIENTRY glTexParameterfv (GLenum target, GLenum pname, const GLfloat* params); +GL_APICALL void GL_APIENTRY glTexParameteri (GLenum target, GLenum pname, GLint param); +GL_APICALL void GL_APIENTRY glTexParameteriv (GLenum target, GLenum pname, const GLint* params); +GL_APICALL void GL_APIENTRY glTexSubImage2D (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLsizei width, GLsizei height, GLenum format, GLenum type, const GLvoid* pixels); +GL_APICALL void GL_APIENTRY glUniform1f (GLint location, GLfloat x); +GL_APICALL void GL_APIENTRY glUniform1fv (GLint location, GLsizei count, const GLfloat* v); +GL_APICALL void GL_APIENTRY glUniform1i (GLint location, GLint x); +GL_APICALL void GL_APIENTRY glUniform1iv (GLint location, GLsizei count, const GLint* v); +GL_APICALL void GL_APIENTRY glUniform2f (GLint location, GLfloat x, GLfloat y); +GL_APICALL void GL_APIENTRY glUniform2fv (GLint location, GLsizei count, const GLfloat* v); +GL_APICALL void GL_APIENTRY glUniform2i (GLint location, GLint x, GLint y); +GL_APICALL void GL_APIENTRY glUniform2iv (GLint location, GLsizei count, const GLint* v); +GL_APICALL void GL_APIENTRY glUniform3f (GLint location, GLfloat x, 
GLfloat y, GLfloat z); +GL_APICALL void GL_APIENTRY glUniform3fv (GLint location, GLsizei count, const GLfloat* v); +GL_APICALL void GL_APIENTRY glUniform3i (GLint location, GLint x, GLint y, GLint z); +GL_APICALL void GL_APIENTRY glUniform3iv (GLint location, GLsizei count, const GLint* v); +GL_APICALL void GL_APIENTRY glUniform4f (GLint location, GLfloat x, GLfloat y, GLfloat z, GLfloat w); +GL_APICALL void GL_APIENTRY glUniform4fv (GLint location, GLsizei count, const GLfloat* v); +GL_APICALL void GL_APIENTRY glUniform4i (GLint location, GLint x, GLint y, GLint z, GLint w); +GL_APICALL void GL_APIENTRY glUniform4iv (GLint location, GLsizei count, const GLint* v); +GL_APICALL void GL_APIENTRY glUniformMatrix2fv (GLint location, GLsizei count, GLboolean transpose, const GLfloat* value); +GL_APICALL void GL_APIENTRY glUniformMatrix3fv (GLint location, GLsizei count, GLboolean transpose, const GLfloat* value); +GL_APICALL void GL_APIENTRY glUniformMatrix4fv (GLint location, GLsizei count, GLboolean transpose, const GLfloat* value); +GL_APICALL void GL_APIENTRY glUseProgram (GLuint program); +GL_APICALL void GL_APIENTRY glValidateProgram (GLuint program); +GL_APICALL void GL_APIENTRY glVertexAttrib1f (GLuint indx, GLfloat x); +GL_APICALL void GL_APIENTRY glVertexAttrib1fv (GLuint indx, const GLfloat* values); +GL_APICALL void GL_APIENTRY glVertexAttrib2f (GLuint indx, GLfloat x, GLfloat y); +GL_APICALL void GL_APIENTRY glVertexAttrib2fv (GLuint indx, const GLfloat* values); +GL_APICALL void GL_APIENTRY glVertexAttrib3f (GLuint indx, GLfloat x, GLfloat y, GLfloat z); +GL_APICALL void GL_APIENTRY glVertexAttrib3fv (GLuint indx, const GLfloat* values); +GL_APICALL void GL_APIENTRY glVertexAttrib4f (GLuint indx, GLfloat x, GLfloat y, GLfloat z, GLfloat w); +GL_APICALL void GL_APIENTRY glVertexAttrib4fv (GLuint indx, const GLfloat* values); +GL_APICALL void GL_APIENTRY glVertexAttribPointer (GLuint indx, GLint size, GLenum type, GLboolean normalized, GLsizei stride, const 
GLvoid* ptr); +GL_APICALL void GL_APIENTRY glViewport (GLint x, GLint y, GLsizei width, GLsizei height); + +/* OpenGL ES 3.0 */ + +GL_APICALL void GL_APIENTRY glReadBuffer (GLenum mode); +GL_APICALL void GL_APIENTRY glDrawRangeElements (GLenum mode, GLuint start, GLuint end, GLsizei count, GLenum type, const GLvoid* indices); +GL_APICALL void GL_APIENTRY glTexImage3D (GLenum target, GLint level, GLint internalformat, GLsizei width, GLsizei height, GLsizei depth, GLint border, GLenum format, GLenum type, const GLvoid* pixels); +GL_APICALL void GL_APIENTRY glTexSubImage3D (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLenum format, GLenum type, const GLvoid* pixels); +GL_APICALL void GL_APIENTRY glCopyTexSubImage3D (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLint x, GLint y, GLsizei width, GLsizei height); +GL_APICALL void GL_APIENTRY glCompressedTexImage3D (GLenum target, GLint level, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth, GLint border, GLsizei imageSize, const GLvoid* data); +GL_APICALL void GL_APIENTRY glCompressedTexSubImage3D (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLenum format, GLsizei imageSize, const GLvoid* data); +GL_APICALL void GL_APIENTRY glGenQueries (GLsizei n, GLuint* ids); +GL_APICALL void GL_APIENTRY glDeleteQueries (GLsizei n, const GLuint* ids); +GL_APICALL GLboolean GL_APIENTRY glIsQuery (GLuint id); +GL_APICALL void GL_APIENTRY glBeginQuery (GLenum target, GLuint id); +GL_APICALL void GL_APIENTRY glEndQuery (GLenum target); +GL_APICALL void GL_APIENTRY glGetQueryiv (GLenum target, GLenum pname, GLint* params); +GL_APICALL void GL_APIENTRY glGetQueryObjectuiv (GLuint id, GLenum pname, GLuint* params); +GL_APICALL GLboolean GL_APIENTRY glUnmapBuffer (GLenum target); +GL_APICALL void GL_APIENTRY glGetBufferPointerv (GLenum target, 
GLenum pname, GLvoid** params); +GL_APICALL void GL_APIENTRY glDrawBuffers (GLsizei n, const GLenum* bufs); +GL_APICALL void GL_APIENTRY glUniformMatrix2x3fv (GLint location, GLsizei count, GLboolean transpose, const GLfloat* value); +GL_APICALL void GL_APIENTRY glUniformMatrix3x2fv (GLint location, GLsizei count, GLboolean transpose, const GLfloat* value); +GL_APICALL void GL_APIENTRY glUniformMatrix2x4fv (GLint location, GLsizei count, GLboolean transpose, const GLfloat* value); +GL_APICALL void GL_APIENTRY glUniformMatrix4x2fv (GLint location, GLsizei count, GLboolean transpose, const GLfloat* value); +GL_APICALL void GL_APIENTRY glUniformMatrix3x4fv (GLint location, GLsizei count, GLboolean transpose, const GLfloat* value); +GL_APICALL void GL_APIENTRY glUniformMatrix4x3fv (GLint location, GLsizei count, GLboolean transpose, const GLfloat* value); +GL_APICALL void GL_APIENTRY glBlitFramebuffer (GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1, GLbitfield mask, GLenum filter); +GL_APICALL void GL_APIENTRY glRenderbufferStorageMultisample (GLenum target, GLsizei samples, GLenum internalformat, GLsizei width, GLsizei height); +GL_APICALL void GL_APIENTRY glFramebufferTextureLayer (GLenum target, GLenum attachment, GLuint texture, GLint level, GLint layer); +GL_APICALL GLvoid* GL_APIENTRY glMapBufferRange (GLenum target, GLintptr offset, GLsizeiptr length, GLbitfield access); +GL_APICALL void GL_APIENTRY glFlushMappedBufferRange (GLenum target, GLintptr offset, GLsizeiptr length); +GL_APICALL void GL_APIENTRY glBindVertexArray (GLuint array); +GL_APICALL void GL_APIENTRY glDeleteVertexArrays (GLsizei n, const GLuint* arrays); +GL_APICALL void GL_APIENTRY glGenVertexArrays (GLsizei n, GLuint* arrays); +GL_APICALL GLboolean GL_APIENTRY glIsVertexArray (GLuint array); +GL_APICALL void GL_APIENTRY glGetIntegeri_v (GLenum target, GLuint index, GLint* data); +GL_APICALL void GL_APIENTRY glBeginTransformFeedback 
(GLenum primitiveMode); +GL_APICALL void GL_APIENTRY glEndTransformFeedback (void); +GL_APICALL void GL_APIENTRY glBindBufferRange (GLenum target, GLuint index, GLuint buffer, GLintptr offset, GLsizeiptr size); +GL_APICALL void GL_APIENTRY glBindBufferBase (GLenum target, GLuint index, GLuint buffer); +GL_APICALL void GL_APIENTRY glTransformFeedbackVaryings (GLuint program, GLsizei count, const GLchar* const* varyings, GLenum bufferMode); +GL_APICALL void GL_APIENTRY glGetTransformFeedbackVarying (GLuint program, GLuint index, GLsizei bufSize, GLsizei* length, GLsizei* size, GLenum* type, GLchar* name); +GL_APICALL void GL_APIENTRY glVertexAttribIPointer (GLuint index, GLint size, GLenum type, GLsizei stride, const GLvoid* pointer); +GL_APICALL void GL_APIENTRY glGetVertexAttribIiv (GLuint index, GLenum pname, GLint* params); +GL_APICALL void GL_APIENTRY glGetVertexAttribIuiv (GLuint index, GLenum pname, GLuint* params); +GL_APICALL void GL_APIENTRY glVertexAttribI4i (GLuint index, GLint x, GLint y, GLint z, GLint w); +GL_APICALL void GL_APIENTRY glVertexAttribI4ui (GLuint index, GLuint x, GLuint y, GLuint z, GLuint w); +GL_APICALL void GL_APIENTRY glVertexAttribI4iv (GLuint index, const GLint* v); +GL_APICALL void GL_APIENTRY glVertexAttribI4uiv (GLuint index, const GLuint* v); +GL_APICALL void GL_APIENTRY glGetUniformuiv (GLuint program, GLint location, GLuint* params); +GL_APICALL GLint GL_APIENTRY glGetFragDataLocation (GLuint program, const GLchar *name); +GL_APICALL void GL_APIENTRY glUniform1ui (GLint location, GLuint v0); +GL_APICALL void GL_APIENTRY glUniform2ui (GLint location, GLuint v0, GLuint v1); +GL_APICALL void GL_APIENTRY glUniform3ui (GLint location, GLuint v0, GLuint v1, GLuint v2); +GL_APICALL void GL_APIENTRY glUniform4ui (GLint location, GLuint v0, GLuint v1, GLuint v2, GLuint v3); +GL_APICALL void GL_APIENTRY glUniform1uiv (GLint location, GLsizei count, const GLuint* value); +GL_APICALL void GL_APIENTRY glUniform2uiv (GLint location, GLsizei 
count, const GLuint* value); +GL_APICALL void GL_APIENTRY glUniform3uiv (GLint location, GLsizei count, const GLuint* value); +GL_APICALL void GL_APIENTRY glUniform4uiv (GLint location, GLsizei count, const GLuint* value); +GL_APICALL void GL_APIENTRY glClearBufferiv (GLenum buffer, GLint drawbuffer, const GLint* value); +GL_APICALL void GL_APIENTRY glClearBufferuiv (GLenum buffer, GLint drawbuffer, const GLuint* value); +GL_APICALL void GL_APIENTRY glClearBufferfv (GLenum buffer, GLint drawbuffer, const GLfloat* value); +GL_APICALL void GL_APIENTRY glClearBufferfi (GLenum buffer, GLint drawbuffer, GLfloat depth, GLint stencil); +GL_APICALL const GLubyte* GL_APIENTRY glGetStringi (GLenum name, GLuint index); +GL_APICALL void GL_APIENTRY glCopyBufferSubData (GLenum readTarget, GLenum writeTarget, GLintptr readOffset, GLintptr writeOffset, GLsizeiptr size); +GL_APICALL void GL_APIENTRY glGetUniformIndices (GLuint program, GLsizei uniformCount, const GLchar* const* uniformNames, GLuint* uniformIndices); +GL_APICALL void GL_APIENTRY glGetActiveUniformsiv (GLuint program, GLsizei uniformCount, const GLuint* uniformIndices, GLenum pname, GLint* params); +GL_APICALL GLuint GL_APIENTRY glGetUniformBlockIndex (GLuint program, const GLchar* uniformBlockName); +GL_APICALL void GL_APIENTRY glGetActiveUniformBlockiv (GLuint program, GLuint uniformBlockIndex, GLenum pname, GLint* params); +GL_APICALL void GL_APIENTRY glGetActiveUniformBlockName (GLuint program, GLuint uniformBlockIndex, GLsizei bufSize, GLsizei* length, GLchar* uniformBlockName); +GL_APICALL void GL_APIENTRY glUniformBlockBinding (GLuint program, GLuint uniformBlockIndex, GLuint uniformBlockBinding); +GL_APICALL void GL_APIENTRY glDrawArraysInstanced (GLenum mode, GLint first, GLsizei count, GLsizei instanceCount); +GL_APICALL void GL_APIENTRY glDrawElementsInstanced (GLenum mode, GLsizei count, GLenum type, const GLvoid* indices, GLsizei instanceCount); +GL_APICALL GLsync GL_APIENTRY glFenceSync (GLenum 
condition, GLbitfield flags); +GL_APICALL GLboolean GL_APIENTRY glIsSync (GLsync sync); +GL_APICALL void GL_APIENTRY glDeleteSync (GLsync sync); +GL_APICALL GLenum GL_APIENTRY glClientWaitSync (GLsync sync, GLbitfield flags, GLuint64 timeout); +GL_APICALL void GL_APIENTRY glWaitSync (GLsync sync, GLbitfield flags, GLuint64 timeout); +GL_APICALL void GL_APIENTRY glGetInteger64v (GLenum pname, GLint64* params); +GL_APICALL void GL_APIENTRY glGetSynciv (GLsync sync, GLenum pname, GLsizei bufSize, GLsizei* length, GLint* values); +GL_APICALL void GL_APIENTRY glGetInteger64i_v (GLenum target, GLuint index, GLint64* data); +GL_APICALL void GL_APIENTRY glGetBufferParameteri64v (GLenum target, GLenum pname, GLint64* params); +GL_APICALL void GL_APIENTRY glGenSamplers (GLsizei count, GLuint* samplers); +GL_APICALL void GL_APIENTRY glDeleteSamplers (GLsizei count, const GLuint* samplers); +GL_APICALL GLboolean GL_APIENTRY glIsSampler (GLuint sampler); +GL_APICALL void GL_APIENTRY glBindSampler (GLuint unit, GLuint sampler); +GL_APICALL void GL_APIENTRY glSamplerParameteri (GLuint sampler, GLenum pname, GLint param); +GL_APICALL void GL_APIENTRY glSamplerParameteriv (GLuint sampler, GLenum pname, const GLint* param); +GL_APICALL void GL_APIENTRY glSamplerParameterf (GLuint sampler, GLenum pname, GLfloat param); +GL_APICALL void GL_APIENTRY glSamplerParameterfv (GLuint sampler, GLenum pname, const GLfloat* param); +GL_APICALL void GL_APIENTRY glGetSamplerParameteriv (GLuint sampler, GLenum pname, GLint* params); +GL_APICALL void GL_APIENTRY glGetSamplerParameterfv (GLuint sampler, GLenum pname, GLfloat* params); +GL_APICALL void GL_APIENTRY glVertexAttribDivisor (GLuint index, GLuint divisor); +GL_APICALL void GL_APIENTRY glBindTransformFeedback (GLenum target, GLuint id); +GL_APICALL void GL_APIENTRY glDeleteTransformFeedbacks (GLsizei n, const GLuint* ids); +GL_APICALL void GL_APIENTRY glGenTransformFeedbacks (GLsizei n, GLuint* ids); +GL_APICALL GLboolean GL_APIENTRY 
glIsTransformFeedback (GLuint id); +GL_APICALL void GL_APIENTRY glPauseTransformFeedback (void); +GL_APICALL void GL_APIENTRY glResumeTransformFeedback (void); +GL_APICALL void GL_APIENTRY glGetProgramBinary (GLuint program, GLsizei bufSize, GLsizei* length, GLenum* binaryFormat, GLvoid* binary); +GL_APICALL void GL_APIENTRY glProgramBinary (GLuint program, GLenum binaryFormat, const GLvoid* binary, GLsizei length); +GL_APICALL void GL_APIENTRY glProgramParameteri (GLuint program, GLenum pname, GLint value); +GL_APICALL void GL_APIENTRY glInvalidateFramebuffer (GLenum target, GLsizei numAttachments, const GLenum* attachments); +GL_APICALL void GL_APIENTRY glInvalidateSubFramebuffer (GLenum target, GLsizei numAttachments, const GLenum* attachments, GLint x, GLint y, GLsizei width, GLsizei height); +GL_APICALL void GL_APIENTRY glTexStorage2D (GLenum target, GLsizei levels, GLenum internalformat, GLsizei width, GLsizei height); +GL_APICALL void GL_APIENTRY glTexStorage3D (GLenum target, GLsizei levels, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth); +GL_APICALL void GL_APIENTRY glGetInternalformativ (GLenum target, GLenum internalformat, GLenum pname, GLsizei bufSize, GLint* params); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/opengles-3.1/include/GLES3/gl31.h b/opengles-3.1/include/GLES3/gl31.h new file mode 100644 index 0000000000..987724a349 --- /dev/null +++ b/opengles-3.1/include/GLES3/gl31.h @@ -0,0 +1,1187 @@ +#ifndef __gl31_h_ +#define __gl31_h_ 1 + +#ifdef __cplusplus +extern "C" { +#endif + +/* +** Copyright (c) 2013-2014 The Khronos Group Inc. 
+** +** Permission is hereby granted, free of charge, to any person obtaining a +** copy of this software and/or associated documentation files (the +** "Materials"), to deal in the Materials without restriction, including +** without limitation the rights to use, copy, modify, merge, publish, +** distribute, sublicense, and/or sell copies of the Materials, and to +** permit persons to whom the Materials are furnished to do so, subject to +** the following conditions: +** +** The above copyright notice and this permission notice shall be included +** in all copies or substantial portions of the Materials. +** +** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +** MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. +*/ +/* +** This header is generated from the Khronos OpenGL / OpenGL ES XML +** API Registry. 
The current version of the Registry, generator scripts +** used to make the header, and the header can be found at +** http://www.opengl.org/registry/ +** +** Khronos $Revision$ on $Date$ +*/ + +#include +#ifndef GL_APIENTRYP +#define GL_APIENTRYP GL_APIENTRY* +#endif + +/* Generated on date 20140620 */ + +/* Generated C header for: + * API: gles2 + * Profile: common + * Versions considered: 2.[0-9]|3.[01] + * Versions emitted: .* + * Default extensions included: None + * Additional extensions included: _nomatch_^ + * Extensions removed: _nomatch_^ + */ + +#ifndef GL_ES_VERSION_2_0 +#define GL_ES_VERSION_2_0 1 +#include +typedef khronos_int8_t GLbyte; +typedef khronos_float_t GLclampf; +typedef khronos_int32_t GLfixed; +typedef short GLshort; +typedef unsigned short GLushort; +typedef void GLvoid; +typedef struct __GLsync *GLsync; +typedef khronos_int64_t GLint64; +typedef khronos_uint64_t GLuint64; +typedef unsigned int GLenum; +typedef unsigned int GLuint; +typedef char GLchar; +typedef khronos_float_t GLfloat; +typedef khronos_ssize_t GLsizeiptr; +typedef khronos_intptr_t GLintptr; +typedef unsigned int GLbitfield; +typedef int GLint; +typedef unsigned char GLboolean; +typedef int GLsizei; +typedef khronos_uint8_t GLubyte; +#define GL_DEPTH_BUFFER_BIT 0x00000100 +#define GL_STENCIL_BUFFER_BIT 0x00000400 +#define GL_COLOR_BUFFER_BIT 0x00004000 +#define GL_FALSE 0 +#define GL_TRUE 1 +#define GL_POINTS 0x0000 +#define GL_LINES 0x0001 +#define GL_LINE_LOOP 0x0002 +#define GL_LINE_STRIP 0x0003 +#define GL_TRIANGLES 0x0004 +#define GL_TRIANGLE_STRIP 0x0005 +#define GL_TRIANGLE_FAN 0x0006 +#define GL_ZERO 0 +#define GL_ONE 1 +#define GL_SRC_COLOR 0x0300 +#define GL_ONE_MINUS_SRC_COLOR 0x0301 +#define GL_SRC_ALPHA 0x0302 +#define GL_ONE_MINUS_SRC_ALPHA 0x0303 +#define GL_DST_ALPHA 0x0304 +#define GL_ONE_MINUS_DST_ALPHA 0x0305 +#define GL_DST_COLOR 0x0306 +#define GL_ONE_MINUS_DST_COLOR 0x0307 +#define GL_SRC_ALPHA_SATURATE 0x0308 +#define GL_FUNC_ADD 0x8006 +#define 
GL_BLEND_EQUATION 0x8009 +#define GL_BLEND_EQUATION_RGB 0x8009 +#define GL_BLEND_EQUATION_ALPHA 0x883D +#define GL_FUNC_SUBTRACT 0x800A +#define GL_FUNC_REVERSE_SUBTRACT 0x800B +#define GL_BLEND_DST_RGB 0x80C8 +#define GL_BLEND_SRC_RGB 0x80C9 +#define GL_BLEND_DST_ALPHA 0x80CA +#define GL_BLEND_SRC_ALPHA 0x80CB +#define GL_CONSTANT_COLOR 0x8001 +#define GL_ONE_MINUS_CONSTANT_COLOR 0x8002 +#define GL_CONSTANT_ALPHA 0x8003 +#define GL_ONE_MINUS_CONSTANT_ALPHA 0x8004 +#define GL_BLEND_COLOR 0x8005 +#define GL_ARRAY_BUFFER 0x8892 +#define GL_ELEMENT_ARRAY_BUFFER 0x8893 +#define GL_ARRAY_BUFFER_BINDING 0x8894 +#define GL_ELEMENT_ARRAY_BUFFER_BINDING 0x8895 +#define GL_STREAM_DRAW 0x88E0 +#define GL_STATIC_DRAW 0x88E4 +#define GL_DYNAMIC_DRAW 0x88E8 +#define GL_BUFFER_SIZE 0x8764 +#define GL_BUFFER_USAGE 0x8765 +#define GL_CURRENT_VERTEX_ATTRIB 0x8626 +#define GL_FRONT 0x0404 +#define GL_BACK 0x0405 +#define GL_FRONT_AND_BACK 0x0408 +#define GL_TEXTURE_2D 0x0DE1 +#define GL_CULL_FACE 0x0B44 +#define GL_BLEND 0x0BE2 +#define GL_DITHER 0x0BD0 +#define GL_STENCIL_TEST 0x0B90 +#define GL_DEPTH_TEST 0x0B71 +#define GL_SCISSOR_TEST 0x0C11 +#define GL_POLYGON_OFFSET_FILL 0x8037 +#define GL_SAMPLE_ALPHA_TO_COVERAGE 0x809E +#define GL_SAMPLE_COVERAGE 0x80A0 +#define GL_NO_ERROR 0 +#define GL_INVALID_ENUM 0x0500 +#define GL_INVALID_VALUE 0x0501 +#define GL_INVALID_OPERATION 0x0502 +#define GL_OUT_OF_MEMORY 0x0505 +#define GL_CW 0x0900 +#define GL_CCW 0x0901 +#define GL_LINE_WIDTH 0x0B21 +#define GL_ALIASED_POINT_SIZE_RANGE 0x846D +#define GL_ALIASED_LINE_WIDTH_RANGE 0x846E +#define GL_CULL_FACE_MODE 0x0B45 +#define GL_FRONT_FACE 0x0B46 +#define GL_DEPTH_RANGE 0x0B70 +#define GL_DEPTH_WRITEMASK 0x0B72 +#define GL_DEPTH_CLEAR_VALUE 0x0B73 +#define GL_DEPTH_FUNC 0x0B74 +#define GL_STENCIL_CLEAR_VALUE 0x0B91 +#define GL_STENCIL_FUNC 0x0B92 +#define GL_STENCIL_FAIL 0x0B94 +#define GL_STENCIL_PASS_DEPTH_FAIL 0x0B95 +#define GL_STENCIL_PASS_DEPTH_PASS 0x0B96 +#define GL_STENCIL_REF 
0x0B97 +#define GL_STENCIL_VALUE_MASK 0x0B93 +#define GL_STENCIL_WRITEMASK 0x0B98 +#define GL_STENCIL_BACK_FUNC 0x8800 +#define GL_STENCIL_BACK_FAIL 0x8801 +#define GL_STENCIL_BACK_PASS_DEPTH_FAIL 0x8802 +#define GL_STENCIL_BACK_PASS_DEPTH_PASS 0x8803 +#define GL_STENCIL_BACK_REF 0x8CA3 +#define GL_STENCIL_BACK_VALUE_MASK 0x8CA4 +#define GL_STENCIL_BACK_WRITEMASK 0x8CA5 +#define GL_VIEWPORT 0x0BA2 +#define GL_SCISSOR_BOX 0x0C10 +#define GL_COLOR_CLEAR_VALUE 0x0C22 +#define GL_COLOR_WRITEMASK 0x0C23 +#define GL_UNPACK_ALIGNMENT 0x0CF5 +#define GL_PACK_ALIGNMENT 0x0D05 +#define GL_MAX_TEXTURE_SIZE 0x0D33 +#define GL_MAX_VIEWPORT_DIMS 0x0D3A +#define GL_SUBPIXEL_BITS 0x0D50 +#define GL_RED_BITS 0x0D52 +#define GL_GREEN_BITS 0x0D53 +#define GL_BLUE_BITS 0x0D54 +#define GL_ALPHA_BITS 0x0D55 +#define GL_DEPTH_BITS 0x0D56 +#define GL_STENCIL_BITS 0x0D57 +#define GL_POLYGON_OFFSET_UNITS 0x2A00 +#define GL_POLYGON_OFFSET_FACTOR 0x8038 +#define GL_TEXTURE_BINDING_2D 0x8069 +#define GL_SAMPLE_BUFFERS 0x80A8 +#define GL_SAMPLES 0x80A9 +#define GL_SAMPLE_COVERAGE_VALUE 0x80AA +#define GL_SAMPLE_COVERAGE_INVERT 0x80AB +#define GL_NUM_COMPRESSED_TEXTURE_FORMATS 0x86A2 +#define GL_COMPRESSED_TEXTURE_FORMATS 0x86A3 +#define GL_DONT_CARE 0x1100 +#define GL_FASTEST 0x1101 +#define GL_NICEST 0x1102 +#define GL_GENERATE_MIPMAP_HINT 0x8192 +#define GL_BYTE 0x1400 +#define GL_UNSIGNED_BYTE 0x1401 +#define GL_SHORT 0x1402 +#define GL_UNSIGNED_SHORT 0x1403 +#define GL_INT 0x1404 +#define GL_UNSIGNED_INT 0x1405 +#define GL_FLOAT 0x1406 +#define GL_FIXED 0x140C +#define GL_DEPTH_COMPONENT 0x1902 +#define GL_ALPHA 0x1906 +#define GL_RGB 0x1907 +#define GL_RGBA 0x1908 +#define GL_LUMINANCE 0x1909 +#define GL_LUMINANCE_ALPHA 0x190A +#define GL_UNSIGNED_SHORT_4_4_4_4 0x8033 +#define GL_UNSIGNED_SHORT_5_5_5_1 0x8034 +#define GL_UNSIGNED_SHORT_5_6_5 0x8363 +#define GL_FRAGMENT_SHADER 0x8B30 +#define GL_VERTEX_SHADER 0x8B31 +#define GL_MAX_VERTEX_ATTRIBS 0x8869 +#define 
GL_MAX_VERTEX_UNIFORM_VECTORS 0x8DFB +#define GL_MAX_VARYING_VECTORS 0x8DFC +#define GL_MAX_COMBINED_TEXTURE_IMAGE_UNITS 0x8B4D +#define GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS 0x8B4C +#define GL_MAX_TEXTURE_IMAGE_UNITS 0x8872 +#define GL_MAX_FRAGMENT_UNIFORM_VECTORS 0x8DFD +#define GL_SHADER_TYPE 0x8B4F +#define GL_DELETE_STATUS 0x8B80 +#define GL_LINK_STATUS 0x8B82 +#define GL_VALIDATE_STATUS 0x8B83 +#define GL_ATTACHED_SHADERS 0x8B85 +#define GL_ACTIVE_UNIFORMS 0x8B86 +#define GL_ACTIVE_UNIFORM_MAX_LENGTH 0x8B87 +#define GL_ACTIVE_ATTRIBUTES 0x8B89 +#define GL_ACTIVE_ATTRIBUTE_MAX_LENGTH 0x8B8A +#define GL_SHADING_LANGUAGE_VERSION 0x8B8C +#define GL_CURRENT_PROGRAM 0x8B8D +#define GL_NEVER 0x0200 +#define GL_LESS 0x0201 +#define GL_EQUAL 0x0202 +#define GL_LEQUAL 0x0203 +#define GL_GREATER 0x0204 +#define GL_NOTEQUAL 0x0205 +#define GL_GEQUAL 0x0206 +#define GL_ALWAYS 0x0207 +#define GL_KEEP 0x1E00 +#define GL_REPLACE 0x1E01 +#define GL_INCR 0x1E02 +#define GL_DECR 0x1E03 +#define GL_INVERT 0x150A +#define GL_INCR_WRAP 0x8507 +#define GL_DECR_WRAP 0x8508 +#define GL_VENDOR 0x1F00 +#define GL_RENDERER 0x1F01 +#define GL_VERSION 0x1F02 +#define GL_EXTENSIONS 0x1F03 +#define GL_NEAREST 0x2600 +#define GL_LINEAR 0x2601 +#define GL_NEAREST_MIPMAP_NEAREST 0x2700 +#define GL_LINEAR_MIPMAP_NEAREST 0x2701 +#define GL_NEAREST_MIPMAP_LINEAR 0x2702 +#define GL_LINEAR_MIPMAP_LINEAR 0x2703 +#define GL_TEXTURE_MAG_FILTER 0x2800 +#define GL_TEXTURE_MIN_FILTER 0x2801 +#define GL_TEXTURE_WRAP_S 0x2802 +#define GL_TEXTURE_WRAP_T 0x2803 +#define GL_TEXTURE 0x1702 +#define GL_TEXTURE_CUBE_MAP 0x8513 +#define GL_TEXTURE_BINDING_CUBE_MAP 0x8514 +#define GL_TEXTURE_CUBE_MAP_POSITIVE_X 0x8515 +#define GL_TEXTURE_CUBE_MAP_NEGATIVE_X 0x8516 +#define GL_TEXTURE_CUBE_MAP_POSITIVE_Y 0x8517 +#define GL_TEXTURE_CUBE_MAP_NEGATIVE_Y 0x8518 +#define GL_TEXTURE_CUBE_MAP_POSITIVE_Z 0x8519 +#define GL_TEXTURE_CUBE_MAP_NEGATIVE_Z 0x851A +#define GL_MAX_CUBE_MAP_TEXTURE_SIZE 0x851C +#define GL_TEXTURE0 
0x84C0 +#define GL_TEXTURE1 0x84C1 +#define GL_TEXTURE2 0x84C2 +#define GL_TEXTURE3 0x84C3 +#define GL_TEXTURE4 0x84C4 +#define GL_TEXTURE5 0x84C5 +#define GL_TEXTURE6 0x84C6 +#define GL_TEXTURE7 0x84C7 +#define GL_TEXTURE8 0x84C8 +#define GL_TEXTURE9 0x84C9 +#define GL_TEXTURE10 0x84CA +#define GL_TEXTURE11 0x84CB +#define GL_TEXTURE12 0x84CC +#define GL_TEXTURE13 0x84CD +#define GL_TEXTURE14 0x84CE +#define GL_TEXTURE15 0x84CF +#define GL_TEXTURE16 0x84D0 +#define GL_TEXTURE17 0x84D1 +#define GL_TEXTURE18 0x84D2 +#define GL_TEXTURE19 0x84D3 +#define GL_TEXTURE20 0x84D4 +#define GL_TEXTURE21 0x84D5 +#define GL_TEXTURE22 0x84D6 +#define GL_TEXTURE23 0x84D7 +#define GL_TEXTURE24 0x84D8 +#define GL_TEXTURE25 0x84D9 +#define GL_TEXTURE26 0x84DA +#define GL_TEXTURE27 0x84DB +#define GL_TEXTURE28 0x84DC +#define GL_TEXTURE29 0x84DD +#define GL_TEXTURE30 0x84DE +#define GL_TEXTURE31 0x84DF +#define GL_ACTIVE_TEXTURE 0x84E0 +#define GL_REPEAT 0x2901 +#define GL_CLAMP_TO_EDGE 0x812F +#define GL_MIRRORED_REPEAT 0x8370 +#define GL_FLOAT_VEC2 0x8B50 +#define GL_FLOAT_VEC3 0x8B51 +#define GL_FLOAT_VEC4 0x8B52 +#define GL_INT_VEC2 0x8B53 +#define GL_INT_VEC3 0x8B54 +#define GL_INT_VEC4 0x8B55 +#define GL_BOOL 0x8B56 +#define GL_BOOL_VEC2 0x8B57 +#define GL_BOOL_VEC3 0x8B58 +#define GL_BOOL_VEC4 0x8B59 +#define GL_FLOAT_MAT2 0x8B5A +#define GL_FLOAT_MAT3 0x8B5B +#define GL_FLOAT_MAT4 0x8B5C +#define GL_SAMPLER_2D 0x8B5E +#define GL_SAMPLER_CUBE 0x8B60 +#define GL_VERTEX_ATTRIB_ARRAY_ENABLED 0x8622 +#define GL_VERTEX_ATTRIB_ARRAY_SIZE 0x8623 +#define GL_VERTEX_ATTRIB_ARRAY_STRIDE 0x8624 +#define GL_VERTEX_ATTRIB_ARRAY_TYPE 0x8625 +#define GL_VERTEX_ATTRIB_ARRAY_NORMALIZED 0x886A +#define GL_VERTEX_ATTRIB_ARRAY_POINTER 0x8645 +#define GL_VERTEX_ATTRIB_ARRAY_BUFFER_BINDING 0x889F +#define GL_IMPLEMENTATION_COLOR_READ_TYPE 0x8B9A +#define GL_IMPLEMENTATION_COLOR_READ_FORMAT 0x8B9B +#define GL_COMPILE_STATUS 0x8B81 +#define GL_INFO_LOG_LENGTH 0x8B84 +#define GL_SHADER_SOURCE_LENGTH 
0x8B88 +#define GL_SHADER_COMPILER 0x8DFA +#define GL_SHADER_BINARY_FORMATS 0x8DF8 +#define GL_NUM_SHADER_BINARY_FORMATS 0x8DF9 +#define GL_LOW_FLOAT 0x8DF0 +#define GL_MEDIUM_FLOAT 0x8DF1 +#define GL_HIGH_FLOAT 0x8DF2 +#define GL_LOW_INT 0x8DF3 +#define GL_MEDIUM_INT 0x8DF4 +#define GL_HIGH_INT 0x8DF5 +#define GL_FRAMEBUFFER 0x8D40 +#define GL_RENDERBUFFER 0x8D41 +#define GL_RGBA4 0x8056 +#define GL_RGB5_A1 0x8057 +#define GL_RGB565 0x8D62 +#define GL_DEPTH_COMPONENT16 0x81A5 +#define GL_STENCIL_INDEX8 0x8D48 +#define GL_RENDERBUFFER_WIDTH 0x8D42 +#define GL_RENDERBUFFER_HEIGHT 0x8D43 +#define GL_RENDERBUFFER_INTERNAL_FORMAT 0x8D44 +#define GL_RENDERBUFFER_RED_SIZE 0x8D50 +#define GL_RENDERBUFFER_GREEN_SIZE 0x8D51 +#define GL_RENDERBUFFER_BLUE_SIZE 0x8D52 +#define GL_RENDERBUFFER_ALPHA_SIZE 0x8D53 +#define GL_RENDERBUFFER_DEPTH_SIZE 0x8D54 +#define GL_RENDERBUFFER_STENCIL_SIZE 0x8D55 +#define GL_FRAMEBUFFER_ATTACHMENT_OBJECT_TYPE 0x8CD0 +#define GL_FRAMEBUFFER_ATTACHMENT_OBJECT_NAME 0x8CD1 +#define GL_FRAMEBUFFER_ATTACHMENT_TEXTURE_LEVEL 0x8CD2 +#define GL_FRAMEBUFFER_ATTACHMENT_TEXTURE_CUBE_MAP_FACE 0x8CD3 +#define GL_COLOR_ATTACHMENT0 0x8CE0 +#define GL_DEPTH_ATTACHMENT 0x8D00 +#define GL_STENCIL_ATTACHMENT 0x8D20 +#define GL_NONE 0 +#define GL_FRAMEBUFFER_COMPLETE 0x8CD5 +#define GL_FRAMEBUFFER_INCOMPLETE_ATTACHMENT 0x8CD6 +#define GL_FRAMEBUFFER_INCOMPLETE_MISSING_ATTACHMENT 0x8CD7 +#define GL_FRAMEBUFFER_INCOMPLETE_DIMENSIONS 0x8CD9 +#define GL_FRAMEBUFFER_UNSUPPORTED 0x8CDD +#define GL_FRAMEBUFFER_BINDING 0x8CA6 +#define GL_RENDERBUFFER_BINDING 0x8CA7 +#define GL_MAX_RENDERBUFFER_SIZE 0x84E8 +#define GL_INVALID_FRAMEBUFFER_OPERATION 0x0506 +GL_APICALL void GL_APIENTRY glActiveTexture (GLenum texture); +GL_APICALL void GL_APIENTRY glAttachShader (GLuint program, GLuint shader); +GL_APICALL void GL_APIENTRY glBindAttribLocation (GLuint program, GLuint index, const GLchar *name); +GL_APICALL void GL_APIENTRY glBindBuffer (GLenum target, GLuint buffer); 
+GL_APICALL void GL_APIENTRY glBindFramebuffer (GLenum target, GLuint framebuffer); +GL_APICALL void GL_APIENTRY glBindRenderbuffer (GLenum target, GLuint renderbuffer); +GL_APICALL void GL_APIENTRY glBindTexture (GLenum target, GLuint texture); +GL_APICALL void GL_APIENTRY glBlendColor (GLfloat red, GLfloat green, GLfloat blue, GLfloat alpha); +GL_APICALL void GL_APIENTRY glBlendEquation (GLenum mode); +GL_APICALL void GL_APIENTRY glBlendEquationSeparate (GLenum modeRGB, GLenum modeAlpha); +GL_APICALL void GL_APIENTRY glBlendFunc (GLenum sfactor, GLenum dfactor); +GL_APICALL void GL_APIENTRY glBlendFuncSeparate (GLenum sfactorRGB, GLenum dfactorRGB, GLenum sfactorAlpha, GLenum dfactorAlpha); +GL_APICALL void GL_APIENTRY glBufferData (GLenum target, GLsizeiptr size, const void *data, GLenum usage); +GL_APICALL void GL_APIENTRY glBufferSubData (GLenum target, GLintptr offset, GLsizeiptr size, const void *data); +GL_APICALL GLenum GL_APIENTRY glCheckFramebufferStatus (GLenum target); +GL_APICALL void GL_APIENTRY glClear (GLbitfield mask); +GL_APICALL void GL_APIENTRY glClearColor (GLfloat red, GLfloat green, GLfloat blue, GLfloat alpha); +GL_APICALL void GL_APIENTRY glClearDepthf (GLfloat d); +GL_APICALL void GL_APIENTRY glClearStencil (GLint s); +GL_APICALL void GL_APIENTRY glColorMask (GLboolean red, GLboolean green, GLboolean blue, GLboolean alpha); +GL_APICALL void GL_APIENTRY glCompileShader (GLuint shader); +GL_APICALL void GL_APIENTRY glCompressedTexImage2D (GLenum target, GLint level, GLenum internalformat, GLsizei width, GLsizei height, GLint border, GLsizei imageSize, const void *data); +GL_APICALL void GL_APIENTRY glCompressedTexSubImage2D (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLsizei width, GLsizei height, GLenum format, GLsizei imageSize, const void *data); +GL_APICALL void GL_APIENTRY glCopyTexImage2D (GLenum target, GLint level, GLenum internalformat, GLint x, GLint y, GLsizei width, GLsizei height, GLint border); +GL_APICALL void 
GL_APIENTRY glCopyTexSubImage2D (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint x, GLint y, GLsizei width, GLsizei height); +GL_APICALL GLuint GL_APIENTRY glCreateProgram (void); +GL_APICALL GLuint GL_APIENTRY glCreateShader (GLenum type); +GL_APICALL void GL_APIENTRY glCullFace (GLenum mode); +GL_APICALL void GL_APIENTRY glDeleteBuffers (GLsizei n, const GLuint *buffers); +GL_APICALL void GL_APIENTRY glDeleteFramebuffers (GLsizei n, const GLuint *framebuffers); +GL_APICALL void GL_APIENTRY glDeleteProgram (GLuint program); +GL_APICALL void GL_APIENTRY glDeleteRenderbuffers (GLsizei n, const GLuint *renderbuffers); +GL_APICALL void GL_APIENTRY glDeleteShader (GLuint shader); +GL_APICALL void GL_APIENTRY glDeleteTextures (GLsizei n, const GLuint *textures); +GL_APICALL void GL_APIENTRY glDepthFunc (GLenum func); +GL_APICALL void GL_APIENTRY glDepthMask (GLboolean flag); +GL_APICALL void GL_APIENTRY glDepthRangef (GLfloat n, GLfloat f); +GL_APICALL void GL_APIENTRY glDetachShader (GLuint program, GLuint shader); +GL_APICALL void GL_APIENTRY glDisable (GLenum cap); +GL_APICALL void GL_APIENTRY glDisableVertexAttribArray (GLuint index); +GL_APICALL void GL_APIENTRY glDrawArrays (GLenum mode, GLint first, GLsizei count); +GL_APICALL void GL_APIENTRY glDrawElements (GLenum mode, GLsizei count, GLenum type, const void *indices); +GL_APICALL void GL_APIENTRY glEnable (GLenum cap); +GL_APICALL void GL_APIENTRY glEnableVertexAttribArray (GLuint index); +GL_APICALL void GL_APIENTRY glFinish (void); +GL_APICALL void GL_APIENTRY glFlush (void); +GL_APICALL void GL_APIENTRY glFramebufferRenderbuffer (GLenum target, GLenum attachment, GLenum renderbuffertarget, GLuint renderbuffer); +GL_APICALL void GL_APIENTRY glFramebufferTexture2D (GLenum target, GLenum attachment, GLenum textarget, GLuint texture, GLint level); +GL_APICALL void GL_APIENTRY glFrontFace (GLenum mode); +GL_APICALL void GL_APIENTRY glGenBuffers (GLsizei n, GLuint *buffers); +GL_APICALL void 
GL_APIENTRY glGenerateMipmap (GLenum target); +GL_APICALL void GL_APIENTRY glGenFramebuffers (GLsizei n, GLuint *framebuffers); +GL_APICALL void GL_APIENTRY glGenRenderbuffers (GLsizei n, GLuint *renderbuffers); +GL_APICALL void GL_APIENTRY glGenTextures (GLsizei n, GLuint *textures); +GL_APICALL void GL_APIENTRY glGetActiveAttrib (GLuint program, GLuint index, GLsizei bufSize, GLsizei *length, GLint *size, GLenum *type, GLchar *name); +GL_APICALL void GL_APIENTRY glGetActiveUniform (GLuint program, GLuint index, GLsizei bufSize, GLsizei *length, GLint *size, GLenum *type, GLchar *name); +GL_APICALL void GL_APIENTRY glGetAttachedShaders (GLuint program, GLsizei maxCount, GLsizei *count, GLuint *shaders); +GL_APICALL GLint GL_APIENTRY glGetAttribLocation (GLuint program, const GLchar *name); +GL_APICALL void GL_APIENTRY glGetBooleanv (GLenum pname, GLboolean *data); +GL_APICALL void GL_APIENTRY glGetBufferParameteriv (GLenum target, GLenum pname, GLint *params); +GL_APICALL GLenum GL_APIENTRY glGetError (void); +GL_APICALL void GL_APIENTRY glGetFloatv (GLenum pname, GLfloat *data); +GL_APICALL void GL_APIENTRY glGetFramebufferAttachmentParameteriv (GLenum target, GLenum attachment, GLenum pname, GLint *params); +GL_APICALL void GL_APIENTRY glGetIntegerv (GLenum pname, GLint *data); +GL_APICALL void GL_APIENTRY glGetProgramiv (GLuint program, GLenum pname, GLint *params); +GL_APICALL void GL_APIENTRY glGetProgramInfoLog (GLuint program, GLsizei bufSize, GLsizei *length, GLchar *infoLog); +GL_APICALL void GL_APIENTRY glGetRenderbufferParameteriv (GLenum target, GLenum pname, GLint *params); +GL_APICALL void GL_APIENTRY glGetShaderiv (GLuint shader, GLenum pname, GLint *params); +GL_APICALL void GL_APIENTRY glGetShaderInfoLog (GLuint shader, GLsizei bufSize, GLsizei *length, GLchar *infoLog); +GL_APICALL void GL_APIENTRY glGetShaderPrecisionFormat (GLenum shadertype, GLenum precisiontype, GLint *range, GLint *precision); +GL_APICALL void GL_APIENTRY glGetShaderSource 
(GLuint shader, GLsizei bufSize, GLsizei *length, GLchar *source); +GL_APICALL const GLubyte *GL_APIENTRY glGetString (GLenum name); +GL_APICALL void GL_APIENTRY glGetTexParameterfv (GLenum target, GLenum pname, GLfloat *params); +GL_APICALL void GL_APIENTRY glGetTexParameteriv (GLenum target, GLenum pname, GLint *params); +GL_APICALL void GL_APIENTRY glGetUniformfv (GLuint program, GLint location, GLfloat *params); +GL_APICALL void GL_APIENTRY glGetUniformiv (GLuint program, GLint location, GLint *params); +GL_APICALL GLint GL_APIENTRY glGetUniformLocation (GLuint program, const GLchar *name); +GL_APICALL void GL_APIENTRY glGetVertexAttribfv (GLuint index, GLenum pname, GLfloat *params); +GL_APICALL void GL_APIENTRY glGetVertexAttribiv (GLuint index, GLenum pname, GLint *params); +GL_APICALL void GL_APIENTRY glGetVertexAttribPointerv (GLuint index, GLenum pname, void **pointer); +GL_APICALL void GL_APIENTRY glHint (GLenum target, GLenum mode); +GL_APICALL GLboolean GL_APIENTRY glIsBuffer (GLuint buffer); +GL_APICALL GLboolean GL_APIENTRY glIsEnabled (GLenum cap); +GL_APICALL GLboolean GL_APIENTRY glIsFramebuffer (GLuint framebuffer); +GL_APICALL GLboolean GL_APIENTRY glIsProgram (GLuint program); +GL_APICALL GLboolean GL_APIENTRY glIsRenderbuffer (GLuint renderbuffer); +GL_APICALL GLboolean GL_APIENTRY glIsShader (GLuint shader); +GL_APICALL GLboolean GL_APIENTRY glIsTexture (GLuint texture); +GL_APICALL void GL_APIENTRY glLineWidth (GLfloat width); +GL_APICALL void GL_APIENTRY glLinkProgram (GLuint program); +GL_APICALL void GL_APIENTRY glPixelStorei (GLenum pname, GLint param); +GL_APICALL void GL_APIENTRY glPolygonOffset (GLfloat factor, GLfloat units); +GL_APICALL void GL_APIENTRY glReadPixels (GLint x, GLint y, GLsizei width, GLsizei height, GLenum format, GLenum type, void *pixels); +GL_APICALL void GL_APIENTRY glReleaseShaderCompiler (void); +GL_APICALL void GL_APIENTRY glRenderbufferStorage (GLenum target, GLenum internalformat, GLsizei width, GLsizei 
height); +GL_APICALL void GL_APIENTRY glSampleCoverage (GLfloat value, GLboolean invert); +GL_APICALL void GL_APIENTRY glScissor (GLint x, GLint y, GLsizei width, GLsizei height); +GL_APICALL void GL_APIENTRY glShaderBinary (GLsizei count, const GLuint *shaders, GLenum binaryformat, const void *binary, GLsizei length); +GL_APICALL void GL_APIENTRY glShaderSource (GLuint shader, GLsizei count, const GLchar *const*string, const GLint *length); +GL_APICALL void GL_APIENTRY glStencilFunc (GLenum func, GLint ref, GLuint mask); +GL_APICALL void GL_APIENTRY glStencilFuncSeparate (GLenum face, GLenum func, GLint ref, GLuint mask); +GL_APICALL void GL_APIENTRY glStencilMask (GLuint mask); +GL_APICALL void GL_APIENTRY glStencilMaskSeparate (GLenum face, GLuint mask); +GL_APICALL void GL_APIENTRY glStencilOp (GLenum fail, GLenum zfail, GLenum zpass); +GL_APICALL void GL_APIENTRY glStencilOpSeparate (GLenum face, GLenum sfail, GLenum dpfail, GLenum dppass); +GL_APICALL void GL_APIENTRY glTexImage2D (GLenum target, GLint level, GLint internalformat, GLsizei width, GLsizei height, GLint border, GLenum format, GLenum type, const void *pixels); +GL_APICALL void GL_APIENTRY glTexParameterf (GLenum target, GLenum pname, GLfloat param); +GL_APICALL void GL_APIENTRY glTexParameterfv (GLenum target, GLenum pname, const GLfloat *params); +GL_APICALL void GL_APIENTRY glTexParameteri (GLenum target, GLenum pname, GLint param); +GL_APICALL void GL_APIENTRY glTexParameteriv (GLenum target, GLenum pname, const GLint *params); +GL_APICALL void GL_APIENTRY glTexSubImage2D (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLsizei width, GLsizei height, GLenum format, GLenum type, const void *pixels); +GL_APICALL void GL_APIENTRY glUniform1f (GLint location, GLfloat v0); +GL_APICALL void GL_APIENTRY glUniform1fv (GLint location, GLsizei count, const GLfloat *value); +GL_APICALL void GL_APIENTRY glUniform1i (GLint location, GLint v0); +GL_APICALL void GL_APIENTRY glUniform1iv (GLint 
location, GLsizei count, const GLint *value); +GL_APICALL void GL_APIENTRY glUniform2f (GLint location, GLfloat v0, GLfloat v1); +GL_APICALL void GL_APIENTRY glUniform2fv (GLint location, GLsizei count, const GLfloat *value); +GL_APICALL void GL_APIENTRY glUniform2i (GLint location, GLint v0, GLint v1); +GL_APICALL void GL_APIENTRY glUniform2iv (GLint location, GLsizei count, const GLint *value); +GL_APICALL void GL_APIENTRY glUniform3f (GLint location, GLfloat v0, GLfloat v1, GLfloat v2); +GL_APICALL void GL_APIENTRY glUniform3fv (GLint location, GLsizei count, const GLfloat *value); +GL_APICALL void GL_APIENTRY glUniform3i (GLint location, GLint v0, GLint v1, GLint v2); +GL_APICALL void GL_APIENTRY glUniform3iv (GLint location, GLsizei count, const GLint *value); +GL_APICALL void GL_APIENTRY glUniform4f (GLint location, GLfloat v0, GLfloat v1, GLfloat v2, GLfloat v3); +GL_APICALL void GL_APIENTRY glUniform4fv (GLint location, GLsizei count, const GLfloat *value); +GL_APICALL void GL_APIENTRY glUniform4i (GLint location, GLint v0, GLint v1, GLint v2, GLint v3); +GL_APICALL void GL_APIENTRY glUniform4iv (GLint location, GLsizei count, const GLint *value); +GL_APICALL void GL_APIENTRY glUniformMatrix2fv (GLint location, GLsizei count, GLboolean transpose, const GLfloat *value); +GL_APICALL void GL_APIENTRY glUniformMatrix3fv (GLint location, GLsizei count, GLboolean transpose, const GLfloat *value); +GL_APICALL void GL_APIENTRY glUniformMatrix4fv (GLint location, GLsizei count, GLboolean transpose, const GLfloat *value); +GL_APICALL void GL_APIENTRY glUseProgram (GLuint program); +GL_APICALL void GL_APIENTRY glValidateProgram (GLuint program); +GL_APICALL void GL_APIENTRY glVertexAttrib1f (GLuint index, GLfloat x); +GL_APICALL void GL_APIENTRY glVertexAttrib1fv (GLuint index, const GLfloat *v); +GL_APICALL void GL_APIENTRY glVertexAttrib2f (GLuint index, GLfloat x, GLfloat y); +GL_APICALL void GL_APIENTRY glVertexAttrib2fv (GLuint index, const GLfloat *v); 
+GL_APICALL void GL_APIENTRY glVertexAttrib3f (GLuint index, GLfloat x, GLfloat y, GLfloat z); +GL_APICALL void GL_APIENTRY glVertexAttrib3fv (GLuint index, const GLfloat *v); +GL_APICALL void GL_APIENTRY glVertexAttrib4f (GLuint index, GLfloat x, GLfloat y, GLfloat z, GLfloat w); +GL_APICALL void GL_APIENTRY glVertexAttrib4fv (GLuint index, const GLfloat *v); +GL_APICALL void GL_APIENTRY glVertexAttribPointer (GLuint index, GLint size, GLenum type, GLboolean normalized, GLsizei stride, const void *pointer); +GL_APICALL void GL_APIENTRY glViewport (GLint x, GLint y, GLsizei width, GLsizei height); +#endif /* GL_ES_VERSION_2_0 */ + +#ifndef GL_ES_VERSION_3_0 +#define GL_ES_VERSION_3_0 1 +typedef unsigned short GLhalf; +#define GL_READ_BUFFER 0x0C02 +#define GL_UNPACK_ROW_LENGTH 0x0CF2 +#define GL_UNPACK_SKIP_ROWS 0x0CF3 +#define GL_UNPACK_SKIP_PIXELS 0x0CF4 +#define GL_PACK_ROW_LENGTH 0x0D02 +#define GL_PACK_SKIP_ROWS 0x0D03 +#define GL_PACK_SKIP_PIXELS 0x0D04 +#define GL_COLOR 0x1800 +#define GL_DEPTH 0x1801 +#define GL_STENCIL 0x1802 +#define GL_RED 0x1903 +#define GL_RGB8 0x8051 +#define GL_RGBA8 0x8058 +#define GL_RGB10_A2 0x8059 +#define GL_TEXTURE_BINDING_3D 0x806A +#define GL_UNPACK_SKIP_IMAGES 0x806D +#define GL_UNPACK_IMAGE_HEIGHT 0x806E +#define GL_TEXTURE_3D 0x806F +#define GL_TEXTURE_WRAP_R 0x8072 +#define GL_MAX_3D_TEXTURE_SIZE 0x8073 +#define GL_UNSIGNED_INT_2_10_10_10_REV 0x8368 +#define GL_MAX_ELEMENTS_VERTICES 0x80E8 +#define GL_MAX_ELEMENTS_INDICES 0x80E9 +#define GL_TEXTURE_MIN_LOD 0x813A +#define GL_TEXTURE_MAX_LOD 0x813B +#define GL_TEXTURE_BASE_LEVEL 0x813C +#define GL_TEXTURE_MAX_LEVEL 0x813D +#define GL_MIN 0x8007 +#define GL_MAX 0x8008 +#define GL_DEPTH_COMPONENT24 0x81A6 +#define GL_MAX_TEXTURE_LOD_BIAS 0x84FD +#define GL_TEXTURE_COMPARE_MODE 0x884C +#define GL_TEXTURE_COMPARE_FUNC 0x884D +#define GL_CURRENT_QUERY 0x8865 +#define GL_QUERY_RESULT 0x8866 +#define GL_QUERY_RESULT_AVAILABLE 0x8867 +#define GL_BUFFER_MAPPED 0x88BC +#define 
GL_BUFFER_MAP_POINTER 0x88BD +#define GL_STREAM_READ 0x88E1 +#define GL_STREAM_COPY 0x88E2 +#define GL_STATIC_READ 0x88E5 +#define GL_STATIC_COPY 0x88E6 +#define GL_DYNAMIC_READ 0x88E9 +#define GL_DYNAMIC_COPY 0x88EA +#define GL_MAX_DRAW_BUFFERS 0x8824 +#define GL_DRAW_BUFFER0 0x8825 +#define GL_DRAW_BUFFER1 0x8826 +#define GL_DRAW_BUFFER2 0x8827 +#define GL_DRAW_BUFFER3 0x8828 +#define GL_DRAW_BUFFER4 0x8829 +#define GL_DRAW_BUFFER5 0x882A +#define GL_DRAW_BUFFER6 0x882B +#define GL_DRAW_BUFFER7 0x882C +#define GL_DRAW_BUFFER8 0x882D +#define GL_DRAW_BUFFER9 0x882E +#define GL_DRAW_BUFFER10 0x882F +#define GL_DRAW_BUFFER11 0x8830 +#define GL_DRAW_BUFFER12 0x8831 +#define GL_DRAW_BUFFER13 0x8832 +#define GL_DRAW_BUFFER14 0x8833 +#define GL_DRAW_BUFFER15 0x8834 +#define GL_MAX_FRAGMENT_UNIFORM_COMPONENTS 0x8B49 +#define GL_MAX_VERTEX_UNIFORM_COMPONENTS 0x8B4A +#define GL_SAMPLER_3D 0x8B5F +#define GL_SAMPLER_2D_SHADOW 0x8B62 +#define GL_FRAGMENT_SHADER_DERIVATIVE_HINT 0x8B8B +#define GL_PIXEL_PACK_BUFFER 0x88EB +#define GL_PIXEL_UNPACK_BUFFER 0x88EC +#define GL_PIXEL_PACK_BUFFER_BINDING 0x88ED +#define GL_PIXEL_UNPACK_BUFFER_BINDING 0x88EF +#define GL_FLOAT_MAT2x3 0x8B65 +#define GL_FLOAT_MAT2x4 0x8B66 +#define GL_FLOAT_MAT3x2 0x8B67 +#define GL_FLOAT_MAT3x4 0x8B68 +#define GL_FLOAT_MAT4x2 0x8B69 +#define GL_FLOAT_MAT4x3 0x8B6A +#define GL_SRGB 0x8C40 +#define GL_SRGB8 0x8C41 +#define GL_SRGB8_ALPHA8 0x8C43 +#define GL_COMPARE_REF_TO_TEXTURE 0x884E +#define GL_MAJOR_VERSION 0x821B +#define GL_MINOR_VERSION 0x821C +#define GL_NUM_EXTENSIONS 0x821D +#define GL_RGBA32F 0x8814 +#define GL_RGB32F 0x8815 +#define GL_RGBA16F 0x881A +#define GL_RGB16F 0x881B +#define GL_VERTEX_ATTRIB_ARRAY_INTEGER 0x88FD +#define GL_MAX_ARRAY_TEXTURE_LAYERS 0x88FF +#define GL_MIN_PROGRAM_TEXEL_OFFSET 0x8904 +#define GL_MAX_PROGRAM_TEXEL_OFFSET 0x8905 +#define GL_MAX_VARYING_COMPONENTS 0x8B4B +#define GL_TEXTURE_2D_ARRAY 0x8C1A +#define GL_TEXTURE_BINDING_2D_ARRAY 0x8C1D +#define 
GL_R11F_G11F_B10F 0x8C3A +#define GL_UNSIGNED_INT_10F_11F_11F_REV 0x8C3B +#define GL_RGB9_E5 0x8C3D +#define GL_UNSIGNED_INT_5_9_9_9_REV 0x8C3E +#define GL_TRANSFORM_FEEDBACK_VARYING_MAX_LENGTH 0x8C76 +#define GL_TRANSFORM_FEEDBACK_BUFFER_MODE 0x8C7F +#define GL_MAX_TRANSFORM_FEEDBACK_SEPARATE_COMPONENTS 0x8C80 +#define GL_TRANSFORM_FEEDBACK_VARYINGS 0x8C83 +#define GL_TRANSFORM_FEEDBACK_BUFFER_START 0x8C84 +#define GL_TRANSFORM_FEEDBACK_BUFFER_SIZE 0x8C85 +#define GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN 0x8C88 +#define GL_RASTERIZER_DISCARD 0x8C89 +#define GL_MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS 0x8C8A +#define GL_MAX_TRANSFORM_FEEDBACK_SEPARATE_ATTRIBS 0x8C8B +#define GL_INTERLEAVED_ATTRIBS 0x8C8C +#define GL_SEPARATE_ATTRIBS 0x8C8D +#define GL_TRANSFORM_FEEDBACK_BUFFER 0x8C8E +#define GL_TRANSFORM_FEEDBACK_BUFFER_BINDING 0x8C8F +#define GL_RGBA32UI 0x8D70 +#define GL_RGB32UI 0x8D71 +#define GL_RGBA16UI 0x8D76 +#define GL_RGB16UI 0x8D77 +#define GL_RGBA8UI 0x8D7C +#define GL_RGB8UI 0x8D7D +#define GL_RGBA32I 0x8D82 +#define GL_RGB32I 0x8D83 +#define GL_RGBA16I 0x8D88 +#define GL_RGB16I 0x8D89 +#define GL_RGBA8I 0x8D8E +#define GL_RGB8I 0x8D8F +#define GL_RED_INTEGER 0x8D94 +#define GL_RGB_INTEGER 0x8D98 +#define GL_RGBA_INTEGER 0x8D99 +#define GL_SAMPLER_2D_ARRAY 0x8DC1 +#define GL_SAMPLER_2D_ARRAY_SHADOW 0x8DC4 +#define GL_SAMPLER_CUBE_SHADOW 0x8DC5 +#define GL_UNSIGNED_INT_VEC2 0x8DC6 +#define GL_UNSIGNED_INT_VEC3 0x8DC7 +#define GL_UNSIGNED_INT_VEC4 0x8DC8 +#define GL_INT_SAMPLER_2D 0x8DCA +#define GL_INT_SAMPLER_3D 0x8DCB +#define GL_INT_SAMPLER_CUBE 0x8DCC +#define GL_INT_SAMPLER_2D_ARRAY 0x8DCF +#define GL_UNSIGNED_INT_SAMPLER_2D 0x8DD2 +#define GL_UNSIGNED_INT_SAMPLER_3D 0x8DD3 +#define GL_UNSIGNED_INT_SAMPLER_CUBE 0x8DD4 +#define GL_UNSIGNED_INT_SAMPLER_2D_ARRAY 0x8DD7 +#define GL_BUFFER_ACCESS_FLAGS 0x911F +#define GL_BUFFER_MAP_LENGTH 0x9120 +#define GL_BUFFER_MAP_OFFSET 0x9121 +#define GL_DEPTH_COMPONENT32F 0x8CAC +#define 
GL_DEPTH32F_STENCIL8 0x8CAD +#define GL_FLOAT_32_UNSIGNED_INT_24_8_REV 0x8DAD +#define GL_FRAMEBUFFER_ATTACHMENT_COLOR_ENCODING 0x8210 +#define GL_FRAMEBUFFER_ATTACHMENT_COMPONENT_TYPE 0x8211 +#define GL_FRAMEBUFFER_ATTACHMENT_RED_SIZE 0x8212 +#define GL_FRAMEBUFFER_ATTACHMENT_GREEN_SIZE 0x8213 +#define GL_FRAMEBUFFER_ATTACHMENT_BLUE_SIZE 0x8214 +#define GL_FRAMEBUFFER_ATTACHMENT_ALPHA_SIZE 0x8215 +#define GL_FRAMEBUFFER_ATTACHMENT_DEPTH_SIZE 0x8216 +#define GL_FRAMEBUFFER_ATTACHMENT_STENCIL_SIZE 0x8217 +#define GL_FRAMEBUFFER_DEFAULT 0x8218 +#define GL_FRAMEBUFFER_UNDEFINED 0x8219 +#define GL_DEPTH_STENCIL_ATTACHMENT 0x821A +#define GL_DEPTH_STENCIL 0x84F9 +#define GL_UNSIGNED_INT_24_8 0x84FA +#define GL_DEPTH24_STENCIL8 0x88F0 +#define GL_UNSIGNED_NORMALIZED 0x8C17 +#define GL_DRAW_FRAMEBUFFER_BINDING 0x8CA6 +#define GL_READ_FRAMEBUFFER 0x8CA8 +#define GL_DRAW_FRAMEBUFFER 0x8CA9 +#define GL_READ_FRAMEBUFFER_BINDING 0x8CAA +#define GL_RENDERBUFFER_SAMPLES 0x8CAB +#define GL_FRAMEBUFFER_ATTACHMENT_TEXTURE_LAYER 0x8CD4 +#define GL_MAX_COLOR_ATTACHMENTS 0x8CDF +#define GL_COLOR_ATTACHMENT1 0x8CE1 +#define GL_COLOR_ATTACHMENT2 0x8CE2 +#define GL_COLOR_ATTACHMENT3 0x8CE3 +#define GL_COLOR_ATTACHMENT4 0x8CE4 +#define GL_COLOR_ATTACHMENT5 0x8CE5 +#define GL_COLOR_ATTACHMENT6 0x8CE6 +#define GL_COLOR_ATTACHMENT7 0x8CE7 +#define GL_COLOR_ATTACHMENT8 0x8CE8 +#define GL_COLOR_ATTACHMENT9 0x8CE9 +#define GL_COLOR_ATTACHMENT10 0x8CEA +#define GL_COLOR_ATTACHMENT11 0x8CEB +#define GL_COLOR_ATTACHMENT12 0x8CEC +#define GL_COLOR_ATTACHMENT13 0x8CED +#define GL_COLOR_ATTACHMENT14 0x8CEE +#define GL_COLOR_ATTACHMENT15 0x8CEF +#define GL_FRAMEBUFFER_INCOMPLETE_MULTISAMPLE 0x8D56 +#define GL_MAX_SAMPLES 0x8D57 +#define GL_HALF_FLOAT 0x140B +#define GL_MAP_READ_BIT 0x0001 +#define GL_MAP_WRITE_BIT 0x0002 +#define GL_MAP_INVALIDATE_RANGE_BIT 0x0004 +#define GL_MAP_INVALIDATE_BUFFER_BIT 0x0008 +#define GL_MAP_FLUSH_EXPLICIT_BIT 0x0010 +#define GL_MAP_UNSYNCHRONIZED_BIT 0x0020 +#define 
GL_RG 0x8227 +#define GL_RG_INTEGER 0x8228 +#define GL_R8 0x8229 +#define GL_RG8 0x822B +#define GL_R16F 0x822D +#define GL_R32F 0x822E +#define GL_RG16F 0x822F +#define GL_RG32F 0x8230 +#define GL_R8I 0x8231 +#define GL_R8UI 0x8232 +#define GL_R16I 0x8233 +#define GL_R16UI 0x8234 +#define GL_R32I 0x8235 +#define GL_R32UI 0x8236 +#define GL_RG8I 0x8237 +#define GL_RG8UI 0x8238 +#define GL_RG16I 0x8239 +#define GL_RG16UI 0x823A +#define GL_RG32I 0x823B +#define GL_RG32UI 0x823C +#define GL_VERTEX_ARRAY_BINDING 0x85B5 +#define GL_R8_SNORM 0x8F94 +#define GL_RG8_SNORM 0x8F95 +#define GL_RGB8_SNORM 0x8F96 +#define GL_RGBA8_SNORM 0x8F97 +#define GL_SIGNED_NORMALIZED 0x8F9C +#define GL_PRIMITIVE_RESTART_FIXED_INDEX 0x8D69 +#define GL_COPY_READ_BUFFER 0x8F36 +#define GL_COPY_WRITE_BUFFER 0x8F37 +#define GL_COPY_READ_BUFFER_BINDING 0x8F36 +#define GL_COPY_WRITE_BUFFER_BINDING 0x8F37 +#define GL_UNIFORM_BUFFER 0x8A11 +#define GL_UNIFORM_BUFFER_BINDING 0x8A28 +#define GL_UNIFORM_BUFFER_START 0x8A29 +#define GL_UNIFORM_BUFFER_SIZE 0x8A2A +#define GL_MAX_VERTEX_UNIFORM_BLOCKS 0x8A2B +#define GL_MAX_FRAGMENT_UNIFORM_BLOCKS 0x8A2D +#define GL_MAX_COMBINED_UNIFORM_BLOCKS 0x8A2E +#define GL_MAX_UNIFORM_BUFFER_BINDINGS 0x8A2F +#define GL_MAX_UNIFORM_BLOCK_SIZE 0x8A30 +#define GL_MAX_COMBINED_VERTEX_UNIFORM_COMPONENTS 0x8A31 +#define GL_MAX_COMBINED_FRAGMENT_UNIFORM_COMPONENTS 0x8A33 +#define GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT 0x8A34 +#define GL_ACTIVE_UNIFORM_BLOCK_MAX_NAME_LENGTH 0x8A35 +#define GL_ACTIVE_UNIFORM_BLOCKS 0x8A36 +#define GL_UNIFORM_TYPE 0x8A37 +#define GL_UNIFORM_SIZE 0x8A38 +#define GL_UNIFORM_NAME_LENGTH 0x8A39 +#define GL_UNIFORM_BLOCK_INDEX 0x8A3A +#define GL_UNIFORM_OFFSET 0x8A3B +#define GL_UNIFORM_ARRAY_STRIDE 0x8A3C +#define GL_UNIFORM_MATRIX_STRIDE 0x8A3D +#define GL_UNIFORM_IS_ROW_MAJOR 0x8A3E +#define GL_UNIFORM_BLOCK_BINDING 0x8A3F +#define GL_UNIFORM_BLOCK_DATA_SIZE 0x8A40 +#define GL_UNIFORM_BLOCK_NAME_LENGTH 0x8A41 +#define 
GL_UNIFORM_BLOCK_ACTIVE_UNIFORMS 0x8A42 +#define GL_UNIFORM_BLOCK_ACTIVE_UNIFORM_INDICES 0x8A43 +#define GL_UNIFORM_BLOCK_REFERENCED_BY_VERTEX_SHADER 0x8A44 +#define GL_UNIFORM_BLOCK_REFERENCED_BY_FRAGMENT_SHADER 0x8A46 +#define GL_INVALID_INDEX 0xFFFFFFFFu +#define GL_MAX_VERTEX_OUTPUT_COMPONENTS 0x9122 +#define GL_MAX_FRAGMENT_INPUT_COMPONENTS 0x9125 +#define GL_MAX_SERVER_WAIT_TIMEOUT 0x9111 +#define GL_OBJECT_TYPE 0x9112 +#define GL_SYNC_CONDITION 0x9113 +#define GL_SYNC_STATUS 0x9114 +#define GL_SYNC_FLAGS 0x9115 +#define GL_SYNC_FENCE 0x9116 +#define GL_SYNC_GPU_COMMANDS_COMPLETE 0x9117 +#define GL_UNSIGNALED 0x9118 +#define GL_SIGNALED 0x9119 +#define GL_ALREADY_SIGNALED 0x911A +#define GL_TIMEOUT_EXPIRED 0x911B +#define GL_CONDITION_SATISFIED 0x911C +#define GL_WAIT_FAILED 0x911D +#define GL_SYNC_FLUSH_COMMANDS_BIT 0x00000001 +#define GL_TIMEOUT_IGNORED 0xFFFFFFFFFFFFFFFFull +#define GL_VERTEX_ATTRIB_ARRAY_DIVISOR 0x88FE +#define GL_ANY_SAMPLES_PASSED 0x8C2F +#define GL_ANY_SAMPLES_PASSED_CONSERVATIVE 0x8D6A +#define GL_SAMPLER_BINDING 0x8919 +#define GL_RGB10_A2UI 0x906F +#define GL_TEXTURE_SWIZZLE_R 0x8E42 +#define GL_TEXTURE_SWIZZLE_G 0x8E43 +#define GL_TEXTURE_SWIZZLE_B 0x8E44 +#define GL_TEXTURE_SWIZZLE_A 0x8E45 +#define GL_GREEN 0x1904 +#define GL_BLUE 0x1905 +#define GL_INT_2_10_10_10_REV 0x8D9F +#define GL_TRANSFORM_FEEDBACK 0x8E22 +#define GL_TRANSFORM_FEEDBACK_PAUSED 0x8E23 +#define GL_TRANSFORM_FEEDBACK_ACTIVE 0x8E24 +#define GL_TRANSFORM_FEEDBACK_BINDING 0x8E25 +#define GL_PROGRAM_BINARY_RETRIEVABLE_HINT 0x8257 +#define GL_PROGRAM_BINARY_LENGTH 0x8741 +#define GL_NUM_PROGRAM_BINARY_FORMATS 0x87FE +#define GL_PROGRAM_BINARY_FORMATS 0x87FF +#define GL_COMPRESSED_R11_EAC 0x9270 +#define GL_COMPRESSED_SIGNED_R11_EAC 0x9271 +#define GL_COMPRESSED_RG11_EAC 0x9272 +#define GL_COMPRESSED_SIGNED_RG11_EAC 0x9273 +#define GL_COMPRESSED_RGB8_ETC2 0x9274 +#define GL_COMPRESSED_SRGB8_ETC2 0x9275 +#define GL_COMPRESSED_RGB8_PUNCHTHROUGH_ALPHA1_ETC2 0x9276 
+#define GL_COMPRESSED_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2 0x9277 +#define GL_COMPRESSED_RGBA8_ETC2_EAC 0x9278 +#define GL_COMPRESSED_SRGB8_ALPHA8_ETC2_EAC 0x9279 +#define GL_TEXTURE_IMMUTABLE_FORMAT 0x912F +#define GL_MAX_ELEMENT_INDEX 0x8D6B +#define GL_NUM_SAMPLE_COUNTS 0x9380 +#define GL_TEXTURE_IMMUTABLE_LEVELS 0x82DF +GL_APICALL void GL_APIENTRY glReadBuffer (GLenum mode); +GL_APICALL void GL_APIENTRY glDrawRangeElements (GLenum mode, GLuint start, GLuint end, GLsizei count, GLenum type, const void *indices); +GL_APICALL void GL_APIENTRY glTexImage3D (GLenum target, GLint level, GLint internalformat, GLsizei width, GLsizei height, GLsizei depth, GLint border, GLenum format, GLenum type, const void *pixels); +GL_APICALL void GL_APIENTRY glTexSubImage3D (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLenum format, GLenum type, const void *pixels); +GL_APICALL void GL_APIENTRY glCopyTexSubImage3D (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLint x, GLint y, GLsizei width, GLsizei height); +GL_APICALL void GL_APIENTRY glCompressedTexImage3D (GLenum target, GLint level, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth, GLint border, GLsizei imageSize, const void *data); +GL_APICALL void GL_APIENTRY glCompressedTexSubImage3D (GLenum target, GLint level, GLint xoffset, GLint yoffset, GLint zoffset, GLsizei width, GLsizei height, GLsizei depth, GLenum format, GLsizei imageSize, const void *data); +GL_APICALL void GL_APIENTRY glGenQueries (GLsizei n, GLuint *ids); +GL_APICALL void GL_APIENTRY glDeleteQueries (GLsizei n, const GLuint *ids); +GL_APICALL GLboolean GL_APIENTRY glIsQuery (GLuint id); +GL_APICALL void GL_APIENTRY glBeginQuery (GLenum target, GLuint id); +GL_APICALL void GL_APIENTRY glEndQuery (GLenum target); +GL_APICALL void GL_APIENTRY glGetQueryiv (GLenum target, GLenum pname, GLint *params); +GL_APICALL void GL_APIENTRY 
glGetQueryObjectuiv (GLuint id, GLenum pname, GLuint *params); +GL_APICALL GLboolean GL_APIENTRY glUnmapBuffer (GLenum target); +GL_APICALL void GL_APIENTRY glGetBufferPointerv (GLenum target, GLenum pname, void **params); +GL_APICALL void GL_APIENTRY glDrawBuffers (GLsizei n, const GLenum *bufs); +GL_APICALL void GL_APIENTRY glUniformMatrix2x3fv (GLint location, GLsizei count, GLboolean transpose, const GLfloat *value); +GL_APICALL void GL_APIENTRY glUniformMatrix3x2fv (GLint location, GLsizei count, GLboolean transpose, const GLfloat *value); +GL_APICALL void GL_APIENTRY glUniformMatrix2x4fv (GLint location, GLsizei count, GLboolean transpose, const GLfloat *value); +GL_APICALL void GL_APIENTRY glUniformMatrix4x2fv (GLint location, GLsizei count, GLboolean transpose, const GLfloat *value); +GL_APICALL void GL_APIENTRY glUniformMatrix3x4fv (GLint location, GLsizei count, GLboolean transpose, const GLfloat *value); +GL_APICALL void GL_APIENTRY glUniformMatrix4x3fv (GLint location, GLsizei count, GLboolean transpose, const GLfloat *value); +GL_APICALL void GL_APIENTRY glBlitFramebuffer (GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1, GLbitfield mask, GLenum filter); +GL_APICALL void GL_APIENTRY glRenderbufferStorageMultisample (GLenum target, GLsizei samples, GLenum internalformat, GLsizei width, GLsizei height); +GL_APICALL void GL_APIENTRY glFramebufferTextureLayer (GLenum target, GLenum attachment, GLuint texture, GLint level, GLint layer); +GL_APICALL void *GL_APIENTRY glMapBufferRange (GLenum target, GLintptr offset, GLsizeiptr length, GLbitfield access); +GL_APICALL void GL_APIENTRY glFlushMappedBufferRange (GLenum target, GLintptr offset, GLsizeiptr length); +GL_APICALL void GL_APIENTRY glBindVertexArray (GLuint array); +GL_APICALL void GL_APIENTRY glDeleteVertexArrays (GLsizei n, const GLuint *arrays); +GL_APICALL void GL_APIENTRY glGenVertexArrays (GLsizei n, GLuint *arrays); +GL_APICALL GLboolean 
GL_APIENTRY glIsVertexArray (GLuint array); +GL_APICALL void GL_APIENTRY glGetIntegeri_v (GLenum target, GLuint index, GLint *data); +GL_APICALL void GL_APIENTRY glBeginTransformFeedback (GLenum primitiveMode); +GL_APICALL void GL_APIENTRY glEndTransformFeedback (void); +GL_APICALL void GL_APIENTRY glBindBufferRange (GLenum target, GLuint index, GLuint buffer, GLintptr offset, GLsizeiptr size); +GL_APICALL void GL_APIENTRY glBindBufferBase (GLenum target, GLuint index, GLuint buffer); +GL_APICALL void GL_APIENTRY glTransformFeedbackVaryings (GLuint program, GLsizei count, const GLchar *const*varyings, GLenum bufferMode); +GL_APICALL void GL_APIENTRY glGetTransformFeedbackVarying (GLuint program, GLuint index, GLsizei bufSize, GLsizei *length, GLsizei *size, GLenum *type, GLchar *name); +GL_APICALL void GL_APIENTRY glVertexAttribIPointer (GLuint index, GLint size, GLenum type, GLsizei stride, const void *pointer); +GL_APICALL void GL_APIENTRY glGetVertexAttribIiv (GLuint index, GLenum pname, GLint *params); +GL_APICALL void GL_APIENTRY glGetVertexAttribIuiv (GLuint index, GLenum pname, GLuint *params); +GL_APICALL void GL_APIENTRY glVertexAttribI4i (GLuint index, GLint x, GLint y, GLint z, GLint w); +GL_APICALL void GL_APIENTRY glVertexAttribI4ui (GLuint index, GLuint x, GLuint y, GLuint z, GLuint w); +GL_APICALL void GL_APIENTRY glVertexAttribI4iv (GLuint index, const GLint *v); +GL_APICALL void GL_APIENTRY glVertexAttribI4uiv (GLuint index, const GLuint *v); +GL_APICALL void GL_APIENTRY glGetUniformuiv (GLuint program, GLint location, GLuint *params); +GL_APICALL GLint GL_APIENTRY glGetFragDataLocation (GLuint program, const GLchar *name); +GL_APICALL void GL_APIENTRY glUniform1ui (GLint location, GLuint v0); +GL_APICALL void GL_APIENTRY glUniform2ui (GLint location, GLuint v0, GLuint v1); +GL_APICALL void GL_APIENTRY glUniform3ui (GLint location, GLuint v0, GLuint v1, GLuint v2); +GL_APICALL void GL_APIENTRY glUniform4ui (GLint location, GLuint v0, GLuint v1, 
GLuint v2, GLuint v3); +GL_APICALL void GL_APIENTRY glUniform1uiv (GLint location, GLsizei count, const GLuint *value); +GL_APICALL void GL_APIENTRY glUniform2uiv (GLint location, GLsizei count, const GLuint *value); +GL_APICALL void GL_APIENTRY glUniform3uiv (GLint location, GLsizei count, const GLuint *value); +GL_APICALL void GL_APIENTRY glUniform4uiv (GLint location, GLsizei count, const GLuint *value); +GL_APICALL void GL_APIENTRY glClearBufferiv (GLenum buffer, GLint drawbuffer, const GLint *value); +GL_APICALL void GL_APIENTRY glClearBufferuiv (GLenum buffer, GLint drawbuffer, const GLuint *value); +GL_APICALL void GL_APIENTRY glClearBufferfv (GLenum buffer, GLint drawbuffer, const GLfloat *value); +GL_APICALL void GL_APIENTRY glClearBufferfi (GLenum buffer, GLint drawbuffer, GLfloat depth, GLint stencil); +GL_APICALL const GLubyte *GL_APIENTRY glGetStringi (GLenum name, GLuint index); +GL_APICALL void GL_APIENTRY glCopyBufferSubData (GLenum readTarget, GLenum writeTarget, GLintptr readOffset, GLintptr writeOffset, GLsizeiptr size); +GL_APICALL void GL_APIENTRY glGetUniformIndices (GLuint program, GLsizei uniformCount, const GLchar *const*uniformNames, GLuint *uniformIndices); +GL_APICALL void GL_APIENTRY glGetActiveUniformsiv (GLuint program, GLsizei uniformCount, const GLuint *uniformIndices, GLenum pname, GLint *params); +GL_APICALL GLuint GL_APIENTRY glGetUniformBlockIndex (GLuint program, const GLchar *uniformBlockName); +GL_APICALL void GL_APIENTRY glGetActiveUniformBlockiv (GLuint program, GLuint uniformBlockIndex, GLenum pname, GLint *params); +GL_APICALL void GL_APIENTRY glGetActiveUniformBlockName (GLuint program, GLuint uniformBlockIndex, GLsizei bufSize, GLsizei *length, GLchar *uniformBlockName); +GL_APICALL void GL_APIENTRY glUniformBlockBinding (GLuint program, GLuint uniformBlockIndex, GLuint uniformBlockBinding); +GL_APICALL void GL_APIENTRY glDrawArraysInstanced (GLenum mode, GLint first, GLsizei count, GLsizei instancecount); +GL_APICALL 
void GL_APIENTRY glDrawElementsInstanced (GLenum mode, GLsizei count, GLenum type, const void *indices, GLsizei instancecount); +GL_APICALL GLsync GL_APIENTRY glFenceSync (GLenum condition, GLbitfield flags); +GL_APICALL GLboolean GL_APIENTRY glIsSync (GLsync sync); +GL_APICALL void GL_APIENTRY glDeleteSync (GLsync sync); +GL_APICALL GLenum GL_APIENTRY glClientWaitSync (GLsync sync, GLbitfield flags, GLuint64 timeout); +GL_APICALL void GL_APIENTRY glWaitSync (GLsync sync, GLbitfield flags, GLuint64 timeout); +GL_APICALL void GL_APIENTRY glGetInteger64v (GLenum pname, GLint64 *data); +GL_APICALL void GL_APIENTRY glGetSynciv (GLsync sync, GLenum pname, GLsizei bufSize, GLsizei *length, GLint *values); +GL_APICALL void GL_APIENTRY glGetInteger64i_v (GLenum target, GLuint index, GLint64 *data); +GL_APICALL void GL_APIENTRY glGetBufferParameteri64v (GLenum target, GLenum pname, GLint64 *params); +GL_APICALL void GL_APIENTRY glGenSamplers (GLsizei count, GLuint *samplers); +GL_APICALL void GL_APIENTRY glDeleteSamplers (GLsizei count, const GLuint *samplers); +GL_APICALL GLboolean GL_APIENTRY glIsSampler (GLuint sampler); +GL_APICALL void GL_APIENTRY glBindSampler (GLuint unit, GLuint sampler); +GL_APICALL void GL_APIENTRY glSamplerParameteri (GLuint sampler, GLenum pname, GLint param); +GL_APICALL void GL_APIENTRY glSamplerParameteriv (GLuint sampler, GLenum pname, const GLint *param); +GL_APICALL void GL_APIENTRY glSamplerParameterf (GLuint sampler, GLenum pname, GLfloat param); +GL_APICALL void GL_APIENTRY glSamplerParameterfv (GLuint sampler, GLenum pname, const GLfloat *param); +GL_APICALL void GL_APIENTRY glGetSamplerParameteriv (GLuint sampler, GLenum pname, GLint *params); +GL_APICALL void GL_APIENTRY glGetSamplerParameterfv (GLuint sampler, GLenum pname, GLfloat *params); +GL_APICALL void GL_APIENTRY glVertexAttribDivisor (GLuint index, GLuint divisor); +GL_APICALL void GL_APIENTRY glBindTransformFeedback (GLenum target, GLuint id); +GL_APICALL void GL_APIENTRY 
glDeleteTransformFeedbacks (GLsizei n, const GLuint *ids); +GL_APICALL void GL_APIENTRY glGenTransformFeedbacks (GLsizei n, GLuint *ids); +GL_APICALL GLboolean GL_APIENTRY glIsTransformFeedback (GLuint id); +GL_APICALL void GL_APIENTRY glPauseTransformFeedback (void); +GL_APICALL void GL_APIENTRY glResumeTransformFeedback (void); +GL_APICALL void GL_APIENTRY glGetProgramBinary (GLuint program, GLsizei bufSize, GLsizei *length, GLenum *binaryFormat, void *binary); +GL_APICALL void GL_APIENTRY glProgramBinary (GLuint program, GLenum binaryFormat, const void *binary, GLsizei length); +GL_APICALL void GL_APIENTRY glProgramParameteri (GLuint program, GLenum pname, GLint value); +GL_APICALL void GL_APIENTRY glInvalidateFramebuffer (GLenum target, GLsizei numAttachments, const GLenum *attachments); +GL_APICALL void GL_APIENTRY glInvalidateSubFramebuffer (GLenum target, GLsizei numAttachments, const GLenum *attachments, GLint x, GLint y, GLsizei width, GLsizei height); +GL_APICALL void GL_APIENTRY glTexStorage2D (GLenum target, GLsizei levels, GLenum internalformat, GLsizei width, GLsizei height); +GL_APICALL void GL_APIENTRY glTexStorage3D (GLenum target, GLsizei levels, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth); +GL_APICALL void GL_APIENTRY glGetInternalformativ (GLenum target, GLenum internalformat, GLenum pname, GLsizei bufSize, GLint *params); +#endif /* GL_ES_VERSION_3_0 */ + +#ifndef GL_ES_VERSION_3_1 +#define GL_ES_VERSION_3_1 1 +#define GL_COMPUTE_SHADER 0x91B9 +#define GL_MAX_COMPUTE_UNIFORM_BLOCKS 0x91BB +#define GL_MAX_COMPUTE_TEXTURE_IMAGE_UNITS 0x91BC +#define GL_MAX_COMPUTE_IMAGE_UNIFORMS 0x91BD +#define GL_MAX_COMPUTE_SHARED_MEMORY_SIZE 0x8262 +#define GL_MAX_COMPUTE_UNIFORM_COMPONENTS 0x8263 +#define GL_MAX_COMPUTE_ATOMIC_COUNTER_BUFFERS 0x8264 +#define GL_MAX_COMPUTE_ATOMIC_COUNTERS 0x8265 +#define GL_MAX_COMBINED_COMPUTE_UNIFORM_COMPONENTS 0x8266 +#define GL_MAX_COMPUTE_WORK_GROUP_INVOCATIONS 0x90EB +#define 
GL_MAX_COMPUTE_WORK_GROUP_COUNT 0x91BE +#define GL_MAX_COMPUTE_WORK_GROUP_SIZE 0x91BF +#define GL_COMPUTE_WORK_GROUP_SIZE 0x8267 +#define GL_DISPATCH_INDIRECT_BUFFER 0x90EE +#define GL_DISPATCH_INDIRECT_BUFFER_BINDING 0x90EF +#define GL_COMPUTE_SHADER_BIT 0x00000020 +#define GL_DRAW_INDIRECT_BUFFER 0x8F3F +#define GL_DRAW_INDIRECT_BUFFER_BINDING 0x8F43 +#define GL_MAX_UNIFORM_LOCATIONS 0x826E +#define GL_FRAMEBUFFER_DEFAULT_WIDTH 0x9310 +#define GL_FRAMEBUFFER_DEFAULT_HEIGHT 0x9311 +#define GL_FRAMEBUFFER_DEFAULT_SAMPLES 0x9313 +#define GL_FRAMEBUFFER_DEFAULT_FIXED_SAMPLE_LOCATIONS 0x9314 +#define GL_MAX_FRAMEBUFFER_WIDTH 0x9315 +#define GL_MAX_FRAMEBUFFER_HEIGHT 0x9316 +#define GL_MAX_FRAMEBUFFER_SAMPLES 0x9318 +#define GL_UNIFORM 0x92E1 +#define GL_UNIFORM_BLOCK 0x92E2 +#define GL_PROGRAM_INPUT 0x92E3 +#define GL_PROGRAM_OUTPUT 0x92E4 +#define GL_BUFFER_VARIABLE 0x92E5 +#define GL_SHADER_STORAGE_BLOCK 0x92E6 +#define GL_ATOMIC_COUNTER_BUFFER 0x92C0 +#define GL_TRANSFORM_FEEDBACK_VARYING 0x92F4 +#define GL_ACTIVE_RESOURCES 0x92F5 +#define GL_MAX_NAME_LENGTH 0x92F6 +#define GL_MAX_NUM_ACTIVE_VARIABLES 0x92F7 +#define GL_NAME_LENGTH 0x92F9 +#define GL_TYPE 0x92FA +#define GL_ARRAY_SIZE 0x92FB +#define GL_OFFSET 0x92FC +#define GL_BLOCK_INDEX 0x92FD +#define GL_ARRAY_STRIDE 0x92FE +#define GL_MATRIX_STRIDE 0x92FF +#define GL_IS_ROW_MAJOR 0x9300 +#define GL_ATOMIC_COUNTER_BUFFER_INDEX 0x9301 +#define GL_BUFFER_BINDING 0x9302 +#define GL_BUFFER_DATA_SIZE 0x9303 +#define GL_NUM_ACTIVE_VARIABLES 0x9304 +#define GL_ACTIVE_VARIABLES 0x9305 +#define GL_REFERENCED_BY_VERTEX_SHADER 0x9306 +#define GL_REFERENCED_BY_FRAGMENT_SHADER 0x930A +#define GL_REFERENCED_BY_COMPUTE_SHADER 0x930B +#define GL_TOP_LEVEL_ARRAY_SIZE 0x930C +#define GL_TOP_LEVEL_ARRAY_STRIDE 0x930D +#define GL_LOCATION 0x930E +#define GL_VERTEX_SHADER_BIT 0x00000001 +#define GL_FRAGMENT_SHADER_BIT 0x00000002 +#define GL_ALL_SHADER_BITS 0xFFFFFFFF +#define GL_PROGRAM_SEPARABLE 0x8258 +#define GL_ACTIVE_PROGRAM 
0x8259 +#define GL_PROGRAM_PIPELINE_BINDING 0x825A +#define GL_ATOMIC_COUNTER_BUFFER_BINDING 0x92C1 +#define GL_ATOMIC_COUNTER_BUFFER_START 0x92C2 +#define GL_ATOMIC_COUNTER_BUFFER_SIZE 0x92C3 +#define GL_MAX_VERTEX_ATOMIC_COUNTER_BUFFERS 0x92CC +#define GL_MAX_FRAGMENT_ATOMIC_COUNTER_BUFFERS 0x92D0 +#define GL_MAX_COMBINED_ATOMIC_COUNTER_BUFFERS 0x92D1 +#define GL_MAX_VERTEX_ATOMIC_COUNTERS 0x92D2 +#define GL_MAX_FRAGMENT_ATOMIC_COUNTERS 0x92D6 +#define GL_MAX_COMBINED_ATOMIC_COUNTERS 0x92D7 +#define GL_MAX_ATOMIC_COUNTER_BUFFER_SIZE 0x92D8 +#define GL_MAX_ATOMIC_COUNTER_BUFFER_BINDINGS 0x92DC +#define GL_ACTIVE_ATOMIC_COUNTER_BUFFERS 0x92D9 +#define GL_UNSIGNED_INT_ATOMIC_COUNTER 0x92DB +#define GL_MAX_IMAGE_UNITS 0x8F38 +#define GL_MAX_VERTEX_IMAGE_UNIFORMS 0x90CA +#define GL_MAX_FRAGMENT_IMAGE_UNIFORMS 0x90CE +#define GL_MAX_COMBINED_IMAGE_UNIFORMS 0x90CF +#define GL_IMAGE_BINDING_NAME 0x8F3A +#define GL_IMAGE_BINDING_LEVEL 0x8F3B +#define GL_IMAGE_BINDING_LAYERED 0x8F3C +#define GL_IMAGE_BINDING_LAYER 0x8F3D +#define GL_IMAGE_BINDING_ACCESS 0x8F3E +#define GL_IMAGE_BINDING_FORMAT 0x906E +#define GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT 0x00000001 +#define GL_ELEMENT_ARRAY_BARRIER_BIT 0x00000002 +#define GL_UNIFORM_BARRIER_BIT 0x00000004 +#define GL_TEXTURE_FETCH_BARRIER_BIT 0x00000008 +#define GL_SHADER_IMAGE_ACCESS_BARRIER_BIT 0x00000020 +#define GL_COMMAND_BARRIER_BIT 0x00000040 +#define GL_PIXEL_BUFFER_BARRIER_BIT 0x00000080 +#define GL_TEXTURE_UPDATE_BARRIER_BIT 0x00000100 +#define GL_BUFFER_UPDATE_BARRIER_BIT 0x00000200 +#define GL_FRAMEBUFFER_BARRIER_BIT 0x00000400 +#define GL_TRANSFORM_FEEDBACK_BARRIER_BIT 0x00000800 +#define GL_ATOMIC_COUNTER_BARRIER_BIT 0x00001000 +#define GL_ALL_BARRIER_BITS 0xFFFFFFFF +#define GL_IMAGE_2D 0x904D +#define GL_IMAGE_3D 0x904E +#define GL_IMAGE_CUBE 0x9050 +#define GL_IMAGE_2D_ARRAY 0x9053 +#define GL_INT_IMAGE_2D 0x9058 +#define GL_INT_IMAGE_3D 0x9059 +#define GL_INT_IMAGE_CUBE 0x905B +#define GL_INT_IMAGE_2D_ARRAY 0x905E 
+#define GL_UNSIGNED_INT_IMAGE_2D 0x9063 +#define GL_UNSIGNED_INT_IMAGE_3D 0x9064 +#define GL_UNSIGNED_INT_IMAGE_CUBE 0x9066 +#define GL_UNSIGNED_INT_IMAGE_2D_ARRAY 0x9069 +#define GL_IMAGE_FORMAT_COMPATIBILITY_TYPE 0x90C7 +#define GL_IMAGE_FORMAT_COMPATIBILITY_BY_SIZE 0x90C8 +#define GL_IMAGE_FORMAT_COMPATIBILITY_BY_CLASS 0x90C9 +#define GL_READ_ONLY 0x88B8 +#define GL_WRITE_ONLY 0x88B9 +#define GL_READ_WRITE 0x88BA +#define GL_SHADER_STORAGE_BUFFER 0x90D2 +#define GL_SHADER_STORAGE_BUFFER_BINDING 0x90D3 +#define GL_SHADER_STORAGE_BUFFER_START 0x90D4 +#define GL_SHADER_STORAGE_BUFFER_SIZE 0x90D5 +#define GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS 0x90D6 +#define GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS 0x90DA +#define GL_MAX_COMPUTE_SHADER_STORAGE_BLOCKS 0x90DB +#define GL_MAX_COMBINED_SHADER_STORAGE_BLOCKS 0x90DC +#define GL_MAX_SHADER_STORAGE_BUFFER_BINDINGS 0x90DD +#define GL_MAX_SHADER_STORAGE_BLOCK_SIZE 0x90DE +#define GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT 0x90DF +#define GL_SHADER_STORAGE_BARRIER_BIT 0x00002000 +#define GL_MAX_COMBINED_SHADER_OUTPUT_RESOURCES 0x8F39 +#define GL_DEPTH_STENCIL_TEXTURE_MODE 0x90EA +#define GL_STENCIL_INDEX 0x1901 +#define GL_MIN_PROGRAM_TEXTURE_GATHER_OFFSET 0x8E5E +#define GL_MAX_PROGRAM_TEXTURE_GATHER_OFFSET 0x8E5F +#define GL_SAMPLE_POSITION 0x8E50 +#define GL_SAMPLE_MASK 0x8E51 +#define GL_SAMPLE_MASK_VALUE 0x8E52 +#define GL_TEXTURE_2D_MULTISAMPLE 0x9100 +#define GL_MAX_SAMPLE_MASK_WORDS 0x8E59 +#define GL_MAX_COLOR_TEXTURE_SAMPLES 0x910E +#define GL_MAX_DEPTH_TEXTURE_SAMPLES 0x910F +#define GL_MAX_INTEGER_SAMPLES 0x9110 +#define GL_TEXTURE_BINDING_2D_MULTISAMPLE 0x9104 +#define GL_TEXTURE_SAMPLES 0x9106 +#define GL_TEXTURE_FIXED_SAMPLE_LOCATIONS 0x9107 +#define GL_TEXTURE_WIDTH 0x1000 +#define GL_TEXTURE_HEIGHT 0x1001 +#define GL_TEXTURE_DEPTH 0x8071 +#define GL_TEXTURE_INTERNAL_FORMAT 0x1003 +#define GL_TEXTURE_RED_SIZE 0x805C +#define GL_TEXTURE_GREEN_SIZE 0x805D +#define GL_TEXTURE_BLUE_SIZE 0x805E +#define 
GL_TEXTURE_ALPHA_SIZE 0x805F +#define GL_TEXTURE_DEPTH_SIZE 0x884A +#define GL_TEXTURE_STENCIL_SIZE 0x88F1 +#define GL_TEXTURE_SHARED_SIZE 0x8C3F +#define GL_TEXTURE_RED_TYPE 0x8C10 +#define GL_TEXTURE_GREEN_TYPE 0x8C11 +#define GL_TEXTURE_BLUE_TYPE 0x8C12 +#define GL_TEXTURE_ALPHA_TYPE 0x8C13 +#define GL_TEXTURE_DEPTH_TYPE 0x8C16 +#define GL_TEXTURE_COMPRESSED 0x86A1 +#define GL_SAMPLER_2D_MULTISAMPLE 0x9108 +#define GL_INT_SAMPLER_2D_MULTISAMPLE 0x9109 +#define GL_UNSIGNED_INT_SAMPLER_2D_MULTISAMPLE 0x910A +#define GL_VERTEX_ATTRIB_BINDING 0x82D4 +#define GL_VERTEX_ATTRIB_RELATIVE_OFFSET 0x82D5 +#define GL_VERTEX_BINDING_DIVISOR 0x82D6 +#define GL_VERTEX_BINDING_OFFSET 0x82D7 +#define GL_VERTEX_BINDING_STRIDE 0x82D8 +#define GL_VERTEX_BINDING_BUFFER 0x8F4F +#define GL_MAX_VERTEX_ATTRIB_RELATIVE_OFFSET 0x82D9 +#define GL_MAX_VERTEX_ATTRIB_BINDINGS 0x82DA +#define GL_MAX_VERTEX_ATTRIB_STRIDE 0x82E5 +GL_APICALL void GL_APIENTRY glDispatchCompute (GLuint num_groups_x, GLuint num_groups_y, GLuint num_groups_z); +GL_APICALL void GL_APIENTRY glDispatchComputeIndirect (GLintptr indirect); +GL_APICALL void GL_APIENTRY glDrawArraysIndirect (GLenum mode, const void *indirect); +GL_APICALL void GL_APIENTRY glDrawElementsIndirect (GLenum mode, GLenum type, const void *indirect); +GL_APICALL void GL_APIENTRY glFramebufferParameteri (GLenum target, GLenum pname, GLint param); +GL_APICALL void GL_APIENTRY glGetFramebufferParameteriv (GLenum target, GLenum pname, GLint *params); +GL_APICALL void GL_APIENTRY glGetProgramInterfaceiv (GLuint program, GLenum programInterface, GLenum pname, GLint *params); +GL_APICALL GLuint GL_APIENTRY glGetProgramResourceIndex (GLuint program, GLenum programInterface, const GLchar *name); +GL_APICALL void GL_APIENTRY glGetProgramResourceName (GLuint program, GLenum programInterface, GLuint index, GLsizei bufSize, GLsizei *length, GLchar *name); +GL_APICALL void GL_APIENTRY glGetProgramResourceiv (GLuint program, GLenum programInterface, GLuint 
index, GLsizei propCount, const GLenum *props, GLsizei bufSize, GLsizei *length, GLint *params); +GL_APICALL GLint GL_APIENTRY glGetProgramResourceLocation (GLuint program, GLenum programInterface, const GLchar *name); +GL_APICALL void GL_APIENTRY glUseProgramStages (GLuint pipeline, GLbitfield stages, GLuint program); +GL_APICALL void GL_APIENTRY glActiveShaderProgram (GLuint pipeline, GLuint program); +GL_APICALL GLuint GL_APIENTRY glCreateShaderProgramv (GLenum type, GLsizei count, const GLchar *const*strings); +GL_APICALL void GL_APIENTRY glBindProgramPipeline (GLuint pipeline); +GL_APICALL void GL_APIENTRY glDeleteProgramPipelines (GLsizei n, const GLuint *pipelines); +GL_APICALL void GL_APIENTRY glGenProgramPipelines (GLsizei n, GLuint *pipelines); +GL_APICALL GLboolean GL_APIENTRY glIsProgramPipeline (GLuint pipeline); +GL_APICALL void GL_APIENTRY glGetProgramPipelineiv (GLuint pipeline, GLenum pname, GLint *params); +GL_APICALL void GL_APIENTRY glProgramUniform1i (GLuint program, GLint location, GLint v0); +GL_APICALL void GL_APIENTRY glProgramUniform2i (GLuint program, GLint location, GLint v0, GLint v1); +GL_APICALL void GL_APIENTRY glProgramUniform3i (GLuint program, GLint location, GLint v0, GLint v1, GLint v2); +GL_APICALL void GL_APIENTRY glProgramUniform4i (GLuint program, GLint location, GLint v0, GLint v1, GLint v2, GLint v3); +GL_APICALL void GL_APIENTRY glProgramUniform1ui (GLuint program, GLint location, GLuint v0); +GL_APICALL void GL_APIENTRY glProgramUniform2ui (GLuint program, GLint location, GLuint v0, GLuint v1); +GL_APICALL void GL_APIENTRY glProgramUniform3ui (GLuint program, GLint location, GLuint v0, GLuint v1, GLuint v2); +GL_APICALL void GL_APIENTRY glProgramUniform4ui (GLuint program, GLint location, GLuint v0, GLuint v1, GLuint v2, GLuint v3); +GL_APICALL void GL_APIENTRY glProgramUniform1f (GLuint program, GLint location, GLfloat v0); +GL_APICALL void GL_APIENTRY glProgramUniform2f (GLuint program, GLint location, GLfloat v0, 
GLfloat v1); +GL_APICALL void GL_APIENTRY glProgramUniform3f (GLuint program, GLint location, GLfloat v0, GLfloat v1, GLfloat v2); +GL_APICALL void GL_APIENTRY glProgramUniform4f (GLuint program, GLint location, GLfloat v0, GLfloat v1, GLfloat v2, GLfloat v3); +GL_APICALL void GL_APIENTRY glProgramUniform1iv (GLuint program, GLint location, GLsizei count, const GLint *value); +GL_APICALL void GL_APIENTRY glProgramUniform2iv (GLuint program, GLint location, GLsizei count, const GLint *value); +GL_APICALL void GL_APIENTRY glProgramUniform3iv (GLuint program, GLint location, GLsizei count, const GLint *value); +GL_APICALL void GL_APIENTRY glProgramUniform4iv (GLuint program, GLint location, GLsizei count, const GLint *value); +GL_APICALL void GL_APIENTRY glProgramUniform1uiv (GLuint program, GLint location, GLsizei count, const GLuint *value); +GL_APICALL void GL_APIENTRY glProgramUniform2uiv (GLuint program, GLint location, GLsizei count, const GLuint *value); +GL_APICALL void GL_APIENTRY glProgramUniform3uiv (GLuint program, GLint location, GLsizei count, const GLuint *value); +GL_APICALL void GL_APIENTRY glProgramUniform4uiv (GLuint program, GLint location, GLsizei count, const GLuint *value); +GL_APICALL void GL_APIENTRY glProgramUniform1fv (GLuint program, GLint location, GLsizei count, const GLfloat *value); +GL_APICALL void GL_APIENTRY glProgramUniform2fv (GLuint program, GLint location, GLsizei count, const GLfloat *value); +GL_APICALL void GL_APIENTRY glProgramUniform3fv (GLuint program, GLint location, GLsizei count, const GLfloat *value); +GL_APICALL void GL_APIENTRY glProgramUniform4fv (GLuint program, GLint location, GLsizei count, const GLfloat *value); +GL_APICALL void GL_APIENTRY glProgramUniformMatrix2fv (GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLfloat *value); +GL_APICALL void GL_APIENTRY glProgramUniformMatrix3fv (GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLfloat *value); +GL_APICALL 
void GL_APIENTRY glProgramUniformMatrix4fv (GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLfloat *value); +GL_APICALL void GL_APIENTRY glProgramUniformMatrix2x3fv (GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLfloat *value); +GL_APICALL void GL_APIENTRY glProgramUniformMatrix3x2fv (GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLfloat *value); +GL_APICALL void GL_APIENTRY glProgramUniformMatrix2x4fv (GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLfloat *value); +GL_APICALL void GL_APIENTRY glProgramUniformMatrix4x2fv (GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLfloat *value); +GL_APICALL void GL_APIENTRY glProgramUniformMatrix3x4fv (GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLfloat *value); +GL_APICALL void GL_APIENTRY glProgramUniformMatrix4x3fv (GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLfloat *value); +GL_APICALL void GL_APIENTRY glValidateProgramPipeline (GLuint pipeline); +GL_APICALL void GL_APIENTRY glGetProgramPipelineInfoLog (GLuint pipeline, GLsizei bufSize, GLsizei *length, GLchar *infoLog); +GL_APICALL void GL_APIENTRY glBindImageTexture (GLuint unit, GLuint texture, GLint level, GLboolean layered, GLint layer, GLenum access, GLenum format); +GL_APICALL void GL_APIENTRY glGetBooleani_v (GLenum target, GLuint index, GLboolean *data); +GL_APICALL void GL_APIENTRY glMemoryBarrier (GLbitfield barriers); +GL_APICALL void GL_APIENTRY glMemoryBarrierByRegion (GLbitfield barriers); +GL_APICALL void GL_APIENTRY glTexStorage2DMultisample (GLenum target, GLsizei samples, GLenum internalformat, GLsizei width, GLsizei height, GLboolean fixedsamplelocations); +GL_APICALL void GL_APIENTRY glGetMultisamplefv (GLenum pname, GLuint index, GLfloat *val); +GL_APICALL void GL_APIENTRY glSampleMaski (GLuint maskNumber, GLbitfield mask); +GL_APICALL void GL_APIENTRY 
glGetTexLevelParameteriv (GLenum target, GLint level, GLenum pname, GLint *params); +GL_APICALL void GL_APIENTRY glGetTexLevelParameterfv (GLenum target, GLint level, GLenum pname, GLfloat *params); +GL_APICALL void GL_APIENTRY glBindVertexBuffer (GLuint bindingindex, GLuint buffer, GLintptr offset, GLsizei stride); +GL_APICALL void GL_APIENTRY glVertexAttribFormat (GLuint attribindex, GLint size, GLenum type, GLboolean normalized, GLuint relativeoffset); +GL_APICALL void GL_APIENTRY glVertexAttribIFormat (GLuint attribindex, GLint size, GLenum type, GLuint relativeoffset); +GL_APICALL void GL_APIENTRY glVertexAttribBinding (GLuint attribindex, GLuint bindingindex); +GL_APICALL void GL_APIENTRY glVertexBindingDivisor (GLuint bindingindex, GLuint divisor); +#endif /* GL_ES_VERSION_3_1 */ + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/opengles-3.1/include/GLES3/gl3ext.h b/opengles-3.1/include/GLES3/gl3ext.h new file mode 100644 index 0000000000..4d4ea96c4d --- /dev/null +++ b/opengles-3.1/include/GLES3/gl3ext.h @@ -0,0 +1,24 @@ +#ifndef __gl3ext_h_ +#define __gl3ext_h_ + +/* $Revision: 17809 $ on $Date:: 2012-05-14 08:03:36 -0700 #$ */ + +/* + * This document is licensed under the SGI Free Software B License Version + * 2.0. For details, see http://oss.sgi.com/projects/FreeB/ . + */ + +/* OpenGL ES 3 Extensions + * + * After an OES extension's interactions with OpenGl ES 3.0 have been documented, + * its tokens and function definitions should be added to this file in a manner + * that does not conflict with gl2ext.h or gl3.h. + * + * Tokens and function definitions for extensions that have become standard + * features in OpenGL ES 3.0 will not be added to this file. 
+ * + * Applications using OpenGL-ES-2-only extensions should include gl2ext.h + */ + +#endif /* __gl3ext_h_ */ + diff --git a/opengles-3.1/include/GLES3/gl3platform.h b/opengles-3.1/include/GLES3/gl3platform.h new file mode 100644 index 0000000000..1bd1a850fa --- /dev/null +++ b/opengles-3.1/include/GLES3/gl3platform.h @@ -0,0 +1,30 @@ +#ifndef __gl3platform_h_ +#define __gl3platform_h_ + +/* $Revision: 18437 $ on $Date:: 2012-07-08 23:31:39 -0700 #$ */ + +/* + * This document is licensed under the SGI Free Software B License Version + * 2.0. For details, see http://oss.sgi.com/projects/FreeB/ . + */ + +/* Platform-specific types and definitions for OpenGL ES 3.X gl3.h + * + * Adopters may modify khrplatform.h and this file to suit their platform. + * You are encouraged to submit all modifications to the Khronos group so that + * they can be included in future versions of this file. Please submit changes + * by sending them to the public Khronos Bugzilla (http://khronos.org/bugzilla) + * by filing a bug against product "OpenGL-ES" component "Registry". + */ + +#include + +#ifndef GL_APICALL +#define GL_APICALL KHRONOS_APICALL +#endif + +#ifndef GL_APIENTRY +#define GL_APIENTRY KHRONOS_APIENTRY +#endif + +#endif /* __gl3platform_h_ */ diff --git a/opengles-3.1/include/KHR/khrplatform.h b/opengles-3.1/include/KHR/khrplatform.h new file mode 100644 index 0000000000..11e873ea96 --- /dev/null +++ b/opengles-3.1/include/KHR/khrplatform.h @@ -0,0 +1,273 @@ +#ifndef __khrplatform_h_ +#define __khrplatform_h_ + +/* +** Copyright (c) 2008-2009 The Khronos Group Inc. 
+** +** Permission is hereby granted, free of charge, to any person obtaining a +** copy of this software and/or associated documentation files (the +** "Materials"), to deal in the Materials without restriction, including +** without limitation the rights to use, copy, modify, merge, publish, +** distribute, sublicense, and/or sell copies of the Materials, and to +** permit persons to whom the Materials are furnished to do so, subject to +** the following conditions: +** +** The above copyright notice and this permission notice shall be included +** in all copies or substantial portions of the Materials. +** +** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +** MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. +*/ + +/* Khronos platform-specific types and definitions. + * + * $Revision: 9356 $ on $Date: 2009-10-21 02:52:25 -0700 (Wed, 21 Oct 2009) $ + * + * Adopters may modify this file to suit their platform. Adopters are + * encouraged to submit platform specific modifications to the Khronos + * group so that they can be included in future versions of this file. + * Please submit changes by sending them to the public Khronos Bugzilla + * (http://khronos.org/bugzilla) by filing a bug against product + * "Khronos (general)" component "Registry". + * + * A predefined template which fills in some of the bug fields can be + * reached using http://tinyurl.com/khrplatform-h-bugreport, but you + * must create a Bugzilla login first. 
+ * + * + * See the Implementer's Guidelines for information about where this file + * should be located on your system and for more details of its use: + * http://www.khronos.org/registry/implementers_guide.pdf + * + * This file should be included as + * #include + * by Khronos client API header files that use its types and defines. + * + * The types in khrplatform.h should only be used to define API-specific types. + * + * Types defined in khrplatform.h: + * khronos_int8_t signed 8 bit + * khronos_uint8_t unsigned 8 bit + * khronos_int16_t signed 16 bit + * khronos_uint16_t unsigned 16 bit + * khronos_int32_t signed 32 bit + * khronos_uint32_t unsigned 32 bit + * khronos_int64_t signed 64 bit + * khronos_uint64_t unsigned 64 bit + * khronos_intptr_t signed same number of bits as a pointer + * khronos_uintptr_t unsigned same number of bits as a pointer + * khronos_ssize_t signed size + * khronos_usize_t unsigned size + * khronos_float_t signed 32 bit floating point + * khronos_time_ns_t unsigned 64 bit time in nanoseconds + * khronos_utime_nanoseconds_t unsigned time interval or absolute time in + * nanoseconds + * khronos_stime_nanoseconds_t signed time interval in nanoseconds + * khronos_boolean_enum_t enumerated boolean type. This should + * only be used as a base type when a client API's boolean type is + * an enum. Client APIs which use an integer or other type for + * booleans cannot use this as the base type for their boolean. + * + * Tokens defined in khrplatform.h: + * + * KHRONOS_FALSE, KHRONOS_TRUE Enumerated boolean false/true values. + * + * KHRONOS_SUPPORT_INT64 is 1 if 64 bit integers are supported; otherwise 0. + * KHRONOS_SUPPORT_FLOAT is 1 if floats are supported; otherwise 0. 
+ * + * Calling convention macros defined in this file: + * KHRONOS_APICALL + * KHRONOS_APIENTRY + * KHRONOS_APIATTRIBUTES + * + * These may be used in function prototypes as: + * + * KHRONOS_APICALL void KHRONOS_APIENTRY funcname( + * int arg1, + * int arg2) KHRONOS_APIATTRIBUTES; + */ + +/*------------------------------------------------------------------------- + * Definition of KHRONOS_APICALL + *------------------------------------------------------------------------- + * This precedes the return type of the function in the function prototype. + */ +#if defined(_WIN32) && !defined(__SCITECH_SNAP__) +# if defined (_DLL_EXPORTS) +# define KHRONOS_APICALL __declspec(dllexport) +# else +# define KHRONOS_APICALL __declspec(dllimport) +# endif +#elif defined (__SYMBIAN32__) +# define KHRONOS_APICALL IMPORT_C +#else +# define KHRONOS_APICALL +#endif + +/*------------------------------------------------------------------------- + * Definition of KHRONOS_APIENTRY + *------------------------------------------------------------------------- + * This follows the return type of the function and precedes the function + * name in the function prototype. + */ +#if defined(_WIN32) && !defined(_WIN32_WCE) && !defined(__SCITECH_SNAP__) + /* Win32 but not WinCE */ +# define KHRONOS_APIENTRY __stdcall +#else +# define KHRONOS_APIENTRY +#endif + +/*------------------------------------------------------------------------- + * Definition of KHRONOS_APIATTRIBUTES + *------------------------------------------------------------------------- + * This follows the closing parenthesis of the function prototype arguments. 
+ */ +#if defined (__ARMCC_2__) +#define KHRONOS_APIATTRIBUTES __softfp +#else +#define KHRONOS_APIATTRIBUTES +#endif + +/*------------------------------------------------------------------------- + * basic type definitions + *-----------------------------------------------------------------------*/ +#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) || defined(__GNUC__) || defined(__SCO__) || defined(__USLC__) + + +/* + * Using + */ +#include +typedef int32_t khronos_int32_t; +typedef uint32_t khronos_uint32_t; +typedef int64_t khronos_int64_t; +typedef uint64_t khronos_uint64_t; +#define KHRONOS_SUPPORT_INT64 1 +#define KHRONOS_SUPPORT_FLOAT 1 + +#elif defined(__VMS ) || defined(__sgi) + +/* + * Using + */ +#include +typedef int32_t khronos_int32_t; +typedef uint32_t khronos_uint32_t; +typedef int64_t khronos_int64_t; +typedef uint64_t khronos_uint64_t; +#define KHRONOS_SUPPORT_INT64 1 +#define KHRONOS_SUPPORT_FLOAT 1 + +#elif defined(_WIN32) && !defined(__SCITECH_SNAP__) + +/* + * Win32 + */ +typedef __int32 khronos_int32_t; +typedef unsigned __int32 khronos_uint32_t; +typedef __int64 khronos_int64_t; +typedef unsigned __int64 khronos_uint64_t; +#define KHRONOS_SUPPORT_INT64 1 +#define KHRONOS_SUPPORT_FLOAT 1 + +#elif defined(__sun__) || defined(__digital__) + +/* + * Sun or Digital + */ +typedef int khronos_int32_t; +typedef unsigned int khronos_uint32_t; +#if defined(__arch64__) || defined(_LP64) +typedef long int khronos_int64_t; +typedef unsigned long int khronos_uint64_t; +#else +typedef long long int khronos_int64_t; +typedef unsigned long long int khronos_uint64_t; +#endif /* __arch64__ */ +#define KHRONOS_SUPPORT_INT64 1 +#define KHRONOS_SUPPORT_FLOAT 1 + +#elif 0 + +/* + * Hypothetical platform with no float or int64 support + */ +typedef int khronos_int32_t; +typedef unsigned int khronos_uint32_t; +#define KHRONOS_SUPPORT_INT64 0 +#define KHRONOS_SUPPORT_FLOAT 0 + +#else + +/* + * Generic fallback + */ +#include +typedef int32_t 
khronos_int32_t; +typedef uint32_t khronos_uint32_t; +typedef int64_t khronos_int64_t; +typedef uint64_t khronos_uint64_t; +#define KHRONOS_SUPPORT_INT64 1 +#define KHRONOS_SUPPORT_FLOAT 1 + +#endif + + +/* + * Types that are (so far) the same on all platforms + */ +typedef signed char khronos_int8_t; +typedef unsigned char khronos_uint8_t; +typedef signed short int khronos_int16_t; +typedef unsigned short int khronos_uint16_t; +typedef signed long int khronos_intptr_t; +typedef unsigned long int khronos_uintptr_t; +typedef signed long int khronos_ssize_t; +typedef unsigned long int khronos_usize_t; + +#if KHRONOS_SUPPORT_FLOAT +/* + * Float type + */ +typedef float khronos_float_t; +#endif + +#if KHRONOS_SUPPORT_INT64 +/* Time types + * + * These types can be used to represent a time interval in nanoseconds or + * an absolute Unadjusted System Time. Unadjusted System Time is the number + * of nanoseconds since some arbitrary system event (e.g. since the last + * time the system booted). The Unadjusted System Time is an unsigned + * 64 bit value that wraps back to 0 every 584 years. Time intervals + * may be either signed or unsigned. + */ +typedef khronos_uint64_t khronos_utime_nanoseconds_t; +typedef khronos_int64_t khronos_stime_nanoseconds_t; +#endif + +/* + * Dummy value used to pad enum types to 32 bits. + */ +#ifndef KHRONOS_MAX_ENUM +#define KHRONOS_MAX_ENUM 0x7FFFFFFF +#endif + +/* + * Enumerated boolean type + * + * Values other than zero should be considered to be true. Therefore + * comparisons should not be made against KHRONOS_TRUE. 
+ */ +typedef enum { + KHRONOS_FALSE = 0, + KHRONOS_TRUE = 1, + KHRONOS_BOOLEAN_ENUM_FORCE_SIZE = KHRONOS_MAX_ENUM +} khronos_boolean_enum_t; + +#endif /* __khrplatform_h_ */ diff --git a/opengles-3.1/mali_include/EGL/fbdev_window.h b/opengles-3.1/mali_include/EGL/fbdev_window.h new file mode 100644 index 0000000000..f5f90b5370 --- /dev/null +++ b/opengles-3.1/mali_include/EGL/fbdev_window.h @@ -0,0 +1,50 @@ +/* vim:set sts=4 ts=4 noexpandtab: */ +/* + * This confidential and proprietary software may be used only as + * authorised by a licensing agreement from ARM Limited + * (C) COPYRIGHT 2008,2009 ARM Limited + * ALL RIGHTS RESERVED + * The entire notice above must be reproduced on all authorised + * copies and copies may only be made to the extent permitted + * by a licensing agreement from ARM Limited. + */ + +#ifndef _FBDEV_WINDOW_H_ +#define _FBDEV_WINDOW_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +typedef enum +{ + FBDEV_PIXMAP_SUPPORTS_UMP = (1<<0) +} fbdev_pixmap_flags; + +typedef struct fbdev_window +{ + unsigned short width; + unsigned short height; +} fbdev_window; + +typedef struct fbdev_pixmap +{ + unsigned int height; + unsigned int width; + unsigned int bytes_per_pixel; + unsigned char buffer_size; + unsigned char red_size; + unsigned char green_size; + unsigned char blue_size; + unsigned char alpha_size; + unsigned char luminance_size; + fbdev_pixmap_flags flags; + unsigned short *data; +} fbdev_pixmap; + +#ifdef __cplusplus +} +#endif + + +#endif diff --git a/opengles-3.1/stubs/EGL.c b/opengles-3.1/stubs/EGL.c new file mode 100644 index 0000000000..dc0d4bd989 --- /dev/null +++ b/opengles-3.1/stubs/EGL.c @@ -0,0 +1,40 @@ +#include + +#define PRINT_STUB_ERROR printf("\n!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\nERROR: %s from stub libEGL.so library called! 
This library can be used to resolve OpenGL ES symbols at compile time but must *not* be in your runtime path (You need to use a real OpenGL ES implementation, this one is empty)\n!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n", __func__) + +void eglBindAPI(void) { PRINT_STUB_ERROR; return; } +void eglBindTexImage(void) { PRINT_STUB_ERROR; return; } +void eglChooseConfig(void) { PRINT_STUB_ERROR; return; } +void eglCopyBuffers(void) { PRINT_STUB_ERROR; return; } +void eglCreateContext(void) { PRINT_STUB_ERROR; return; } +void eglCreateImageKHR (void) { PRINT_STUB_ERROR; return; } +void eglCreatePbufferFromClientBuffer(void) { PRINT_STUB_ERROR; return; } +void eglCreatePbufferSurface(void) { PRINT_STUB_ERROR; return; } +void eglCreatePixmapSurface(void) { PRINT_STUB_ERROR; return; } +void eglCreateWindowSurface(void) { PRINT_STUB_ERROR; return; } +void eglDestroyContext(void) { PRINT_STUB_ERROR; return; } +void eglDestroyImageKHR (void) { PRINT_STUB_ERROR; return; } +void eglDestroySurface(void) { PRINT_STUB_ERROR; return; } +void eglGetConfigAttrib(void) { PRINT_STUB_ERROR; return; } +void eglGetConfigs(void) { PRINT_STUB_ERROR; return; } +void eglGetCurrentContext(void) { PRINT_STUB_ERROR; return; } +void eglGetCurrentDisplay(void) { PRINT_STUB_ERROR; return; } +void eglGetCurrentSurface(void) { PRINT_STUB_ERROR; return; } +void eglGetDisplay(void) { PRINT_STUB_ERROR; return; } +void eglGetError(void) { PRINT_STUB_ERROR; return; } +void eglGetProcAddress(void) { PRINT_STUB_ERROR; return; } +void eglInitialize(void) { PRINT_STUB_ERROR; return; } +void eglMakeCurrent(void) { PRINT_STUB_ERROR; return; } +void eglQueryAPI(void) { PRINT_STUB_ERROR; return; } +void eglQueryContext(void) { PRINT_STUB_ERROR; return; } +void eglQueryString(void) { PRINT_STUB_ERROR; return; } +void eglQuerySurface(void) { PRINT_STUB_ERROR; return; } +void eglReleaseTexImage(void) { PRINT_STUB_ERROR; return; } +void eglReleaseThread(void) { PRINT_STUB_ERROR; return; } +void eglSurfaceAttrib(void) 
{ PRINT_STUB_ERROR; return; } +void eglSwapBuffers(void) { PRINT_STUB_ERROR; return; } +void eglSwapInterval(void) { PRINT_STUB_ERROR; return; } +void eglTerminate(void) { PRINT_STUB_ERROR; return; } +void eglWaitClient(void) { PRINT_STUB_ERROR; return; } +void eglWaitGL(void) { PRINT_STUB_ERROR; return; } +void eglWaitNative(void) { PRINT_STUB_ERROR; return; } diff --git a/opengles-3.1/stubs/GLESv2.c b/opengles-3.1/stubs/GLESv2.c new file mode 100644 index 0000000000..c58f0f7772 --- /dev/null +++ b/opengles-3.1/stubs/GLESv2.c @@ -0,0 +1,269 @@ +#include + +#define PRINT_STUB_ERROR printf("\n!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\nERROR: %s from stub libGLESv2.so library called! This library can be used to resolve OpenGL ES symbols at compile time but must *not* be in your runtime path (You need to use a real OpenGL ES implementation, this one is empty)\n!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n", __func__) + +void glActiveTexture(void) { PRINT_STUB_ERROR; return; } +void glAttachShader(void) { PRINT_STUB_ERROR; return; } +void glBindAttribLocation(void) { PRINT_STUB_ERROR; return; } +void glBindBuffer(void) { PRINT_STUB_ERROR; return; } +void glBindFramebuffer(void) { PRINT_STUB_ERROR; return; } +void glBindRenderbuffer(void) { PRINT_STUB_ERROR; return; } +void glBindTexture(void) { PRINT_STUB_ERROR; return; } +void glBlendColor(void) { PRINT_STUB_ERROR; return; } +void glBlendEquation(void) { PRINT_STUB_ERROR; return; } +void glBlendEquationSeparate(void) { PRINT_STUB_ERROR; return; } +void glBlendFunc(void) { PRINT_STUB_ERROR; return; } +void glBlendFuncSeparate(void) { PRINT_STUB_ERROR; return; } +void glBufferData(void) { PRINT_STUB_ERROR; return; } +void glBufferSubData(void) { PRINT_STUB_ERROR; return; } +void glCheckFramebufferStatus(void) { PRINT_STUB_ERROR; return; } +void glClear(void) { PRINT_STUB_ERROR; return; } +void glClearColor(void) { PRINT_STUB_ERROR; return; } +void glClearDepthf(void) { PRINT_STUB_ERROR; return; } +void glClearStencil(void) { 
PRINT_STUB_ERROR; return; } +void glColorMask(void) { PRINT_STUB_ERROR; return; } +void glCompileShader(void) { PRINT_STUB_ERROR; return; } +void glCompressedTexImage2D(void) { PRINT_STUB_ERROR; return; } +void glCompressedTexSubImage2D(void) { PRINT_STUB_ERROR; return; } +void glCopyTexImage2D(void) { PRINT_STUB_ERROR; return; } +void glCopyTexSubImage2D(void) { PRINT_STUB_ERROR; return; } +void glCreateProgram(void) { PRINT_STUB_ERROR; return; } +void glCreateShader(void) { PRINT_STUB_ERROR; return; } +void glCullFace(void) { PRINT_STUB_ERROR; return; } +void glDeleteBuffers(void) { PRINT_STUB_ERROR; return; } +void glDeleteFramebuffers(void) { PRINT_STUB_ERROR; return; } +void glDeleteProgram(void) { PRINT_STUB_ERROR; return; } +void glDeleteRenderbuffers(void) { PRINT_STUB_ERROR; return; } +void glDeleteShader(void) { PRINT_STUB_ERROR; return; } +void glDeleteTextures(void) { PRINT_STUB_ERROR; return; } +void glDepthFunc(void) { PRINT_STUB_ERROR; return; } +void glDepthMask(void) { PRINT_STUB_ERROR; return; } +void glDepthRangef(void) { PRINT_STUB_ERROR; return; } +void glDetachShader(void) { PRINT_STUB_ERROR; return; } +void glDisable(void) { PRINT_STUB_ERROR; return; } +void glDisableVertexAttribArray(void) { PRINT_STUB_ERROR; return; } +void glDrawArrays(void) { PRINT_STUB_ERROR; return; } +void glDrawElements(void) { PRINT_STUB_ERROR; return; } +void glEnable(void) { PRINT_STUB_ERROR; return; } +void glEnableVertexAttribArray(void) { PRINT_STUB_ERROR; return; } +void glFinish(void) { PRINT_STUB_ERROR; return; } +void glFlush(void) { PRINT_STUB_ERROR; return; } +void glFramebufferRenderbuffer(void) { PRINT_STUB_ERROR; return; } +void glFramebufferTexture2D(void) { PRINT_STUB_ERROR; return; } +void glFrontFace(void) { PRINT_STUB_ERROR; return; } +void glGenBuffers(void) { PRINT_STUB_ERROR; return; } +void glGenerateMipmap(void) { PRINT_STUB_ERROR; return; } +void glGenFramebuffers(void) { PRINT_STUB_ERROR; return; } +void glGenRenderbuffers(void) { 
PRINT_STUB_ERROR; return; } +void glGenTextures(void) { PRINT_STUB_ERROR; return; } +void glGetActiveAttrib(void) { PRINT_STUB_ERROR; return; } +void glGetActiveUniform(void) { PRINT_STUB_ERROR; return; } +void glGetAttachedShaders(void) { PRINT_STUB_ERROR; return; } +void glGetAttribLocation(void) { PRINT_STUB_ERROR; return; } +void glGetBooleanv(void) { PRINT_STUB_ERROR; return; } +void glGetBufferParameteriv(void) { PRINT_STUB_ERROR; return; } +void glGetError(void) { PRINT_STUB_ERROR; return; } +void glGetFloatv(void) { PRINT_STUB_ERROR; return; } +void glGetFramebufferAttachmentParameteriv(void) { PRINT_STUB_ERROR; return; } +void glGetIntegerv(void) { PRINT_STUB_ERROR; return; } +void glGetProgramiv(void) { PRINT_STUB_ERROR; return; } +void glGetProgramInfoLog(void) { PRINT_STUB_ERROR; return; } +void glGetRenderbufferParameteriv(void) { PRINT_STUB_ERROR; return; } +void glGetShaderiv(void) { PRINT_STUB_ERROR; return; } +void glGetShaderInfoLog(void) { PRINT_STUB_ERROR; return; } +void glGetShaderPrecisionFormat(void) { PRINT_STUB_ERROR; return; } +void glGetShaderSource(void) { PRINT_STUB_ERROR; return; } +void glGetString(void) { PRINT_STUB_ERROR; return; } +void glGetTexParameterfv(void) { PRINT_STUB_ERROR; return; } +void glGetTexParameteriv(void) { PRINT_STUB_ERROR; return; } +void glGetUniformfv(void) { PRINT_STUB_ERROR; return; } +void glGetUniformiv(void) { PRINT_STUB_ERROR; return; } +void glGetUniformLocation(void) { PRINT_STUB_ERROR; return; } +void glGetVertexAttribfv(void) { PRINT_STUB_ERROR; return; } +void glGetVertexAttribiv(void) { PRINT_STUB_ERROR; return; } +void glGetVertexAttribPointerv(void) { PRINT_STUB_ERROR; return; } +void glHint(void) { PRINT_STUB_ERROR; return; } +void glIsBuffer(void) { PRINT_STUB_ERROR; return; } +void glIsEnabled(void) { PRINT_STUB_ERROR; return; } +void glIsFramebuffer(void) { PRINT_STUB_ERROR; return; } +void glIsProgram(void) { PRINT_STUB_ERROR; return; } +void glIsRenderbuffer(void) { PRINT_STUB_ERROR; 
return; } +void glIsShader(void) { PRINT_STUB_ERROR; return; } +void glIsTexture(void) { PRINT_STUB_ERROR; return; } +void glLineWidth(void) { PRINT_STUB_ERROR; return; } +void glLinkProgram(void) { PRINT_STUB_ERROR; return; } +void glPixelStorei(void) { PRINT_STUB_ERROR; return; } +void glPolygonOffset(void) { PRINT_STUB_ERROR; return; } +void glReadPixels(void) { PRINT_STUB_ERROR; return; } +void glReleaseShaderCompiler(void) { PRINT_STUB_ERROR; return; } +void glRenderbufferStorage(void) { PRINT_STUB_ERROR; return; } +void glSampleCoverage(void) { PRINT_STUB_ERROR; return; } +void glScissor(void) { PRINT_STUB_ERROR; return; } +void glShaderBinary(void) { PRINT_STUB_ERROR; return; } +void glShaderSource(void) { PRINT_STUB_ERROR; return; } +void glStencilFunc(void) { PRINT_STUB_ERROR; return; } +void glStencilFuncSeparate(void) { PRINT_STUB_ERROR; return; } +void glStencilMask(void) { PRINT_STUB_ERROR; return; } +void glStencilMaskSeparate(void) { PRINT_STUB_ERROR; return; } +void glStencilOp(void) { PRINT_STUB_ERROR; return; } +void glStencilOpSeparate(void) { PRINT_STUB_ERROR; return; } +void glTexImage2D(void) { PRINT_STUB_ERROR; return; } +void glTexParameterf(void) { PRINT_STUB_ERROR; return; } +void glTexParameterfv(void) { PRINT_STUB_ERROR; return; } +void glTexParameteri(void) { PRINT_STUB_ERROR; return; } +void glTexParameteriv(void) { PRINT_STUB_ERROR; return; } +void glTexSubImage2D(void) { PRINT_STUB_ERROR; return; } +void glUniform1f(void) { PRINT_STUB_ERROR; return; } +void glUniform1fv(void) { PRINT_STUB_ERROR; return; } +void glUniform1i(void) { PRINT_STUB_ERROR; return; } +void glUniform1iv(void) { PRINT_STUB_ERROR; return; } +void glUniform2f(void) { PRINT_STUB_ERROR; return; } +void glUniform2fv(void) { PRINT_STUB_ERROR; return; } +void glUniform2i(void) { PRINT_STUB_ERROR; return; } +void glUniform2iv(void) { PRINT_STUB_ERROR; return; } +void glUniform3f(void) { PRINT_STUB_ERROR; return; } +void glUniform3fv(void) { PRINT_STUB_ERROR; return; } 
+void glUniform3i(void) { PRINT_STUB_ERROR; return; } +void glUniform3iv(void) { PRINT_STUB_ERROR; return; } +void glUniform4f(void) { PRINT_STUB_ERROR; return; } +void glUniform4fv(void) { PRINT_STUB_ERROR; return; } +void glUniform4i(void) { PRINT_STUB_ERROR; return; } +void glUniform4iv(void) { PRINT_STUB_ERROR; return; } +void glUniformMatrix2fv(void) { PRINT_STUB_ERROR; return; } +void glUniformMatrix3fv(void) { PRINT_STUB_ERROR; return; } +void glUniformMatrix4fv(void) { PRINT_STUB_ERROR; return; } +void glUseProgram(void) { PRINT_STUB_ERROR; return; } +void glValidateProgram(void) { PRINT_STUB_ERROR; return; } +void glVertexAttrib1f(void) { PRINT_STUB_ERROR; return; } +void glVertexAttrib1fv(void) { PRINT_STUB_ERROR; return; } +void glVertexAttrib2f(void) { PRINT_STUB_ERROR; return; } +void glVertexAttrib2fv(void) { PRINT_STUB_ERROR; return; } +void glVertexAttrib3f(void) { PRINT_STUB_ERROR; return; } +void glVertexAttrib3fv(void) { PRINT_STUB_ERROR; return; } +void glVertexAttrib4f(void) { PRINT_STUB_ERROR; return; } +void glVertexAttrib4fv(void) { PRINT_STUB_ERROR; return; } +void glVertexAttribPointer(void) { PRINT_STUB_ERROR; return; } +void glViewport(void) { PRINT_STUB_ERROR; return; } + +/* OpenGL ES 3.0 */ + +void glReadBuffer(void) { PRINT_STUB_ERROR; return; } +void glDrawRangeElements(void) { PRINT_STUB_ERROR; return; } +void glTexImage3D(void) { PRINT_STUB_ERROR; return; } +void glTexSubImage3D(void) { PRINT_STUB_ERROR; return; } +void glCopyTexSubImage3D(void) { PRINT_STUB_ERROR; return; } +void glCompressedTexImage3D(void) { PRINT_STUB_ERROR; return; } +void glCompressedTexSubImage3D(void) { PRINT_STUB_ERROR; return; } +void glGenQueries(void) { PRINT_STUB_ERROR; return; } +void glDeleteQueries(void) { PRINT_STUB_ERROR; return; } +void glIsQuery(void) { PRINT_STUB_ERROR; return; } +void glBeginQuery(void) { PRINT_STUB_ERROR; return; } +void glEndQuery(void) { PRINT_STUB_ERROR; return; } +void glGetQueryiv(void) { PRINT_STUB_ERROR; return; } 
+void glGetQueryObjectuiv(void) { PRINT_STUB_ERROR; return; } +void glUnmapBuffer(void) { PRINT_STUB_ERROR; return; } +void glGetBufferPointerv(void) { PRINT_STUB_ERROR; return; } +void glDrawBuffers(void) { PRINT_STUB_ERROR; return; } +void glUniformMatrix2x3fv(void) { PRINT_STUB_ERROR; return; } +void glUniformMatrix3x2fv(void) { PRINT_STUB_ERROR; return; } +void glUniformMatrix2x4fv(void) { PRINT_STUB_ERROR; return; } +void glUniformMatrix4x2fv(void) { PRINT_STUB_ERROR; return; } +void glUniformMatrix3x4fv(void) { PRINT_STUB_ERROR; return; } +void glUniformMatrix4x3fv(void) { PRINT_STUB_ERROR; return; } +void glBlitFramebuffer(void) { PRINT_STUB_ERROR; return; } +void glRenderbufferStorageMultisample(void) { PRINT_STUB_ERROR; return; } +void glFramebufferTextureLayer(void) { PRINT_STUB_ERROR; return; } +void glMapBufferRange(void) { PRINT_STUB_ERROR; return; } +void glFlushMappedBufferRange(void) { PRINT_STUB_ERROR; return; } +void glBindVertexArray(void) { PRINT_STUB_ERROR; return; } +void glDeleteVertexArrays(void) { PRINT_STUB_ERROR; return; } +void glGenVertexArrays(void) { PRINT_STUB_ERROR; return; } +void glIsVertexArray(void) { PRINT_STUB_ERROR; return; } +void glGetIntegeri_v(void) { PRINT_STUB_ERROR; return; } +void glBeginTransformFeedback(void) { PRINT_STUB_ERROR; return; } +void glEndTransformFeedback(void) { PRINT_STUB_ERROR; return; } +void glBindBufferRange(void) { PRINT_STUB_ERROR; return; } +void glBindBufferBase(void) { PRINT_STUB_ERROR; return; } +void glTransformFeedbackVaryings(void) { PRINT_STUB_ERROR; return; } +void glGetTransformFeedbackVarying(void) { PRINT_STUB_ERROR; return; } +void glVertexAttribIPointer(void) { PRINT_STUB_ERROR; return; } +void glGetVertexAttribIiv(void) { PRINT_STUB_ERROR; return; } +void glGetVertexAttribIuiv(void) { PRINT_STUB_ERROR; return; } +void glVertexAttribI4i(void) { PRINT_STUB_ERROR; return; } +void glVertexAttribI4ui(void) { PRINT_STUB_ERROR; return; } +void glVertexAttribI4iv(void) { PRINT_STUB_ERROR; 
return; } +void glVertexAttribI4uiv(void) { PRINT_STUB_ERROR; return; } +void glGetUniformuiv(void) { PRINT_STUB_ERROR; return; } +void glGetFragDataLocation(void) { PRINT_STUB_ERROR; return; } +void glUniform1ui(void) { PRINT_STUB_ERROR; return; } +void glUniform2ui(void) { PRINT_STUB_ERROR; return; } +void glUniform3ui(void) { PRINT_STUB_ERROR; return; } +void glUniform4ui(void) { PRINT_STUB_ERROR; return; } +void glUniform1uiv(void) { PRINT_STUB_ERROR; return; } +void glUniform2uiv(void) { PRINT_STUB_ERROR; return; } +void glUniform3uiv(void) { PRINT_STUB_ERROR; return; } +void glUniform4uiv(void) { PRINT_STUB_ERROR; return; } +void glClearBufferiv(void) { PRINT_STUB_ERROR; return; } +void glClearBufferuiv(void) { PRINT_STUB_ERROR; return; } +void glClearBufferfv(void) { PRINT_STUB_ERROR; return; } +void glClearBufferfi(void) { PRINT_STUB_ERROR; return; } +void glGetStringi(void) { PRINT_STUB_ERROR; return; } +void glCopyBufferSubData(void) { PRINT_STUB_ERROR; return; } +void glGetUniformIndices(void) { PRINT_STUB_ERROR; return; } +void glGetActiveUniformsiv(void) { PRINT_STUB_ERROR; return; } +void glGetUniformBlockIndex(void) { PRINT_STUB_ERROR; return; } +void glGetActiveUniformBlockiv(void) { PRINT_STUB_ERROR; return; } +void glGetActiveUniformBlockName(void) { PRINT_STUB_ERROR; return; } +void glUniformBlockBinding(void) { PRINT_STUB_ERROR; return; } +void glDrawArraysInstanced(void) { PRINT_STUB_ERROR; return; } +void glDrawElementsInstanced(void) { PRINT_STUB_ERROR; return; } +void glFenceSync(void) { PRINT_STUB_ERROR; return; } +void glIsSync(void) { PRINT_STUB_ERROR; return; } +void glDeleteSync(void) { PRINT_STUB_ERROR; return; } +void glClientWaitSync(void) { PRINT_STUB_ERROR; return; } +void glWaitSync(void) { PRINT_STUB_ERROR; return; } +void glGetInteger64v(void) { PRINT_STUB_ERROR; return; } +void glGetSynciv(void) { PRINT_STUB_ERROR; return; } +void glGetInteger64i_v(void) { PRINT_STUB_ERROR; return; } +void glGetBufferParameteri64v(void) { 
PRINT_STUB_ERROR; return; } +void glGenSamplers(void) { PRINT_STUB_ERROR; return; } +void glDeleteSamplers(void) { PRINT_STUB_ERROR; return; } +void glIsSampler(void) { PRINT_STUB_ERROR; return; } +void glBindSampler(void) { PRINT_STUB_ERROR; return; } +void glSamplerParameteri(void) { PRINT_STUB_ERROR; return; } +void glSamplerParameteriv(void) { PRINT_STUB_ERROR; return; } +void glSamplerParameterf(void) { PRINT_STUB_ERROR; return; } +void glSamplerParameterfv(void) { PRINT_STUB_ERROR; return; } +void glGetSamplerParameteriv(void) { PRINT_STUB_ERROR; return; } +void glGetSamplerParameterfv(void) { PRINT_STUB_ERROR; return; } +void glVertexAttribDivisor(void) { PRINT_STUB_ERROR; return; } +void glBindTransformFeedback(void) { PRINT_STUB_ERROR; return; } +void glDeleteTransformFeedbacks(void) { PRINT_STUB_ERROR; return; } +void glGenTransformFeedbacks(void) { PRINT_STUB_ERROR; return; } +void glIsTransformFeedback(void) { PRINT_STUB_ERROR; return; } +void glPauseTransformFeedback(void) { PRINT_STUB_ERROR; return; } +void glResumeTransformFeedback(void) { PRINT_STUB_ERROR; return; } +void glGetProgramBinary(void) { PRINT_STUB_ERROR; return; } +void glProgramBinary(void) { PRINT_STUB_ERROR; return; } +void glProgramParameteri(void) { PRINT_STUB_ERROR; return; } +void glInvalidateFramebuffer(void) { PRINT_STUB_ERROR; return; } +void glInvalidateSubFramebuffer(void) { PRINT_STUB_ERROR; return; } +void glTexStorage2D(void) { PRINT_STUB_ERROR; return; } +void glTexStorage3D(void) { PRINT_STUB_ERROR; return; } +void glGetInternalformativ(void) { PRINT_STUB_ERROR; return; } + +/* OpenGL ES 3.1, incomplete */ + +void glProgramUniform1ui(void) { PRINT_STUB_ERROR; return; } +void glDispatchCompute(void) { PRINT_STUB_ERROR; return; } +void glMemoryBarrier(void) { PRINT_STUB_ERROR; return; } +void glBindImageTexture(void) { PRINT_STUB_ERROR; return; } +void glProgramUniformMatrix4fv(void) { PRINT_STUB_ERROR; return; } +void glProgramUniform1f(void) { PRINT_STUB_ERROR; return; } 
+void glProgramUniform2f(void) { PRINT_STUB_ERROR; return; } +void glProgramUniform3f(void) { PRINT_STUB_ERROR; return; } +void glProgramUniform3fv(void) { PRINT_STUB_ERROR; return; } +void glProgramUniform4f(void) { PRINT_STUB_ERROR; return; } +void glDrawElementsIndirect(void) { PRINT_STUB_ERROR; return; } + + diff --git a/opengles-3.1/stubs/Readme.txt b/opengles-3.1/stubs/Readme.txt new file mode 100644 index 0000000000..4c079b1b38 --- /dev/null +++ b/opengles-3.1/stubs/Readme.txt @@ -0,0 +1,2 @@ +These are dummy libraries for linking against at build time. +The application will require the real device libraries to run. diff --git a/opengles-3.1/stubs/SConscript b/opengles-3.1/stubs/SConscript new file mode 100644 index 0000000000..5d4cb87811 --- /dev/null +++ b/opengles-3.1/stubs/SConscript @@ -0,0 +1,11 @@ +Import("env") + +egl = env.SharedLibrary("EGL", "EGL.c") +alias = Alias("egl", egl) +Default(alias) +Export("egl") + +glesv2 = env.SharedLibrary("GLESv2", "GLESv2.c") +alias = Alias("glesv2", glesv2) +Default(alias) +Export("glesv2") diff --git a/scripts/check_bad_style.sh b/scripts/check_bad_style.sh index 827163f02c..cd6e9042c0 100755 --- a/scripts/check_bad_style.sh +++ b/scripts/check_bad_style.sh @@ -21,7 +21,7 @@ then exit -1 fi -grep -HnRE --exclude-dir=assembly "\buint " --exclude-dir=cl_kernels $DIRECTORIES | tee bad_style.log +grep -HnRE --exclude-dir=assembly "\buint " --exclude-dir=cl_kernels --exclude-dir=cs_shaders $DIRECTORIES | tee bad_style.log if [[ $(cat bad_style.log | wc -l) > 0 ]] then echo "" diff --git a/scripts/clang_tidy_rules.py b/scripts/clang_tidy_rules.py index 09dc2e1c70..c70395fae8 100755 --- a/scripts/clang_tidy_rules.py +++ b/scripts/clang_tidy_rules.py @@ -6,13 +6,14 @@ import re import sys def get_list_includes(): - return "include . 3rdparty/include kernels computer_vision".split() + return "opengles-3.1/include opengles-3.1/mali_include include . 
3rdparty/include kernels computer_vision".split() def get_list_flags( filename, arch): assert arch in ["armv7", "aarch64"] flags = ["-std=c++11"] flags.append("-DARM_COMPUTE_CPP_SCHEDULER=1") flags.append("-DARM_COMPUTE_CL") + flags.append("-DARM_COMPUTE_GC") if arch == "aarch64": flags.append("-DARM_COMPUTE_AARCH64_V8_2") return flags diff --git a/scripts/fix_code_formatting.sh b/scripts/fix_code_formatting.sh index a07d2615af..721ade853f 100755 --- a/scripts/fix_code_formatting.sh +++ b/scripts/fix_code_formatting.sh @@ -22,7 +22,7 @@ DIRECTORIES="./arm_compute ./src ./examples ./tests ./utils ./support" if [ $# -eq 0 ] then - files=$(find $DIRECTORIES -type f \( -name \*.cpp -o -iname \*.h -o -name \*.inl -o -name \*.cl \)) + files=$(find $DIRECTORIES -type f \( -name \*.cpp -o -iname \*.h -o -name \*.inl -o -name \*.cl -o -name \*.cs \)) else files=$@ fi diff --git a/src/core/CL/cl_kernels/direct_convolution1x1.cl b/src/core/CL/cl_kernels/direct_convolution1x1.cl index 7b73b85eac..484bc35ef1 100644 --- a/src/core/CL/cl_kernels/direct_convolution1x1.cl +++ b/src/core/CL/cl_kernels/direct_convolution1x1.cl @@ -153,7 +153,7 @@ inline VEC_DATA_TYPE(DATA_TYPE, 8) extract_input_stride3_8(__global const DATA_T * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in bytes) * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes) * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor - * @param[out] weights_ptr Pointer to the weights tensor. Supported data types: same as @p weights_ptr + * @param[out] weights_ptr Pointer to the weights tensor. 
Supported data types: same as @p src_ptr * @param[in] weights_stride_x Stride of the weights tensor in X dimension (in bytes) * @param[in] weights_step_x weights_stride_x * number of elements along X processed per workitem(in bytes) * @param[in] weights_stride_y Stride of the weights tensor in Y dimension (in bytes) @@ -241,7 +241,7 @@ __kernel void direct_convolution1x1( * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in bytes) * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes) * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor - * @param[out] weights_ptr Pointer to the weights tensor. Supported data types: same as @p weights_ptr + * @param[out] weights_ptr Pointer to the weights tensor. Supported data types: same as @p src_ptr * @param[in] weights_stride_x Stride of the weights tensor in X dimension (in bytes) * @param[in] weights_step_x weights_stride_x * number of elements along X processed per workitem(in bytes) * @param[in] weights_stride_y Stride of the weights tensor in Y dimension (in bytes) diff --git a/src/core/CL/cl_kernels/direct_convolution3x3.cl b/src/core/CL/cl_kernels/direct_convolution3x3.cl index 1420d7c873..e6e3007c95 100644 --- a/src/core/CL/cl_kernels/direct_convolution3x3.cl +++ b/src/core/CL/cl_kernels/direct_convolution3x3.cl @@ -102,7 +102,7 @@ MULQ_SAT_IMPL(qs32x8, qs32x8) * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in bytes) * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes) * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor - * @param[out] weights_ptr Pointer to the weights tensor. Supported data types: same as @p weights_ptr + * @param[out] weights_ptr Pointer to the weights tensor. 
Supported data types: same as @p src_ptr * @param[in] weights_stride_x Stride of the weights tensor in X dimension (in bytes) * @param[in] weights_step_x weights_stride_x * number of elements along X processed per workitem(in bytes) * @param[in] weights_stride_y Stride of the weights tensor in Y dimension (in bytes) @@ -198,7 +198,7 @@ __kernel void direct_convolution3x3( * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in bytes) * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes) * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor - * @param[out] weights_ptr Pointer to the weights tensor. Supported data types: same as @p weights_ptr + * @param[out] weights_ptr Pointer to the weights tensor. Supported data types: same as @p src_ptr * @param[in] weights_stride_x Stride of the weights tensor in X dimension (in bytes) * @param[in] weights_step_x weights_stride_x * number of elements along X processed per workitem(in bytes) * @param[in] weights_stride_y Stride of the weights tensor in Y dimension (in bytes) diff --git a/src/core/CL/cl_kernels/direct_convolution5x5.cl b/src/core/CL/cl_kernels/direct_convolution5x5.cl index 6fdd019a14..12cf0fb68e 100644 --- a/src/core/CL/cl_kernels/direct_convolution5x5.cl +++ b/src/core/CL/cl_kernels/direct_convolution5x5.cl @@ -91,7 +91,7 @@ * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in bytes) * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes) * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor - * @param[out] weights_ptr Pointer to the weights tensor. Supported data types: same as @p weights_ptr + * @param[out] weights_ptr Pointer to the weights tensor. 
Supported data types: same as @p src_ptr * @param[in] weights_stride_x Stride of the weights tensor in X dimension (in bytes) * @param[in] weights_step_x weights_stride_x * number of elements along X processed per workitem(in bytes) * @param[in] weights_stride_y Stride of the weights tensor in Y dimension (in bytes) @@ -197,7 +197,7 @@ __kernel void direct_convolution5x5( * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in bytes) * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes) * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor - * @param[out] weights_ptr Pointer to the weights tensor. Supported data types: same as @p weights_ptr + * @param[out] weights_ptr Pointer to the weights tensor. Supported data types: same as @p src_ptr * @param[in] weights_stride_x Stride of the weights tensor in X dimension (in bytes) * @param[in] weights_step_x weights_stride_x * number of elements along X processed per workitem(in bytes) * @param[in] weights_stride_y Stride of the weights tensor in Y dimension (in bytes) diff --git a/src/core/Error.cpp b/src/core/Error.cpp index 2e699feeb9..3b0a012f5f 100644 --- a/src/core/Error.cpp +++ b/src/core/Error.cpp @@ -30,23 +30,29 @@ using namespace arm_compute; +Error arm_compute::create_error_va_list(ErrorCode error_code, const char *function, const char *file, const int line, const char *msg, va_list args) +{ + char out[512]; + int offset = snprintf(out, sizeof(out), "in %s %s:%d: ", function, file, line); + vsnprintf(out + offset, sizeof(out) - offset, msg, args); + + return Error(error_code, std::string(out)); +} + Error arm_compute::create_error(ErrorCode error_code, const char *function, const char *file, const int line, const char *msg, ...) 
{ - char out[512]; va_list args; va_start(args, msg); - int offset = snprintf(out, sizeof(out), "in %s %s:%d: ", function, file, line); - vsnprintf(out + offset, sizeof(out) - offset, msg, args); + auto err = create_error_va_list(error_code, function, file, line, msg, args); va_end(args); - - return Error(error_code, std::string(out)); + return err; } void arm_compute::error(const char *function, const char *file, const int line, const char *msg, ...) { va_list args; va_start(args, msg); - auto err = create_error(ErrorCode::RUNTIME_ERROR, function, file, line, msg, args); + auto err = create_error_va_list(ErrorCode::RUNTIME_ERROR, function, file, line, msg, args); va_end(args); throw std::runtime_error(err.description()); } diff --git a/src/core/GLES_COMPUTE/GCKernelLibrary.cpp b/src/core/GLES_COMPUTE/GCKernelLibrary.cpp new file mode 100644 index 0000000000..fd362f1665 --- /dev/null +++ b/src/core/GLES_COMPUTE/GCKernelLibrary.cpp @@ -0,0 +1,716 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h" + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/Utils.h" + +#include +#include +#include +#include +#include +#include + +using namespace arm_compute; + +GCProgram::GCProgram() + : _name(), _source() +{ +} + +GCProgram::GCProgram(std::string name, std::string source) + : _name(std::move(name)), _source(std::move(source)) +{ +} + +GLuint GCProgram::link_program(GLuint shader) +{ + GLuint program = ARM_COMPUTE_GL_CHECK(glCreateProgram()); + + GLint rvalue; + GLsizei length; + + ARM_COMPUTE_GL_CHECK(glAttachShader(program, shader)); + ARM_COMPUTE_GL_CHECK(glLinkProgram(program)); + ARM_COMPUTE_GL_CHECK(glDetachShader(program, shader)); + ARM_COMPUTE_GL_CHECK(glDeleteShader(shader)); + + // Check if there were some issues when linking the shader. 
+ ARM_COMPUTE_GL_CHECK(glGetProgramiv(program, GL_LINK_STATUS, &rvalue)); + + if(rvalue == 0) + { + ARM_COMPUTE_GL_CHECK(glGetProgramiv(program, GL_INFO_LOG_LENGTH, &length)); + + std::vector log(length); + ARM_COMPUTE_GL_CHECK(glGetProgramInfoLog(program, length, nullptr, log.data())); + ARM_COMPUTE_ERROR("Error: Linker log:\n%s\n", log.data()); + + return 0; + } + + ARM_COMPUTE_GL_CHECK(glUseProgram(program)); + + return program; +} + +GLuint GCProgram::compile_shader(const std::string &build_options) +{ + GLuint shader = ARM_COMPUTE_GL_CHECK(glCreateShader(GL_COMPUTE_SHADER)); + + const char *src[] + { + "#version 310 es\n", + build_options.c_str(), + _source.c_str() + }; + + ARM_COMPUTE_GL_CHECK(glShaderSource(shader, sizeof(src) / sizeof(src[0]), src, nullptr)); + + ARM_COMPUTE_GL_CHECK(glCompileShader(shader)); + + // Check if there were any issues when compiling the shader + GLint rvalue; + GLsizei length; + + ARM_COMPUTE_GL_CHECK(glGetShaderiv(shader, GL_COMPILE_STATUS, &rvalue)); + + if(rvalue == 0) + { + ARM_COMPUTE_GL_CHECK(glGetShaderiv(shader, GL_INFO_LOG_LENGTH, &length)); + + std::vector log(length); + ARM_COMPUTE_GL_CHECK(glGetShaderInfoLog(shader, length, nullptr, log.data())); + +#ifdef ARM_COMPUTE_DEBUG_ENABLED + std::istringstream ss(_source); + std::stringstream output_stream; + std::string line; + size_t line_num = 1; + + ARM_COMPUTE_LOG_INFO_MSG_WITH_FORMAT_CORE("GLES Shader build options:\n%s\n", build_options.c_str()); + while(std::getline(ss, line, '\n')) + { + output_stream << std::setw(6) << line_num << ": " << line << std::endl; + line_num++; + } + ARM_COMPUTE_LOG_INFO_STREAM_CORE("GLES Shader source code:" << output_stream.rdbuf()); +#endif /* ARM_COMPUTE_DEBUG_ENABLED */ + + ARM_COMPUTE_ERROR("Error: Compiler log:\n%s\n", log.data()); + + return 0; + } + + return shader; +} + +GCKernel::GCKernel() + : _name(), _program(), _params(), _shader_params(), _shader_params_binding_point(), _shader_params_index(), _shader_params_size() +{ +} + 
+GCKernel::GCKernel(std::string name, GLuint program) + : _name(std::move(name)), + _program(program), + _params(), + _shader_params(0), + _shader_params_binding_point(0), + _shader_params_index(0), + _shader_params_size(0) +{ + _params.clear(); + + ARM_COMPUTE_GL_CHECK(glGenBuffers(1, &_shader_params)); + + _shader_params_index = ARM_COMPUTE_GL_CHECK(glGetUniformBlockIndex(_program, _shader_params_name)); + ARM_COMPUTE_ERROR_ON_MSG((_shader_params_index == GL_INVALID_INDEX), "Failed to get index of %s", _shader_params_name); + ARM_COMPUTE_GL_CHECK(glGetActiveUniformBlockiv(_program, _shader_params_index, GL_UNIFORM_BLOCK_DATA_SIZE, &_shader_params_size)); + ARM_COMPUTE_ERROR_ON_MSG((_shader_params_size == 0), "Failed to get size of %s", _shader_params_name); +} + +void GCKernel::cleanup() +{ + ARM_COMPUTE_GL_CHECK(glDeleteBuffers(1, &_shader_params)); + ARM_COMPUTE_GL_CHECK(glBindBuffer(GL_UNIFORM_BUFFER, 0)); + ARM_COMPUTE_GL_CHECK(glDeleteProgram(_program)); + ARM_COMPUTE_GL_CHECK(glUseProgram(0)); +} + +void GCKernel::use() +{ + ARM_COMPUTE_GL_CHECK(glUseProgram(_program)); +} + +void GCKernel::unuse() +{ + ARM_COMPUTE_GL_CHECK(glUseProgram(0)); +} + +void GCKernel::update_shader_params() +{ + ARM_COMPUTE_ERROR_ON_MSG((_shader_params_size != (int)(_params.size() * sizeof(_params[0]))), "Params size (%d) is not equal to shader params block size (%d)", _params.size() * sizeof(_params[0]), + _shader_params_size); + + ARM_COMPUTE_GL_CHECK(glUniformBlockBinding(_program, _shader_params_index, _shader_params_binding_point)); + ARM_COMPUTE_GL_CHECK(glBindBufferBase(GL_UNIFORM_BUFFER, _shader_params_binding_point, _shader_params)); + ARM_COMPUTE_GL_CHECK(glBindBuffer(GL_UNIFORM_BUFFER, _shader_params)); + ARM_COMPUTE_GL_CHECK(glBufferData(GL_UNIFORM_BUFFER, _shader_params_size, _params.data(), GL_DYNAMIC_DRAW)); + ARM_COMPUTE_GL_CHECK(glBindBuffer(GL_UNIFORM_BUFFER, 0)); +} + +const std::map GCKernelLibrary::_shader_program_map = +{ + { "absdiff", "absdiff.cs" }, + { 
"col2im", "convolution_layer.cs" }, + { "direct_convolution1x1", "direct_convolution1x1.cs" }, + { "direct_convolution3x3", "direct_convolution3x3.cs" }, + { "direct_convolution5x5", "direct_convolution5x5.cs" }, + { "pooling_layer_2", "pooling_layer.cs" }, + { "pooling_layer_3", "pooling_layer.cs" }, + { "pooling_layer_7", "pooling_layer.cs" }, + { "pooling_layer_3_optimized", "pooling_layer.cs" }, + { "pooling_layer_n", "pooling_layer.cs" }, + { "fill_image_borders_replicate", "fill_border.cs" }, + { "fill_image_borders_constant", "fill_border.cs" }, + { "gemm_accumulate_biases", "gemm.cs" }, + { "gemm_interleave4x4", "gemm.cs" }, + { "gemm_ma", "gemm.cs" }, + { "gemm_mm_interleaved_transposed", "gemm.cs" }, + { "gemm_mm_floating_point", "gemm.cs" }, + { "gemm_transpose1x4", "gemm.cs" }, + { "im2col_kernel3x3_padx0_pady0", "convolution_layer.cs" }, + { "im2col_generic", "convolution_layer.cs" }, + { "im2col_reduced", "convolution_layer.cs" }, + { "transpose", "transpose.cs" }, + { "activation_layer", "activation_layer.cs" }, + { "softmax_layer_max", "softmax_layer.cs" }, + { "softmax_layer_shift_exp_sum", "softmax_layer.cs" }, + { "softmax_layer_norm", "softmax_layer.cs" }, + { "pixelwise_mul_float", "pixelwise_mul_float.cs" }, + { "normalization_layer", "normalization_layer.cs" }, + { "batchnormalization_layer", "batchnormalization_layer.cs" }, + { "concatenate_depth", "concatenate.cs" }, + { "dropout", "dropout.cs" }, +}; + +const std::map GCKernelLibrary::_program_source_map = +{ +#ifdef EMBEDDED_KERNELS + { + "absdiff.cs", +#include "./cs_shaders/absdiff.csembed" + }, + { + "convolution_layer.cs", +#include "./cs_shaders/convolution_layer.csembed" + }, + { + "direct_convolution1x1.cs", +#include "./cs_shaders/direct_convolution1x1.csembed" + }, + { + "direct_convolution3x3.cs", +#include "./cs_shaders/direct_convolution3x3.csembed" + }, + { + "direct_convolution5x5.cs", +#include "./cs_shaders/direct_convolution5x5.csembed" + }, + { + "pooling_layer.cs", 
+#include "./cs_shaders/pooling_layer.csembed" + }, + { + "fill_border.cs", +#include "./cs_shaders/fill_border.csembed" + }, + { + "gemm.cs", +#include "./cs_shaders/gemm.csembed" + }, + { + "transpose.cs", +#include "./cs_shaders/transpose.csembed" + }, + { + "activation_layer.cs", +#include "./cs_shaders/activation_layer.csembed" + }, + { + "softmax_layer.cs", +#include "./cs_shaders/softmax_layer.csembed" + }, + { + "pixelwise_mul_float.cs", +#include "./cs_shaders/pixelwise_mul_float.csembed" + }, + { + "normalization_layer.cs", +#include "./cs_shaders/normalization_layer.csembed" + }, + { + "batchnormalization_layer.cs", +#include "./cs_shaders/batchnormalization_layer.csembed" + }, + { + "concatenate.cs", +#include "./cs_shaders/concatenate.csembed" + }, + { + "dropout.cs", +#include "./cs_shaders/dropout.csembed" + }, +#endif /* EMBEDDED_KERNELS */ +}; + +GCKernelLibrary::GCKernelLibrary() + : _display(EGL_NO_DISPLAY), _context(EGL_NO_CONTEXT), _frame_buffer(0), _tex_rt(0), _own_context(false), _shader_path("./"), _programs_map(), _built_programs_map() +{ +} + +GCKernelLibrary &GCKernelLibrary::get() +{ + static GCKernelLibrary _kernel_library; + return _kernel_library; +} + +GCKernel GCKernelLibrary::create_kernel(const std::string &shader_name, const StringSet &build_options_set) const +{ + // Find which program contains the kernel + auto shader_program_it = _shader_program_map.find(shader_name); + + if(_shader_program_map.end() == shader_program_it) + { + ARM_COMPUTE_ERROR("Shader %s not found in the GCKernelLibrary", shader_name.c_str()); + } + + // Check if the program has been built before with same build options. 
+ const std::string program_name = shader_program_it->second; + const std::string build_options = stringify_set(build_options_set); + const std::string built_program_name = program_name + "_" + build_options; + auto built_program_it = _built_programs_map.find(built_program_name); + + GCKernel kernel; + + if(_built_programs_map.end() != built_program_it) + { + // If program has been built, retrieve to create kernel from it + kernel = built_program_it->second; + kernel.use(); + } + else + { + GCProgram program = load_program(program_name); + + std::string source_name = _shader_path + shader_program_it->second; + + // load shader + GLuint shader = program.compile_shader(build_options); + + // Build program + GLuint gles_program = program.link_program(shader); + + // Create GCKernel + kernel = GCKernel(shader_name, gles_program); + + // Add built program to internal map + _built_programs_map.emplace(built_program_name, kernel); + } + + return kernel; +} + +const std::string GCKernelLibrary::preprocess_shader(const std::string &shader_source) const +{ + enum class ParserStage + { + FIRST, + SKIP_COMMENTS = FIRST, + RESOLVE_INCLUDES, + SKIP_PREPROCESSOR_DIRECTIVES, + SEARCH_MACRO_DEFINITIONS, + EXPAND_MACRO_USES, + LAST + }; + + struct MacroDefinitionInfo + { + const std::vector param_list; + const std::string content; + }; + + // Found macro definitions so far + std::map macro_definitions; + + // Define a GLES compute shader parser function + std::function cs_parser; + cs_parser = [&](const std::string & src, ParserStage stage, int nested_level) -> std::string + { + std::string dst; + + if(stage == ParserStage::LAST || std::regex_match(src, std::regex(R"(\s*)"))) + { + return src; + } + auto next_stage = static_cast(static_cast(stage) + 1); + + std::string search_pattern; + switch(stage) + { + case ParserStage::SKIP_COMMENTS: + search_pattern = R"((/\*([^*]|\n|(\*+([^*/]|\n)))*\*+/)|(//.*))"; + break; + case ParserStage::RESOLVE_INCLUDES: + search_pattern = 
R"rgx((?:^|\n)[ \t]*#include "(.*)")rgx"; + break; + case ParserStage::SKIP_PREPROCESSOR_DIRECTIVES: + search_pattern = R"((^|\n)[ \t]*(#ifdef|#ifndef|#if)[^\n]+)"; + break; + case ParserStage::SEARCH_MACRO_DEFINITIONS: + search_pattern = R"((?:^|\n)[ \t]*#define[ \t]+(\w+)(?:\((\w+(?:[ \t]*,[ \t]*\w+)*)\))?(?: |\t|\\\n)*((?:(?:[^\\\n]|\\[^\n])*\\+\n)*(?:[ \t]*[^ \t\n]+)*)[ \t]*)"; + break; + case ParserStage::EXPAND_MACRO_USES: + { + if(macro_definitions.empty()) + { + // Nothing to expand + return src; + } + int i = 0; + for(auto &def : macro_definitions) + { + if(i == 0) + { + search_pattern = R"((\b)" + def.first; + } + else + { + search_pattern += R"(\b|\b)" + def.first; + } + i++; + } + search_pattern += R"(\b))"; + break; + } + default: + break; + } + + std::regex search_regex(search_pattern); + std::smatch match; + ptrdiff_t parsed_pos = 0; + if(std::regex_search(src, match, search_regex)) + { + // Pass the content before the match to the next stage + dst.append(cs_parser(src.substr(0, match.position()), next_stage, 0)); + parsed_pos = match.position() + match.length(); + + // Deal with the matched content + switch(stage) + { + case ParserStage::RESOLVE_INCLUDES: + { + // Replace with the included file contents + // And parse the content from the first stage + const std::string source_name = _shader_path + match.str(1); + dst.append(cs_parser(read_file(source_name, false), ParserStage::FIRST, 0)); + break; + } + case ParserStage::SEARCH_MACRO_DEFINITIONS: + { + std::regex params_regex(R"(\b\w+\b)"); + const std::string macro_param_str = match.str(2); + const std::vector macro_param_list( + std::sregex_token_iterator(macro_param_str.begin(), + macro_param_str.end(), + params_regex), + std::sregex_token_iterator()); + + const MacroDefinitionInfo info = + { + macro_param_list, + match.str(3) + }; + // Collect the macro definition data and not change the shader source + macro_definitions.insert(std::pair(match.str(1), info)); + dst.append(match.str()); + break; 
+ } + case ParserStage::EXPAND_MACRO_USES: + { + ptrdiff_t args_str_length = 0; + std::vector args_list; + + // Walk through argument list, because the regular expression does NOT support nested parentheses + size_t cur_args_str_pos = match.position() + match.length(); + if(src[cur_args_str_pos++] == '(') + { + int nested_parentheses = 0; + ptrdiff_t cur_arg_pos = cur_args_str_pos; + ptrdiff_t cur_arg_length = 0; + + args_str_length++; + while(src[cur_args_str_pos] != ')' || nested_parentheses != 0) + { + switch(src[cur_args_str_pos++]) + { + case '(': + nested_parentheses++; + cur_arg_length++; + break; + case ',': + if(nested_parentheses == 0) + { + args_list.push_back(src.substr(cur_arg_pos, cur_arg_length)); + cur_arg_pos = cur_args_str_pos; + cur_arg_length = 0; + } + else + { + cur_arg_length++; + } + break; + case ' ': + case '\t': + if(cur_arg_length == 0) + { + cur_arg_pos++; + } + else + { + cur_arg_length++; + } + break; + case ')': + nested_parentheses--; + // no break here! 
+ default: + cur_arg_length++; + break; + } + args_str_length++; + } + if(src[cur_args_str_pos] == ')' && nested_parentheses == 0) + { + args_list.push_back(src.substr(cur_arg_pos, cur_arg_length)); + } + args_str_length++; + } + + std::string expanded_content = match.str(); + const std::vector macro_param_list = macro_definitions.at(match.str()).param_list; + + if((nested_level != 0 || !macro_param_list.empty()) && macro_param_list.size() == args_list.size()) + { + parsed_pos += args_str_length; + expanded_content = macro_definitions.at(match.str()).content; + size_t i = 0; + for(auto ¶m_name : macro_param_list) + { + std::regex params_regex(R"(\b)" + param_name + R"(\b)"); + expanded_content.assign(std::regex_replace(expanded_content, params_regex, args_list[i])); + ++i; + } + // Expand macro recursively + expanded_content = cs_parser(expanded_content, stage, nested_level + 1); + + if(nested_level == 0) + { + const std::regex token_pasting_rgx = std::regex(R"(\b##\b)"); + if(std::regex_search(expanded_content, token_pasting_rgx)) + { + // Remove token pasting operator "##" + expanded_content.assign(std::regex_replace(expanded_content, std::regex(token_pasting_rgx), "")); + // Trim trailing whitespace + expanded_content.assign(std::regex_replace(expanded_content, std::regex(R"([ \t]*\\\n)"), "\n")); + } + else + { + // Do not expand the macro if the result does not have token pasting operator "##" + expanded_content = src.substr(match.position(), match.length() + args_str_length); + } + } + } + dst.append(expanded_content); + break; + } + case ParserStage::SKIP_COMMENTS: + case ParserStage::SKIP_PREPROCESSOR_DIRECTIVES: + default: + dst.append(match.str()); + break; + } + next_stage = stage; + } + dst.append(cs_parser(src.substr(parsed_pos, src.length() - parsed_pos), next_stage, 0)); + + return dst; + }; + + return cs_parser(shader_source, ParserStage::FIRST, 0); +} + +const GCProgram &GCKernelLibrary::load_program(const std::string &program_name) const +{ + 
const auto program_it = _programs_map.find(program_name); + + if(program_it != _programs_map.end()) + { + return program_it->second; + } + + GCProgram program; + +#ifdef EMBEDDED_KERNELS + const auto program_source_it = _program_source_map.find(program_name); + + if(_program_source_map.end() == program_source_it) + { + ARM_COMPUTE_ERROR("Embedded program for %s does not exist.", program_name.c_str()); + } + + // TODO(APPBROWSER-298): Do not call shader preprocessor here + // We should do the preprocess at compile time + // The preprocess_shader function is used for support "#include" directive and token pasting operator "##". + // This job could be done at compile time by using a python script in order to get better performance at runtime. + // BTW: We usually defined EMBEDDED_KERNELS in release build. + program = GCProgram(program_name, preprocess_shader(program_source_it->second)); +#else /* EMBEDDED_KERNELS */ + // Check for binary + std::string source_name = _shader_path + program_name; + if(std::ifstream(source_name).is_open()) + { + program = GCProgram(program_name, preprocess_shader(read_file(source_name, false))); + } + else + { + ARM_COMPUTE_ERROR("Shader file %s does not exist.", source_name.c_str()); + } +#endif /* EMBEDDED_KERNELS */ + + // Insert program to program map + const auto new_program = _programs_map.emplace(program_name, std::move(program)); + + return new_program.first->second; +} + +void GCKernelLibrary::setup_context() +{ + EGLBoolean res; + _display = eglGetDisplay(EGL_DEFAULT_DISPLAY); + + ARM_COMPUTE_ERROR_ON_MSG(_display == EGL_NO_DISPLAY, "Failed to get display: 0x%x.", eglGetError()); + + res = eglInitialize(_display, nullptr, nullptr); + + ARM_COMPUTE_ERROR_ON_MSG(res == EGL_FALSE, "Failed to initialize egl: 0x%x.", eglGetError()); + ARM_COMPUTE_UNUSED(res); + + const char *egl_extension_st = eglQueryString(_display, EGL_EXTENSIONS); + ARM_COMPUTE_ERROR_ON_MSG((strstr(egl_extension_st, "EGL_KHR_create_context") == nullptr), "Failed 
to query EGL_KHR_create_context"); + ARM_COMPUTE_ERROR_ON_MSG((strstr(egl_extension_st, "EGL_KHR_surfaceless_context") == nullptr), "Failed to query EGL_KHR_surfaceless_context"); + ARM_COMPUTE_UNUSED(egl_extension_st); + + const EGLint config_attribs[] = + { + EGL_RENDERABLE_TYPE, EGL_OPENGL_ES3_BIT_KHR, + EGL_NONE + }; + EGLConfig cfg; + EGLint count; + + res = eglChooseConfig(_display, config_attribs, &cfg, 1, &count); + + ARM_COMPUTE_ERROR_ON_MSG(res == EGL_FALSE, "Failed to choose config: 0x%x.", eglGetError()); + ARM_COMPUTE_UNUSED(res); + + res = eglBindAPI(EGL_OPENGL_ES_API); + + ARM_COMPUTE_ERROR_ON_MSG(res == EGL_FALSE, "Failed to bind api: 0x%x.", eglGetError()); + + const EGLint attribs[] = + { + EGL_CONTEXT_CLIENT_VERSION, 3, + EGL_NONE + }; + _context = eglCreateContext(_display, + cfg, + EGL_NO_CONTEXT, + attribs); + + ARM_COMPUTE_ERROR_ON_MSG(_context == EGL_NO_CONTEXT, "Failed to create context: 0x%x.", eglGetError()); + ARM_COMPUTE_UNUSED(res); + + res = eglMakeCurrent(_display, EGL_NO_SURFACE, EGL_NO_SURFACE, _context); + + ARM_COMPUTE_ERROR_ON_MSG(res == EGL_FALSE, "Failed to make current: 0x%x.", eglGetError()); + ARM_COMPUTE_UNUSED(res); +} + +void GCKernelLibrary::setup_dummy_fbo() +{ + ARM_COMPUTE_GL_CHECK(glGenFramebuffers(1, &_frame_buffer)); + ARM_COMPUTE_GL_CHECK(glBindFramebuffer(GL_FRAMEBUFFER, _frame_buffer)); + ARM_COMPUTE_GL_CHECK(glGenTextures(1, &_tex_rt)); + ARM_COMPUTE_GL_CHECK(glBindTexture(GL_TEXTURE_2D, _tex_rt)); + ARM_COMPUTE_GL_CHECK(glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, 1, 1, 0, GL_RGB, GL_UNSIGNED_BYTE, nullptr)); + ARM_COMPUTE_GL_CHECK(glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, _tex_rt, 0)); +} + +GCKernelLibrary::~GCKernelLibrary() +{ + for(auto &program : _built_programs_map) + { + static_cast(program.second).cleanup(); + } + + ARM_COMPUTE_GL_CHECK(glBindTexture(GL_TEXTURE_2D, 0)); + ARM_COMPUTE_GL_CHECK(glBindFramebuffer(GL_FRAMEBUFFER, 0)); + ARM_COMPUTE_GL_CHECK(glDeleteTextures(1, 
&_tex_rt)); + ARM_COMPUTE_GL_CHECK(glDeleteFramebuffers(1, &_frame_buffer)); + + if(_own_context) + { + eglDestroyContext(_display, _context); + eglTerminate(_display); + + _context = EGL_NO_CONTEXT; + _display = EGL_NO_DISPLAY; + } +} + +std::string GCKernelLibrary::stringify_set(const StringSet &s) const +{ + std::string concat_set; + + // Concatenate set + for(const auto &el : s) + { + concat_set += el + "\n"; + } + + return concat_set; +} diff --git a/src/core/GLES_COMPUTE/IGCKernel.cpp b/src/core/GLES_COMPUTE/IGCKernel.cpp new file mode 100644 index 0000000000..154a2c0c66 --- /dev/null +++ b/src/core/GLES_COMPUTE/IGCKernel.cpp @@ -0,0 +1,157 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h" + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h" +#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Utils.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/core/Window.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCScheduler.h" + +#include +#include + +using namespace arm_compute; + +void arm_compute::enqueue(IGCKernel &kernel, const Window &window, const gles::NDRange &lws) +{ + ARM_COMPUTE_UNUSED(kernel); + + if(kernel.kernel().get_program() == 0) + { + return; + } + + ARM_COMPUTE_ERROR_ON((0 == (window.x().end() - window.x().start())) || (0 == (window.y().end() - window.y().start()))); + + ARM_COMPUTE_ERROR_ON_MSG((((window.x().end() - window.x().start()) % (window.x().step() * lws[0])) != 0), + "window x end =%d, start=%d, step=%d, lws x=%d", window.x().end(), window.x().start(), window.x().step(), lws[0]); + ARM_COMPUTE_ERROR_ON_MSG((((window.y().end() - window.y().start()) % (window.y().step() * lws[1])) != 0), + "window y end =%d, start=%d, step=%d, lws y=%d", window.y().end(), window.y().start(), window.y().step(), lws[1]); + ARM_COMPUTE_ERROR_ON_MSG((((window.z().end() - window.z().start()) % (window.z().step() * lws[2])) != 0), + "window z end =%d, start=%d, step=%d, lws z=%d", window.z().end(), window.z().start(), window.z().step(), lws[2]); + + ARM_COMPUTE_GL_CHECK(glDispatchCompute((window.x().end() - window.x().start()) / (window.x().step() / lws[0]), + (window.y().end() - window.y().start()) / (window.y().step() / lws[1]), + (window.z().end() - window.z().start()) / (window.z().step() / lws[2]))); +} + +IGCKernel::IGCKernel() + : _kernel() +{ +} + +GCKernel &IGCKernel::kernel() +{ + return _kernel; +} + +template +unsigned int IGCKernel::num_arguments_per_tensor() const +{ + return 2 + 2 * dimension_size; +} + +template 
+void IGCKernel::add_tensor_argument(unsigned int &idx, const IGCTensor *tensor, const BufferParam ¶m, const Window &window) +{ + ARM_COMPUTE_ERROR_ON(tensor == nullptr); + + const ITensorInfo *info = tensor->info(); + const Strides &strides = info->strides_in_bytes(); + + // Calculate offset to the start of the window + unsigned int offset_first_element = info->offset_first_element_in_bytes(); + + for(unsigned int n = 0; n < info->num_dimensions(); ++n) + { + offset_first_element += window[n].start() * strides[n]; + } + + unsigned int idx_start = idx; + + for(unsigned int dimension = 0; dimension < dimension_size; dimension++) + { + _kernel.set_params(idx++, strides[dimension]); + _kernel.set_params(idx++, strides[dimension] * window[dimension].step()); + } + + _kernel.set_params(idx++, offset_first_element); + _kernel.set_params(idx++, param.buffer_data_type_shift); + + ARM_COMPUTE_GL_CHECK(glBindBufferBase(GL_SHADER_STORAGE_BUFFER, param.binding_point, tensor->gc_buffer())); + + ARM_COMPUTE_ERROR_ON_MSG(idx_start + num_arguments_per_tensor() != idx, + "add_%dD_tensor_argument() is supposed to add exactly %d arguments to the kernel", dimension_size, num_arguments_per_tensor()); + ARM_COMPUTE_UNUSED(idx_start); +} + +void IGCKernel::add_1D_tensor_argument(unsigned int &idx, const IGCTensor *tensor, const unsigned int binding_point, const Window &window) +{ + add_tensor_argument<1>(idx, tensor, BufferParam(binding_point, 0), window); +} + +void IGCKernel::add_1D_tensor_argument(unsigned int &idx, const IGCTensor *tensor, const BufferParam ¶m, const Window &window) +{ + add_tensor_argument<1>(idx, tensor, param, window); +} + +void IGCKernel::add_2D_tensor_argument(unsigned int &idx, const IGCTensor *tensor, const unsigned int binding_point, const Window &window) +{ + add_tensor_argument<2>(idx, tensor, BufferParam(binding_point, 0), window); +} + +void IGCKernel::add_2D_tensor_argument(unsigned int &idx, const IGCTensor *tensor, const BufferParam ¶m, const Window 
&window) +{ + add_tensor_argument<2>(idx, tensor, param, window); +} + +void IGCKernel::add_3D_tensor_argument(unsigned int &idx, const IGCTensor *tensor, const unsigned int binding_point, const Window &window) +{ + add_tensor_argument<3>(idx, tensor, BufferParam(binding_point, 0), window); +} + +void IGCKernel::add_3D_tensor_argument(unsigned int &idx, const IGCTensor *tensor, const BufferParam ¶m, const Window &window) +{ + add_tensor_argument<3>(idx, tensor, param, window); +} + +unsigned int IGCKernel::num_arguments_per_1D_tensor() const +{ + return num_arguments_per_tensor<1>(); +} + +unsigned int IGCKernel::num_arguments_per_2D_tensor() const +{ + return num_arguments_per_tensor<2>(); +} + +unsigned int IGCKernel::num_arguments_per_3D_tensor() const +{ + return num_arguments_per_tensor<3>(); +} diff --git a/src/core/GLES_COMPUTE/IGCSimple2DKernel.cpp b/src/core/GLES_COMPUTE/IGCSimple2DKernel.cpp new file mode 100644 index 0000000000..5bb479ed24 --- /dev/null +++ b/src/core/GLES_COMPUTE/IGCSimple2DKernel.cpp @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/core/GLES_COMPUTE/IGCSimple2DKernel.h" + +#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/core/Window.h" + +using namespace arm_compute; + +void IGCSimple2DKernel::run(const Window &window) +{ + ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); + ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IGCKernel::window(), window); + + _kernel.use(); + + Window slice = window.first_slice_window_2D(); + + do + { + unsigned int idx = 0; + add_2D_tensor_argument(idx, _input, 1, slice); + add_2D_tensor_argument(idx, _output, 2, slice); + _kernel.update_shader_params(); + enqueue(*this, slice); + } + while(window.slide_window_slice_2D(slice)); +} diff --git a/src/core/GLES_COMPUTE/IGCSimple3DKernel.cpp b/src/core/GLES_COMPUTE/IGCSimple3DKernel.cpp new file mode 100644 index 0000000000..61225d8533 --- /dev/null +++ b/src/core/GLES_COMPUTE/IGCSimple3DKernel.cpp @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/core/GLES_COMPUTE/IGCSimple3DKernel.h" + +#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/core/Window.h" + +using namespace arm_compute; + +void IGCSimple3DKernel::run(const Window &window) +{ + ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); + ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); + + Window slice = window.first_slice_window_3D(); + + _kernel.use(); + + do + { + unsigned int idx = 0; + unsigned int binding = 1; // SSBO binding starts from 1. + add_3D_tensor_argument(idx, _input, binding++, slice); + add_3D_tensor_argument(idx, _output, binding++, slice); + _kernel.update_shader_params(); + enqueue(*this, slice); + } + while(window.slide_window_slice_3D(slice)); +} diff --git a/src/core/GLES_COMPUTE/IGCSimpleKernel.cpp b/src/core/GLES_COMPUTE/IGCSimpleKernel.cpp new file mode 100644 index 0000000000..459601e68b --- /dev/null +++ b/src/core/GLES_COMPUTE/IGCSimpleKernel.cpp @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/core/GLES_COMPUTE/IGCSimpleKernel.h" + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/IAccessWindow.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/core/Window.h" + +using namespace arm_compute; + +IGCSimpleKernel::IGCSimpleKernel() + : _input(nullptr), _output(nullptr) +{ +} + +void IGCSimpleKernel::configure(const IGCTensor *input, IGCTensor *output, unsigned int num_elems_processed_per_iteration, bool border_undefined, const BorderSize &border_size) +{ + _input = input; + _output = output; + + // Configure kernel window + Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration), border_undefined, border_size); + AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration); + + update_window_and_padding(win, + AccessWindowHorizontal(input->info(), 0, num_elems_processed_per_iteration), + output_access); + + output_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size); + + IGCKernel::configure(win); +} diff --git a/src/core/GLES_COMPUTE/IGCTensor.cpp b/src/core/GLES_COMPUTE/IGCTensor.cpp new file mode 100644 index 0000000000..5576665243 --- /dev/null +++ b/src/core/GLES_COMPUTE/IGCTensor.cpp @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h" + +using namespace arm_compute; + +IGCTensor::IGCTensor() + : _mapping(nullptr) +{ +} + +void IGCTensor::map(bool blocking) +{ + _mapping = do_map(blocking); +} + +void IGCTensor::unmap() +{ + do_unmap(); + _mapping = nullptr; +} + +void IGCTensor::clear() +{ + this->map(); + std::memset(static_cast(_mapping), 0, this->info()->total_size()); + this->unmap(); +} + +uint8_t *IGCTensor::buffer() const +{ + return _mapping; +} diff --git a/src/core/GLES_COMPUTE/OpenGLES.cpp b/src/core/GLES_COMPUTE/OpenGLES.cpp new file mode 100644 index 0000000000..fdfc085db2 --- /dev/null +++ b/src/core/GLES_COMPUTE/OpenGLES.cpp @@ -0,0 +1,820 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h" + +#include +#include +#include + +using eglGetProcAddress_func = __eglMustCastToProperFunctionPointerType EGLAPIENTRY (*)(const char *procname); +using eglBindAPI_func = EGLBoolean EGLAPIENTRY (*)(EGLenum api); +using eglChooseConfig_func = EGLBoolean EGLAPIENTRY (*)(EGLDisplay dpy, const EGLint *attrib_list, EGLConfig *configs, EGLint config_size, EGLint *num_config); +using eglCreateContext_func = EGLContext EGLAPIENTRY (*)(EGLDisplay dpy, EGLConfig config, EGLContext share_context, const EGLint *attrib_list); +using eglDestroyContext_func = EGLBoolean EGLAPIENTRY (*)(EGLDisplay dpy, EGLContext ctx); +using eglGetDisplay_func = EGLDisplay EGLAPIENTRY (*)(EGLNativeDisplayType display_id); +using eglInitialize_func = EGLBoolean EGLAPIENTRY (*)(EGLDisplay dpy, EGLint *major, EGLint *minor); +using eglMakeCurrent_func = EGLBoolean EGLAPIENTRY (*)(EGLDisplay dpy, EGLSurface draw, EGLSurface read, EGLContext ctx); +using eglTerminate_func = EGLBoolean EGLAPIENTRY (*)(EGLDisplay dpy); +using eglGetError_func = EGLint EGLAPIENTRY (*)(); +using eglQueryString_func = char const * EGLAPIENTRY (*)(EGLDisplay dpy, EGLint name); +using glAttachShader_func = void GL_APIENTRY (*)(GLuint program, GLuint shader); +using glCompileShader_func = void GL_APIENTRY (*)(GLuint shader); +using glCreateProgram_func = GLuint GL_APIENTRY (*)(); +using glCreateShader_func = GLuint GL_APIENTRY (*)(GLenum type); +using glDeleteProgram_func = void 
GL_APIENTRY (*)(GLuint program); +using glDeleteShader_func = void GL_APIENTRY (*)(GLuint shader); +using glDetachShader_func = void GL_APIENTRY (*)(GLuint program, GLuint shader); +using glGetProgramInfoLog_func = void GL_APIENTRY (*)(GLuint program, GLsizei bufsize, GLsizei *length, GLchar *infolog); +using glGetProgramiv_func = void GL_APIENTRY (*)(GLuint program, GLenum pname, GLint *params); +using glGetShaderInfoLog_func = void GL_APIENTRY (*)(GLuint shader, GLsizei bufsize, GLsizei *length, GLchar *infolog); +using glGetShaderiv_func = void GL_APIENTRY (*)(GLuint shader, GLenum pname, GLint *params); +using glLinkProgram_func = void GL_APIENTRY (*)(GLuint program); +using glShaderSource_func = void GL_APIENTRY (*)(GLuint shader, GLsizei count, const GLchar *const *string, const GLint *length); +using glUseProgram_func = void GL_APIENTRY (*)(GLuint program); +using glBindBuffer_func = void GL_APIENTRY (*)(GLenum target, GLuint buffer); +using glBindBufferBase_func = void GL_APIENTRY (*)(GLenum target, GLuint index, GLuint buffer); +using glBufferData_func = void GL_APIENTRY (*)(GLenum target, GLsizeiptr size, const GLvoid *data, GLenum usage); +using glDeleteBuffers_func = void GL_APIENTRY (*)(GLsizei n, const GLuint *buffers); +using glDispatchCompute_func = void GL_APIENTRY (*)(GLuint num_groups_x, GLuint num_groups_y, GLuint num_groups_z); +using glFlush_func = void GL_APIENTRY (*)(); +using glGenBuffers_func = void GL_APIENTRY (*)(GLsizei n, GLuint *buffers); +using glGetProgramResourceIndex_func = GLuint GL_APIENTRY (*)(GLuint program, GLenum programInterface, const GLchar *name); +using glGetUniformLocation_func = GLint GL_APIENTRY (*)(GLuint program, const GLchar *name); +using glMapBufferRange_func = void *GL_APIENTRY (*)(GLenum target, GLintptr offset, GLsizeiptr length, GLbitfield access); +using glMemoryBarrier_func = void GL_APIENTRY (*)(GLbitfield barriers); +using glUniform1ui_func = void GL_APIENTRY (*)(GLint location, GLuint v0); +using 
glUnmapBuffer_func = GLboolean GL_APIENTRY (*)(GLenum target); +using glGetError_func = GLenum GL_APIENTRY (*)(); +using glGetActiveUniformBlockiv_func = void GL_APIENTRY (*)(GLuint program, GLuint uniformBlockIndex, GLenum pname, GLint *params); +using glUniformBlockBinding_func = void GL_APIENTRY (*)(GLuint program, GLuint uniformBlockIndex, GLuint uniformBlockBinding); +using glGetUniformBlockIndex_func = GLuint GL_APIENTRY (*)(GLuint program, const GLchar *uniformBlockName); +using glGenTextures_func = void GL_APIENTRY (*)(GLsizei n, GLuint *textures); +using glDeleteTextures_func = void GL_APIENTRY (*)(GLsizei n, const GLuint *textures); +using glBindTexture_func = void GL_APIENTRY (*)(GLenum target, GLuint texture); +using glTexImage2D_func = void GL_APIENTRY (*)(GLenum target, GLint level, GLint internalformat, GLsizei width, GLsizei height, GLint border, GLenum format, GLenum type, + const GLvoid *pixels); +using glGenFramebuffers_func = void GL_APIENTRY (*)(GLsizei n, GLuint *framebuffers); +using glDeleteFramebuffers_func = void GL_APIENTRY (*)(GLsizei n, const GLuint *framebuffers); +using glBindFramebuffer_func = void GL_APIENTRY (*)(GLenum target, GLuint framebuffer); +using glFramebufferTexture2D_func = void GL_APIENTRY (*)(GLenum target, GLenum attachment, GLenum textarget, GLuint texture, GLint level); + +class GLESSymbols +{ +private: + void init() + { + void *egl_handle = dlopen("libEGL.so", RTLD_LAZY | RTLD_LOCAL); + void *glesv2_handle = dlopen("libGLESv2.so", RTLD_LAZY | RTLD_LOCAL); + void *glesv3_handle = dlopen("libGLESv3.so", RTLD_LAZY | RTLD_LOCAL); + if(egl_handle == nullptr) + { + std::cerr << "Can't load libEGL.so: " << dlerror() << std::endl; + } + else + { +#undef EGL_ENTRY +#define EGL_ENTRY(_api) _api = reinterpret_cast<_api##_func>(dlsym(egl_handle, #_api)); +#include "./egl_entries.in" +#undef EGL_ENTRY + + if(eglGetProcAddress != nullptr) + { +#undef EGL_ENTRY +#define EGL_ENTRY(_api) \ + if((_api) == nullptr) \ + (_api) = 
reinterpret_cast<_api##_func>(eglGetProcAddress(#_api)); +#include "./egl_entries.in" +#undef EGL_ENTRY + +#undef GL_ENTRY +#define GL_ENTRY(_api) _api = reinterpret_cast<_api##_func>(eglGetProcAddress(#_api)); +#include "./gl_entries.in" +#undef GL_ENTRY + } + + std::vector<void *> handles = { glesv3_handle, glesv2_handle }; + for(auto &handle : handles) + { + if(handle != nullptr) + { +#undef GL_ENTRY +#define GL_ENTRY(_api) \ + if((_api) == nullptr) \ + (_api) = reinterpret_cast<_api##_func>(dlsym(handle, #_api)); +#include "./gl_entries.in" +#undef GL_ENTRY + } + } + + if(glesv3_handle != nullptr) + { + dlclose(glesv3_handle); + } + if(glesv2_handle != nullptr) + { + dlclose(glesv2_handle); + } + dlclose(egl_handle); + } + } + bool _initialized = false; + +public: + static GLESSymbols &get() + { + static GLESSymbols symbols = GLESSymbols(); + if(!symbols._initialized) + { + symbols._initialized = true; + symbols.init(); + } + + return symbols; + } + +#undef EGL_ENTRY +#undef GL_ENTRY +#define EGL_ENTRY(_api) _api##_func _api = nullptr; +#define GL_ENTRY(_api) EGL_ENTRY(_api) +#include "./egl_entries.in" +#include "./gl_entries.in" +#undef EGL_ENTRY +#undef GL_ENTRY +}; + +bool arm_compute::opengles31_is_available() +{ + return GLESSymbols::get().glDispatchCompute != nullptr; +} + +__eglMustCastToProperFunctionPointerType EGLAPIENTRY eglGetProcAddress(const char *procname) +{ + auto func = GLESSymbols::get().eglGetProcAddress; + if(func != nullptr) + { + return func(procname); + } + else + { + return nullptr; + } +} + +EGLBoolean EGLAPIENTRY eglBindAPI(EGLenum api) +{ + auto func = GLESSymbols::get().eglBindAPI; + if(func != nullptr) + { + return func(api); + } + else + { + return EGL_FALSE; + } +} + +EGLBoolean EGLAPIENTRY eglChooseConfig(EGLDisplay dpy, const EGLint *attrib_list, EGLConfig *configs, EGLint config_size, EGLint *num_config) +{ + auto func = GLESSymbols::get().eglChooseConfig; + if(func != nullptr) + { + return func(dpy, attrib_list, configs,
config_size, num_config); + } + else + { + return EGL_FALSE; + } +} + +EGLContext EGLAPIENTRY eglCreateContext(EGLDisplay dpy, EGLConfig config, EGLContext share_context, const EGLint *attrib_list) +{ + auto func = GLESSymbols::get().eglCreateContext; + if(func != nullptr) + { + return func(dpy, config, share_context, attrib_list); + } + else + { + return nullptr; + } +} + +EGLBoolean EGLAPIENTRY eglDestroyContext(EGLDisplay dpy, EGLContext ctx) +{ + auto func = GLESSymbols::get().eglDestroyContext; + if(func != nullptr) + { + return func(dpy, ctx); + } + else + { + return EGL_FALSE; + } +} + +EGLDisplay EGLAPIENTRY eglGetDisplay(EGLNativeDisplayType display_id) +{ + auto func = GLESSymbols::get().eglGetDisplay; + if(func != nullptr) + { + return func(display_id); + } + else + { + return nullptr; + } +} + +EGLBoolean EGLAPIENTRY eglInitialize(EGLDisplay dpy, EGLint *major, EGLint *minor) +{ + auto func = GLESSymbols::get().eglInitialize; + if(func != nullptr) + { + return func(dpy, major, minor); + } + else + { + return EGL_FALSE; + } +} + +EGLBoolean EGLAPIENTRY eglMakeCurrent(EGLDisplay dpy, EGLSurface draw, EGLSurface read, EGLContext ctx) +{ + auto func = GLESSymbols::get().eglMakeCurrent; + if(func != nullptr) + { + return func(dpy, draw, read, ctx); + } + else + { + return EGL_FALSE; + } +} + +EGLBoolean EGLAPIENTRY eglTerminate(EGLDisplay dpy) +{ + auto func = GLESSymbols::get().eglTerminate; + if(func != nullptr) + { + return func(dpy); + } + else + { + return EGL_FALSE; + } +} + +EGLint EGLAPIENTRY eglGetError() +{ + auto func = GLESSymbols::get().eglGetError; + if(func != nullptr) + { + return func(); + } + else + { + return GL_NO_ERROR; + } +} + +char const *EGLAPIENTRY eglQueryString(EGLDisplay dpy, EGLint name) +{ + auto func = GLESSymbols::get().eglQueryString; + if(func != nullptr) + { + return func(dpy, name); + } + else + { + return nullptr; + } +} + +void GL_APIENTRY glAttachShader(GLuint program, GLuint shader) +{ + auto func = 
GLESSymbols::get().glAttachShader; + if(func != nullptr) + { + return func(program, shader); + } + else + { + return; + } +} + +void GL_APIENTRY glCompileShader(GLuint shader) +{ + auto func = GLESSymbols::get().glCompileShader; + if(func != nullptr) + { + return func(shader); + } + else + { + return; + } +} + +GLuint GL_APIENTRY glCreateProgram() +{ + auto func = GLESSymbols::get().glCreateProgram; + if(func != nullptr) + { + return func(); + } + else + { + return 0; + } +} + +GLuint GL_APIENTRY glCreateShader(GLenum type) +{ + auto func = GLESSymbols::get().glCreateShader; + if(func != nullptr) + { + return func(type); + } + else + { + return 0; + } +} + +void GL_APIENTRY glDeleteProgram(GLuint program) +{ + auto func = GLESSymbols::get().glDeleteProgram; + if(func != nullptr) + { + return func(program); + } + else + { + return; + } +} + +void GL_APIENTRY glDeleteShader(GLuint shader) +{ + auto func = GLESSymbols::get().glDeleteShader; + if(func != nullptr) + { + return func(shader); + } + else + { + return; + } +} + +void GL_APIENTRY glDetachShader(GLuint program, GLuint shader) +{ + auto func = GLESSymbols::get().glDetachShader; + if(func != nullptr) + { + return func(program, shader); + } + else + { + return; + } +} + +void GL_APIENTRY glGetProgramInfoLog(GLuint program, GLsizei bufSize, GLsizei *length, GLchar *infoLog) +{ + auto func = GLESSymbols::get().glGetProgramInfoLog; + if(func != nullptr) + { + return func(program, bufSize, length, infoLog); + } + else + { + return; + } +} + +void GL_APIENTRY glGetProgramiv(GLuint program, GLenum pname, GLint *params) +{ + auto func = GLESSymbols::get().glGetProgramiv; + if(func != nullptr) + { + return func(program, pname, params); + } + else + { + return; + } +} + +void GL_APIENTRY glGetShaderInfoLog(GLuint shader, GLsizei bufSize, GLsizei *length, GLchar *infoLog) +{ + auto func = GLESSymbols::get().glGetShaderInfoLog; + if(func != nullptr) + { + return func(shader, bufSize, length, infoLog); + } + else + { + 
return; + } +} + +void GL_APIENTRY glGetShaderiv(GLuint shader, GLenum pname, GLint *params) +{ + auto func = GLESSymbols::get().glGetShaderiv; + if(func != nullptr) + { + return func(shader, pname, params); + } + else + { + return; + } +} + +void GL_APIENTRY glLinkProgram(GLuint program) +{ + auto func = GLESSymbols::get().glLinkProgram; + if(func != nullptr) + { + return func(program); + } + else + { + return; + } +} + +void GL_APIENTRY glShaderSource(GLuint shader, GLsizei count, const GLchar *const *string, const GLint *length) +{ + auto func = GLESSymbols::get().glShaderSource; + if(func != nullptr) + { + return func(shader, count, string, length); + } + else + { + return; + } +} + +void GL_APIENTRY glUseProgram(GLuint program) +{ + auto func = GLESSymbols::get().glUseProgram; + if(func != nullptr) + { + return func(program); + } + else + { + return; + } +} + +void GL_APIENTRY glBindBuffer(GLenum target, GLuint buffer) +{ + auto func = GLESSymbols::get().glBindBuffer; + if(func != nullptr) + { + return func(target, buffer); + } + else + { + return; + } +} + +void GL_APIENTRY glBindBufferBase(GLenum target, GLuint index, GLuint buffer) +{ + auto func = GLESSymbols::get().glBindBufferBase; + if(func != nullptr) + { + return func(target, index, buffer); + } + else + { + return; + } +} + +void GL_APIENTRY glBufferData(GLenum target, GLsizeiptr size, const GLvoid *data, GLenum usage) +{ + auto func = GLESSymbols::get().glBufferData; + if(func != nullptr) + { + return func(target, size, data, usage); + } + else + { + return; + } +} + +void GL_APIENTRY glDeleteBuffers(GLsizei n, const GLuint *buffers) +{ + auto func = GLESSymbols::get().glDeleteBuffers; + if(func != nullptr) + { + return func(n, buffers); + } + else + { + return; + } +} + +void GL_APIENTRY glDispatchCompute(GLuint num_groups_x, GLuint num_groups_y, GLuint num_groups_z) +{ + auto func = GLESSymbols::get().glDispatchCompute; + if(func != nullptr) + { + return func(num_groups_x, num_groups_y, 
num_groups_z); + } + else + { + return; + } +} + +void GL_APIENTRY glFlush(void) +{ + auto func = GLESSymbols::get().glFlush; + if(func != nullptr) + { + return func(); + } + else + { + return; + } +} + +void GL_APIENTRY glGenBuffers(GLsizei n, GLuint *buffers) +{ + auto func = GLESSymbols::get().glGenBuffers; + if(func != nullptr) + { + return func(n, buffers); + } + else + { + return; + } +} + +GLuint GL_APIENTRY glGetProgramResourceIndex(GLuint program, GLenum programInterface, const GLchar *name) +{ + auto func = GLESSymbols::get().glGetProgramResourceIndex; + if(func != nullptr) + { + return func(program, programInterface, name); + } + else + { + return GL_INVALID_INDEX; + } +} + +GLint GL_APIENTRY glGetUniformLocation(GLuint program, const GLchar *name) +{ + auto func = GLESSymbols::get().glGetUniformLocation; + if(func != nullptr) + { + return func(program, name); + } + else + { + return -1; + } +} + +void *GL_APIENTRY glMapBufferRange(GLenum target, GLintptr offset, GLsizeiptr length, GLbitfield access) +{ + auto func = GLESSymbols::get().glMapBufferRange; + if(func != nullptr) + { + return func(target, offset, length, access); + } + else + { + return nullptr; + } +} + +void GL_APIENTRY glMemoryBarrier(GLbitfield barriers) +{ + auto func = GLESSymbols::get().glMemoryBarrier; + if(func != nullptr) + { + return func(barriers); + } + else + { + return; + } +} + +void GL_APIENTRY glUniform1ui(GLint location, GLuint v0) +{ + auto func = GLESSymbols::get().glUniform1ui; + if(func != nullptr) + { + return func(location, v0); + } + else + { + return; + } +} + +GLboolean GL_APIENTRY glUnmapBuffer(GLenum target) +{ + auto func = GLESSymbols::get().glUnmapBuffer; + if(func != nullptr) + { + return func(target); + } + else + { + return GL_FALSE; + } +} + +GLenum GL_APIENTRY glGetError(void) +{ + auto func = GLESSymbols::get().glGetError; + if(func != nullptr) + { + return func(); + } + else + { + return GL_NO_ERROR; + } +} + +void GL_APIENTRY 
glGetActiveUniformBlockiv(GLuint program, GLuint uniformBlockIndex, GLenum pname, GLint *params) +{ + auto func = GLESSymbols::get().glGetActiveUniformBlockiv; + if(func != nullptr) + { + return func(program, uniformBlockIndex, pname, params); + } + else + { + return; + } +} + +void GL_APIENTRY glUniformBlockBinding(GLuint program, GLuint uniformBlockIndex, GLuint uniformBlockBinding) +{ + auto func = GLESSymbols::get().glUniformBlockBinding; + if(func != nullptr) + { + return func(program, uniformBlockIndex, uniformBlockBinding); + } + else + { + return; + } +} + +GLuint GL_APIENTRY glGetUniformBlockIndex(GLuint program, const GLchar *uniformBlockName) +{ + auto func = GLESSymbols::get().glGetUniformBlockIndex; + if(func != nullptr) + { + return func(program, uniformBlockName); + } + else + { + return GL_INVALID_INDEX; + } +} + +void GL_APIENTRY glGenTextures(GLsizei n, GLuint *textures) +{ + auto func = GLESSymbols::get().glGenTextures; + if(func != nullptr) + { + return func(n, textures); + } + else + { + return; + } +} + +void GL_APIENTRY glDeleteTextures(GLsizei n, const GLuint *textures) +{ + auto func = GLESSymbols::get().glDeleteTextures; + if(func != nullptr) + { + return func(n, textures); + } + else + { + return; + } +} + +void GL_APIENTRY glBindTexture(GLenum target, GLuint texture) +{ + auto func = GLESSymbols::get().glBindTexture; + if(func != nullptr) + { + return func(target, texture); + } + else + { + return; + } +} + +void GL_APIENTRY glTexImage2D(GLenum target, GLint level, GLint internalformat, GLsizei width, GLsizei height, GLint border, GLenum format, GLenum type, const GLvoid *pixels) +{ + auto func = GLESSymbols::get().glTexImage2D; + if(func != nullptr) + { + return func(target, level, internalformat, width, height, border, format, type, pixels); + } + else + { + return; + } +} + +void GL_APIENTRY glGenFramebuffers(GLsizei n, GLuint *framebuffers) +{ + auto func = GLESSymbols::get().glGenFramebuffers; + if(func != nullptr) + { + return 
func(n, framebuffers); + } + else + { + return; + } +} + +void GL_APIENTRY glDeleteFramebuffers(GLsizei n, const GLuint *framebuffers) +{ + auto func = GLESSymbols::get().glDeleteFramebuffers; + if(func != nullptr) + { + return func(n, framebuffers); + } + else + { + return; + } +} + +void GL_APIENTRY glBindFramebuffer(GLenum target, GLuint framebuffer) +{ + auto func = GLESSymbols::get().glBindFramebuffer; + if(func != nullptr) + { + return func(target, framebuffer); + } + else + { + return; + } +} + +void GL_APIENTRY glFramebufferTexture2D(GLenum target, GLenum attachment, GLenum textarget, GLuint texture, GLint level) +{ + auto func = GLESSymbols::get().glFramebufferTexture2D; + if(func != nullptr) + { + return func(target, attachment, textarget, texture, level); + } + else + { + return; + } +} diff --git a/src/core/GLES_COMPUTE/cs_shaders/absdiff.cs b/src/core/GLES_COMPUTE/cs_shaders/absdiff.cs new file mode 100644 index 0000000000..f6113e13eb --- /dev/null +++ b/src/core/GLES_COMPUTE/cs_shaders/absdiff.cs @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +layout(local_size_x = LOCAL_SIZE_X, local_size_y = LOCAL_SIZE_Y, local_size_z = LOCAL_SIZE_Z) in; +#include "helpers.h" + +layout(std140) uniform shader_params +{ + IMAGE_PARAM_DECLARATION(src1); + IMAGE_PARAM_DECLARATION(src2); + IMAGE_PARAM_DECLARATION(dst); +}; + +BUFFER_DECLARATION(src1, 1, uint, readonly); +BUFFER_DECLARATION(src2, 2, uint, readonly); +BUFFER_DECLARATION(dst, 3, uint, writeonly); + +/** Calculate the absolute difference of two input images. + * + * @param[in] src1_ptr Pointer to the first source image. Supported data types: U8 + * @param[in] src1_stride_x Stride of the first source image in X dimension (in bytes) + * @param[in] src1_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src1_stride_y Stride of the first source image in Y dimension (in bytes) + * @param[in] src1_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src1_offset_first_element_in_bytes The offset of the first element in the first source image + * @param[in] src2_ptr Pointer to the second source image. Supported data types: Same as @p in1_ptr + * @param[in] src2_stride_x Stride of the second source image in X dimension (in bytes) + * @param[in] src2_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src2_stride_y Stride of the second source image in Y dimension (in bytes) + * @param[in] src2_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src2_offset_first_element_in_bytes The offset of the first element in the second source image + * @param[out] dst_ptr Pointer to the destination image. 
Supported data types: Same as @p in1_ptr + * @param[in] dst_stride_x Stride of the destination image in X dimension (in bytes) + * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] dst_stride_y Stride of the destination image in Y dimension (in bytes) + * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination image + */ +void main(void) +{ + Image src1 = CONVERT_TO_IMAGE_STRUCT(src1); + Image src2 = CONVERT_TO_IMAGE_STRUCT(src2); + Image dst = CONVERT_TO_IMAGE_STRUCT(dst); + + uvec4 tmp1 = UNPACK(LOAD4(src1, CURRENT_OFFSET(src1)), uint, uvec4); + uvec4 tmp2 = UNPACK(LOAD4(src2, CURRENT_OFFSET(src2)), uint, uvec4); + uvec4 diff = uvec4(abs(ivec4(tmp1 - tmp2))); + + STORE4(dst, CURRENT_OFFSET(dst), PACK(diff, uvec4, uint)); +} diff --git a/src/core/GLES_COMPUTE/cs_shaders/activation_layer.cs b/src/core/GLES_COMPUTE/cs_shaders/activation_layer.cs new file mode 100644 index 0000000000..fc9da114f7 --- /dev/null +++ b/src/core/GLES_COMPUTE/cs_shaders/activation_layer.cs @@ -0,0 +1,262 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +layout(local_size_x = LOCAL_SIZE_X, local_size_y = LOCAL_SIZE_Y, local_size_z = LOCAL_SIZE_Z) in; + +#include "helpers.h" + +#ifdef DATA_TYPE_FP32 +precision highp float; +#elif defined(DATA_TYPE_FP16) +#if defined(LOGISTIC) || defined(TANH) || defined(SRELU) || defined(SQRT) +precision highp float; +#else /*LOGISTIC_TANH_SRELU_SQRT*/ +precision mediump float; +#endif /*LOGISTIC_TANH_SRELU_SQRT*/ +#endif /*DATA_TYPE_FP32*/ + +#define ABS_OP(a) abs((a)) +#define ADD_OP(a, b) ((a) + (b)) +#define SUB_OP(a, b) ((a) - (b)) +#define MUL_OP(a, b) ((a) * (b)) +#define MLA_OP(a, b, c) ((b) * (c) + (a)) +#define DIV_OP(a, b) ((a) / (b)) +#define EXP_OP(a) exp((a)) +#define LOG_OP(a) log((a)) +#define SQRT_OP(a) sqrt((a)) +#define CONST_ONE (1.f) + +// Logistic Activation +float logistic_op(float x) +{ + return DIV_OP(CONST_ONE, ADD_OP(CONST_ONE, EXP_OP(-x))); +} +// Hyperbolic Tangent Activation +float tanh_op(float x) +{ + float tmp = float(B_VAL) * x; + if(tmp > 10.f) + { + return MUL_OP(float(A_VAL), 1.f); + } + else if(tmp < -10.f) + { + return MUL_OP(float(A_VAL), -1.f); + } + else + { + return MUL_OP(float(A_VAL), tanh(tmp + 0.000001f)); + } +} +// RELU Tangent Activation +float relu_op(float x) +{ + return max(0.f, x); +} +// Bounded RELU Activation +float brelu_op(float x) +{ + return min(float(A_VAL), max(float(0.0), x)); +} +// Lower Upper Bounded RELU Activation +float lu_brelu_op(float x) +{ + return min(max(x, float(B_VAL)), float(A_VAL)); +} +// Leaky RELU Activation 
+float lrelu_op(float x) +{ + return (x > float(0.0)) ? x : MUL_OP(float(A_VAL), x); +} +// Soft RELU Activation +float srelu_op(float x) +{ + return LOG_OP(ADD_OP(CONST_ONE, EXP_OP(x))); +} +// Absolute Activation +float abs_op(float x) +{ + return ABS_OP(x); +} +// Square Activation +float square_op(float x) +{ + return MUL_OP(x, x); +} +// Square-root Activation +float sqrt_op(float x) +{ + return SQRT_OP(x); +} +// Linear Activation +float linear_op(float x) +{ + return MLA_OP(float(B_VAL), float(A_VAL), x); +} + +layout(std140) uniform shader_params +{ + TENSOR3D_PARAM_DECLARATION(src); + TENSOR3D_PARAM_DECLARATION(dst); +}; + +#ifdef DATA_TYPE_FP32 +BUFFER_DECLARATION(src, 1, float, readonly); +BUFFER_DECLARATION(dst, 2, float, writeonly); + +/** This performs an activation function floating point inputs. + * + * @note Activation function should be given as a preprocessor argument using "#define act_name". e.g. "#define TANH" + * @note A, B variables required by some activation functions are set using A_VAL= and B_VAL= respectively. + * + * @param[in] src_ptr Pointer to the source image. Supported data types: F32 + * @param[in] src_stride_x Stride of the source image in X dimension (in bytes) + * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src_stride_y Stride of the source image in Y dimension (in bytes) + * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] src_step_z src_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source image + * @param[out] dst_ptr Pointer to the destination image. 
Supported data types: same as @p src_ptr + * @param[in] dst_stride_x Stride of the destination image in X dimension (in bytes) + * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] dst_stride_y ride of the destination image in Y dimension (in bytes) + * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] dst_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination image + */ +void main(void) +{ + Tensor3D src = CONVERT_TO_TENSOR3D_STRUCT(src); + Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT(dst); + + float data = src_ptr[src.current_offset]; + float data_out = 0.f; + // Perform activation + +#ifdef LOGISTIC + data_out = logistic_op(data); +#elif defined(TANH) /*LOGISTIC*/ + data_out = tanh_op(data); +#elif defined(RELU) /*RELU*/ + data_out = relu_op(data); +#elif defined(BRELU) /*BRELU*/ + data_out = brelu_op(data); +#elif defined(LU_BRELU) /*LU_BRELU*/ + data_out = lu_brelu_op(data); +#elif defined(LRELU) /*LRELU*/ + data_out = lrelu_op(data); +#elif defined(SRELU) /*SRELU*/ + data_out = srelu_op(data); +#elif defined(ABS) /*ABS*/ + data_out = abs_op(data); +#elif defined(SQUARE) /*SQUARE*/ + data_out = square_op(data); +#elif defined(SQRT) /*SQRT*/ + data_out = sqrt_op(data); +#elif defined(LINEAR) /*LINEAR*/ + data_out = linear_op(data); +#else /*LOGISTIC*/ +#error Activation function not provided +#endif /*LOGISTIC*/ + + dst_ptr[dst.current_offset] = data_out; +} + +#elif defined(DATA_TYPE_FP16) +BUFFER_DECLARATION(src, 1, uint, readonly); +BUFFER_DECLARATION(dst, 2, uint, writeonly); + +/** This performs an activation function floating point inputs. 
+ * + * @note Activation function should be given as a preprocessor argument using "#define act_name". e.g. "#define TANH" + * @note A, B variables required by some activation functions are set using A_VAL= and B_VAL= respectively. + * + * @param[in] src_ptr Pointer to the source image. Supported data types: F16 + * @param[in] src_stride_x Stride of the source image in X dimension (in bytes) + * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src_stride_y Stride of the source image in Y dimension (in bytes) + * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] src_step_z src_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source image + * @param[out] dst_ptr Pointer to the destination image. 
Supported data types: same as @p src_ptr + * @param[in] dst_stride_x Stride of the destination image in X dimension (in bytes) + * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] dst_stride_y ride of the destination image in Y dimension (in bytes) + * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] dst_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination image + */ +void main(void) +{ + Tensor3D src = CONVERT_TO_TENSOR3D_STRUCT_FP16(src); + Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT_FP16(dst); + + uint data = src_ptr[src.current_offset >> 2]; + // Perform activation + float a = unpackHalf2x16(data).x; + float b = unpackHalf2x16(data).y; + vec2 data_out; +#ifdef LOGISTIC /*LOGISTIC*/ + data_out.x = logistic_op(a); + data_out.y = logistic_op(b); +#elif defined(TANH) /*TANH*/ + data_out.x = tanh_op(a); + data_out.y = tanh_op(b); +#elif defined(RELU) /*RELU*/ + data_out.x = relu_op(a); + data_out.y = relu_op(b); +#elif defined(BRELU) /*BRELU*/ + data_out.x = brelu_op(a); + data_out.y = brelu_op(b); +#elif defined(LU_BRELU) /*LU_BRELU*/ + data_out.x = lu_brelu_op(a); + data_out.y = lu_brelu_op(b); +#elif defined(LRELU) /*LRELU*/ + data_out.x = lrelu_op(a); + data_out.y = lrelu_op(b); +#elif defined(SRELU) /*SRELU*/ + data_out.x = srelu_op(a); + data_out.y = srelu_op(b); +#elif defined(ABS) /*ABS*/ + data_out.x = abs_op(a); + data_out.y = abs_op(b); +#elif defined(SQUARE) /*SQUARE*/ + data_out.x = square_op(a); + data_out.y = square_op(b); +#elif defined(SQRT) /*SQRT*/ + data_out.x = sqrt_op(a); + data_out.y = sqrt_op(b); +#elif defined(LINEAR) /*LINEAR*/ + data_out.x = linear_op(a); + data_out.y = linear_op(b); +#else /*LOGISTIC*/ +#error 
Activation function not provided +#endif /*LOGISTIC*/ + + dst_ptr[dst.current_offset >> 2] = packHalf2x16(data_out); +} +#endif /*DATA_TYPE_FP32*/ diff --git a/src/core/GLES_COMPUTE/cs_shaders/batchnormalization_layer.cs b/src/core/GLES_COMPUTE/cs_shaders/batchnormalization_layer.cs new file mode 100644 index 0000000000..54880926cc --- /dev/null +++ b/src/core/GLES_COMPUTE/cs_shaders/batchnormalization_layer.cs @@ -0,0 +1,222 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +layout(local_size_x = LOCAL_SIZE_X, local_size_y = LOCAL_SIZE_Y, local_size_z = LOCAL_SIZE_Z) in; + +#include "helpers.h" + +#ifdef DATA_TYPE_FP32 +precision highp float; +#elif defined(DATA_TYPE_FP16) +precision mediump float; +#endif /*DATA_TYPE_FP32*/ + +#define ADD_OP(a, b) ((a) + (b)) +#define SUB_OP(a, b) ((a) - (b)) +#define MUL_OP(a, b) ((a) * (b)) +#define INVSQRT_OP(a) inversesqrt((a)) +#define SQCVT_SAT(a) (a) + +layout(std140) uniform shader_params +{ + TENSOR3D_PARAM_DECLARATION(src); + TENSOR3D_PARAM_DECLARATION(dst); + VECTOR_PARAM_DECLARATION(mean); + VECTOR_PARAM_DECLARATION(var); + VECTOR_PARAM_DECLARATION(beta); + VECTOR_PARAM_DECLARATION(gamma); +}; + +#ifdef DATA_TYPE_FP32 +BUFFER_DECLARATION(src, 1, float, readonly); +BUFFER_DECLARATION(dst, 2, float, writeonly); +BUFFER_DECLARATION(mean, 3, float, readonly); +BUFFER_DECLARATION(var, 4, float, readonly); +BUFFER_DECLARATION(beta, 5, float, readonly); +BUFFER_DECLARATION(gamma, 6, float, readonly); + +/** Apply batch normalization. + * + * @note Epsilon parameter in the batch normalization equation should be given as a preprocessor argument using "#define EPSILON". e.g. "#define EPSILON 0.1" + * + * @param[in] src_ptr Pointer to the first source tensor. 
Supported data types: F32 + * @param[in] src_stride_x Stride of the first source tensor in X dimension (in bytes) + * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src_stride_y Stride of the first source tensor in Y dimension (in bytes) + * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src_stride_z Stride of the first source tensor in Z dimension (in bytes) + * @param[in] src_step_z src_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] src_offset_first_element_in_bytes The offset of the first element in the first source tensor + * @param[out] dst_ptr Pointer to the destination tensor. Supported data types: same as @p src_ptr + * @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes) + * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes) + * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in bytes) + * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor + * @param[in] mean_ptr Pointer to the mean source tensor. Supported data types: same as @p src_ptr + * @param[in] mean_stride_x Stride of the mean source tensor in X dimension (in bytes) + * @param[in] mean_step_x mean_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] mean_offset_first_element_in_bytes The offset of the first element in the mean source tensor + * @param[in] var_ptr Pointer to the var tensor. 
Supported data types: same as @p src_ptr
+ * @param[in]  var_stride_x                        Stride of the var tensor in X dimension (in bytes)
+ * @param[in]  var_step_x                          var_stride_x * number of elements along X processed per workitem(in bytes)
+ * @param[in]  var_offset_first_element_in_bytes   The offset of the first element in the var source tensor
+ * @param[in]  beta_ptr                            Pointer to the beta source tensor. Supported data types: same as @p src_ptr
+ * @param[in]  beta_stride_x                       Stride of the beta source tensor in X dimension (in bytes)
+ * @param[in]  beta_step_x                         beta_stride_x * number of elements along X processed per workitem(in bytes)
+ * @param[in]  beta_offset_first_element_in_bytes  The offset of the first element in the beta source tensor
+ * @param[in]  gamma_ptr                           Pointer to the gamma source tensor. Supported data types: same as @p src_ptr
+ * @param[in]  gamma_stride_x                      Stride of the gamma source tensor in X dimension (in bytes)
+ * @param[in]  gamma_step_x                        gamma_stride_x * number of elements along X processed per workitem(in bytes)
+ * @param[in]  gamma_offset_first_element_in_bytes The offset of the first element in the gamma source tensor
+ */
+void main(void)
+{
+    Tensor3D src = CONVERT_TO_TENSOR3D_STRUCT(src);
+    Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT(dst);
+    Vector   mean = CONVERT_TO_VECTOR_STRUCT(mean);
+    Vector   var = CONVERT_TO_VECTOR_STRUCT(var);
+    Vector   beta = CONVERT_TO_VECTOR_STRUCT(beta);
+    Vector   gamma = CONVERT_TO_VECTOR_STRUCT(gamma);
+
+    float input_value = 0.f;
+    float denominator = 0.f;
+    float numerator   = 0.f;
+    float x_bar       = 0.f;
+    float gamma_param = 0.f;
+    float beta_param  = 0.f;
+
+    uint current_slice = gl_GlobalInvocationID.z;
+
+    input_value = src_ptr[src.current_offset];
+    denominator = var_ptr[var.current_offset + (current_slice * var.stride_x) >> 2];
+    denominator = INVSQRT_OP(ADD_OP(denominator, SQCVT_SAT(float(ESPILON))));
+
+    // Calculate x bar and store results
+    numerator = mean_ptr[mean.current_offset + (current_slice * mean.stride_x) >> 2];
+    numerator = SUB_OP(input_value, numerator);
+    x_bar     = MUL_OP(numerator, denominator);
+
+    // Fix: index gamma with its own stride (was a beta.stride_x copy-paste; identical layout today, wrong tensor)
+    gamma_param = gamma_ptr[gamma.current_offset + (current_slice * gamma.stride_x) >> 2];
+    beta_param  = beta_ptr[beta.current_offset + (current_slice * beta.stride_x) >> 2];
+
+    dst_ptr[dst.current_offset] = ADD_OP(MUL_OP(gamma_param, x_bar), beta_param);
+}
+
+#elif defined(DATA_TYPE_FP16)
+BUFFER_DECLARATION(src, 1, uint, );
+BUFFER_DECLARATION(dst, 2, uint, writeonly);
+BUFFER_DECLARATION(mean, 3, uint, );
+BUFFER_DECLARATION(var, 4, uint, );
+BUFFER_DECLARATION(beta, 5, uint, );
+BUFFER_DECLARATION(gamma, 6, uint, );
+
+/** Apply batch normalization.
+ *
+ * @note Epsilon parameter in the batch normalization equation should be given as a preprocessor argument using "#define EPSILON". e.g. "#define EPSILON 0.1"
+ *
+ * @param[in]  src_ptr                             Pointer to the first source tensor. Supported data types: F16
+ * @param[in]  src_stride_x                        Stride of the first source tensor in X dimension (in bytes)
+ * @param[in]  src_step_x                          src_stride_x * number of elements along X processed per workitem(in bytes)
+ * @param[in]  src_stride_y                        Stride of the first source tensor in Y dimension (in bytes)
+ * @param[in]  src_step_y                          src_stride_y * number of elements along Y processed per workitem(in bytes)
+ * @param[in]  src_stride_z                        Stride of the first source tensor in Z dimension (in bytes)
+ * @param[in]  src_step_z                          src_stride_z * number of elements along Z processed per workitem(in bytes)
+ * @param[in]  src_offset_first_element_in_bytes   The offset of the first element in the first source tensor
+ * @param[out] dst_ptr                             Pointer to the destination tensor.
Supported data types: same as @p src_ptr + * @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes) + * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes) + * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in bytes) + * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor + * @param[in] mean_ptr Pointer to the mean source tensor. Supported data types: same as @p src_ptr + * @param[in] mean_stride_x Stride of the mean source tensor in X dimension (in bytes) + * @param[in] mean_step_x mean_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] mean_offset_first_element_in_bytes The offset of the first element in the mean source tensor + * @param[in] var_ptr Pointer to the var tensor. Supported data types: same as @p src_ptr + * @param[in] var_stride_x Stride of the var tensor in X dimension (in bytes) + * @param[in] var_step_x var_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] var_offset_first_element_in_bytes The offset of the first element in the var source tensor + * @param[in] beta_ptr Pointer to the beta source tensor. Supported data types: same as @p src_ptr + * @param[in] beta_stride_x Stride of the beta source tensor in X dimension (in bytes) + * @param[in] beta_step_x beta_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] beta_offset_first_element_in_bytes The offset of the first element in the beta source tensor + * @param[in] gamma_ptr Pointer to the gamma source tensor. 
Supported data types: same as @p src_ptr
+ * @param[in]  gamma_stride_x                      Stride of the gamma source tensor in X dimension (in bytes)
+ * @param[in]  gamma_step_x                        gamma_stride_x * number of elements along X processed per workitem(in bytes)
+ * @param[in]  gamma_offset_first_element_in_bytes The offset of the first element in the gamma source tensor
+ */
+void main(void)
+{
+    Tensor3D src = CONVERT_TO_TENSOR3D_STRUCT_FP16(src);
+    Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT_FP16(dst);
+    Vector   mean = CONVERT_TO_VECTOR_STRUCT_FP16(mean);
+    Vector   var = CONVERT_TO_VECTOR_STRUCT_FP16(var);
+    Vector   beta = CONVERT_TO_VECTOR_STRUCT_FP16(beta);
+    Vector   gamma = CONVERT_TO_VECTOR_STRUCT_FP16(gamma);
+
+    vec2  input_value;
+    float denominator;
+    float numerator;
+    vec2  x_bar;
+    float gamma_param;
+    float beta_param;
+
+    uint current_slice = gl_GlobalInvocationID.z;
+    if((current_slice % uint(2)) == uint(0))
+    {
+        input_value = unpackHalf2x16(src_ptr[src.current_offset >> 2]);
+        denominator = unpackHalf2x16(var_ptr[(var.current_offset + current_slice * var.stride_x) >> 2]).x;
+        denominator = INVSQRT_OP(ADD_OP(denominator, SQCVT_SAT(float(ESPILON))));
+
+        //Calculate x bar and store results
+        numerator = unpackHalf2x16(mean_ptr[(mean.current_offset + current_slice * mean.stride_x) >> 2]).x;
+        x_bar     = MUL_OP(SUB_OP(input_value, numerator), denominator);
+
+        // Fix: index gamma with its own stride (was a beta.stride_x copy-paste; identical layout today, wrong tensor)
+        gamma_param = unpackHalf2x16(gamma_ptr[(gamma.current_offset + current_slice * gamma.stride_x) >> 2]).x;
+        beta_param  = unpackHalf2x16(beta_ptr[(beta.current_offset + current_slice * beta.stride_x) >> 2]).x;
+
+        dst_ptr[dst.current_offset >> 2] = packHalf2x16(ADD_OP(MUL_OP(gamma_param, x_bar), beta_param));
+    }
+    else
+    {
+        input_value = unpackHalf2x16(src_ptr[src.current_offset >> 2]);
+        denominator = unpackHalf2x16(var_ptr[(var.current_offset + current_slice * var.stride_x) >> 2]).y;
+        denominator = INVSQRT_OP(ADD_OP(denominator, SQCVT_SAT(float(ESPILON))));
+
+        //Calculate x bar and store results
+        numerator = unpackHalf2x16(mean_ptr[(mean.current_offset + current_slice * mean.stride_x) >> 2]).y;
+        x_bar     = MUL_OP(SUB_OP(input_value, numerator), denominator);
+
+        // Fix: index gamma with its own stride (was a beta.stride_x copy-paste; identical layout today, wrong tensor)
+        gamma_param = unpackHalf2x16(gamma_ptr[(gamma.current_offset + current_slice * gamma.stride_x) >> 2]).y;
+        beta_param  = unpackHalf2x16(beta_ptr[(beta.current_offset + current_slice * beta.stride_x) >> 2]).y;
+
+        dst_ptr[dst.current_offset >> 2] = packHalf2x16(ADD_OP(MUL_OP(gamma_param, x_bar), beta_param));
+    }
+}
+#endif /*DATA_TYPE_FP32*/
diff --git a/src/core/GLES_COMPUTE/cs_shaders/concatenate.cs b/src/core/GLES_COMPUTE/cs_shaders/concatenate.cs
new file mode 100644
index 0000000000..65000f2de2
--- /dev/null
+++ b/src/core/GLES_COMPUTE/cs_shaders/concatenate.cs
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */ +layout(local_size_x = LOCAL_SIZE_X, local_size_y = LOCAL_SIZE_Y, local_size_z = LOCAL_SIZE_Z) in; +#include "helpers.h" + +#ifdef DATA_TYPE_FP32 +precision highp float; + +layout(std140) uniform shader_params +{ + TENSOR3D_PARAM_DECLARATION(src); + TENSOR3D_PARAM_DECLARATION(dst); +}; + +BUFFER_DECLARATION(src, 1, float, readonly); +BUFFER_DECLARATION(dst, 2, float, writeonly); + +/** This kernel concatenates the input tensor into the output tensor along the third dimension + * + * @param[in] src_ptr Pointer to the source tensor. Supported data types: F32 + * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes) + * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes) + * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] src_step_z src_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor + * @param[out] dst_ptr Pointer to the destination tensor. 
Supported data types: same as @p src_ptr + * @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes) + * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes) + * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] dst_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor + */ +void main(void) +{ + Tensor3D src = CONVERT_TO_TENSOR3D_STRUCT(src); + Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT(dst); + + dst_ptr[dst.current_offset + uint(OFFSETS_Z >> 2)] = src_ptr[tensor3D_offset(src, -OFFSETS_X, -OFFSETS_Y, 0)]; +} + +#elif defined(DATA_TYPE_FP16) +precision mediump float; + +layout(std140) uniform shader_params +{ + TENSOR3D_PARAM_DECLARATION(src); + TENSOR3D_PARAM_DECLARATION(dst); +}; + +BUFFER_DECLARATION(src, 1, uvec2, readonly); +BUFFER_DECLARATION(dst, 2, uvec2, writeonly); + +/** This kernel concatenates the input tensor into the output tensor along the third dimension + * + * @param[in] src_ptr Pointer to the source tensor. 
Supported data types: F16 + * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes) + * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes) + * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] src_step_z src_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor + * @param[out] dst_ptr Pointer to the destination tensor. Supported data types: same as @p src_ptr + * @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes) + * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes) + * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] dst_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor + */ +void main(void) +{ + Tensor3D src = GC_CONVERT_TO_TENSOR3D_STRUCT(src); + Tensor3D dst = GC_CONVERT_TO_TENSOR3D_STRUCT(dst); + + uvec2 packed_s; + GC_LOAD1_3D_OFFSET(packed_s, src, -OFFSETS_X, -OFFSETS_Y, 0); + dst_ptr[(dst.current_offset + uint(OFFSETS_Z)) >> 3] = packed_s; +} +#endif /*DATA_TYPE_FP32*/ \ No newline at end of file diff --git a/src/core/GLES_COMPUTE/cs_shaders/convolution_layer.cs b/src/core/GLES_COMPUTE/cs_shaders/convolution_layer.cs new file mode 100644 index 0000000000..1a0c9f1d30 --- /dev/null +++ 
b/src/core/GLES_COMPUTE/cs_shaders/convolution_layer.cs @@ -0,0 +1,302 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +layout(local_size_x = LOCAL_SIZE_X, local_size_y = LOCAL_SIZE_Y, local_size_z = LOCAL_SIZE_Z) in; +#include "helpers.h" + +#ifdef DATA_TYPE_FP16 +BUFFER_DECLARATION(src, 1, uint, readonly); +BUFFER_DECLARATION(dst, 2, uint, restrict); +#else // DATA_TYPE_FP16 +BUFFER_DECLARATION(src, 1, float, readonly); +BUFFER_DECLARATION(dst, 2, float, restrict); +#endif // DATA_TYPE_FP16 + +layout(std140) uniform shader_params +{ +#ifdef IM2COL_GENERIC + TENSOR3D_PARAM_DECLARATION(src); + IMAGE_PARAM_DECLARATION(dst); + uint filter_depth; + uint src_stride_w; + uint dst_stride_w; +#endif // IM2COL_GENERIC + +#ifdef IM2COL_REDUCED + TENSOR3D_PARAM_DECLARATION(src); + VECTOR_PARAM_DECLARATION(dst); + uint width; + uint height; +#endif // IM2COL_REDUCED + +#ifdef COL2IM + IMAGE_PARAM_DECLARATION(src); + TENSOR3D_PARAM_DECLARATION(dst); + uint width; +#endif // COL2IM +}; + +#ifdef DATA_TYPE_FP16 + +precision mediump float; + +#ifdef IM2COL_REDUCED +/** This kernel reshapes the tensor's low three dimensions to single row for GEMM operation + * + * @note The data type must be passed at compile time using "#define DATA_TYPE_FP16" + * @note In case biases will be added in late stage, "#define HAS_BIAS" has to be passed to append the final matrix with 1 in each row. + * + * @param[in] src_ptr Pointer to the source tensor. 
Supported data types: F16 + * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes) + * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes) + * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] src_step_z src_stride_z * number of elements along Y processed per workitem(in bytes) + * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor + * @param[out] dst_ptr Pointer to the destination tensor. Same as @p src_ptr + * @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes) + * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor + * @param[in] width The width of the input tensor + * @param[in] height The height of the input tensor + */ +void main(void) +{ + uvec3 pos = uvec3(gl_GlobalInvocationID.xyz); + uvec3 size = uvec3(gl_WorkGroupSize.xyz); + Tensor3D src = CONVERT_TO_TENSOR3D_STRUCT_FP16(src); + Tensor3D src_nostep = CONVERT_TO_TENSOR3D_STRUCT_NO_STEP_FP16(src); + Vector dst = CONVERT_TO_VECTOR_STRUCT_NO_STEP_FP16(dst); + uint image_size = width * height; + uint element_count = src_step_x / src_stride_x; + uint tmp_out_offset = dst.current_offset + ((pos.x * element_count + pos.y * width + pos.z * image_size) * dst.stride_x); + uint width_fp16 = ((width + uint(1)) >> uint(1)); + uint tmp; + + // odd width + if(width % uint(2) != uint(0)) + { + // even row + if((pos.y + pos.z * height) % uint(2) == uint(0)) + { + LOAD1(tmp, src, src.current_offset >> uint(2)); + STORE1(dst, tmp_out_offset >> uint(2), tmp); + } + else + { + // special op + uint tmpleft = 
uint(0); + uint tmpright = uint(0); + LOAD1(tmpright, src, src.current_offset >> uint(2)); // right half + if(pos.x == uint(0)) + { + LOAD1(tmpleft, src, tensor3D_offset_fp16(src_nostep, int(width), int(pos.y) - 1, int(pos.z)) >> uint(2)); // left half + tmpright = (tmpleft & uint(0xffff)) + (tmpright << uint(16)); + } + else + { + LOAD1(tmpleft, src, tensor3D_offset_fp16(src_nostep, (int(pos.x) - 1) * int(element_count), int(pos.y), int(pos.z)) >> uint(2)); // left half + tmpright = ((tmpleft >> uint(16)) + (tmpright << uint(16))); + } + STORE1(dst, tmp_out_offset >> uint(2), tmpright); + } + } + else + { + LOAD1(tmp, src, src.current_offset >> uint(2)); + STORE1(dst, tmp_out_offset >> uint(2), tmp); + } + +#ifdef HAS_BIAS + // If it is the last thread in the 3 dimensional workgroup + if(pos.x == (size.x - 1) && pos.y == (size.y - 1) && pos.z == (size.z - 1)) + { + tmp_out_offset += dst.stride_x; + + // FIXME: need odd/even detection for tmp_out_offset? + mediump vec2 bias_vec = vec2(1.0f, 1.0f); + uint bias_u = packHalf2x16(bias_vec); + STORE1(dst, tmp_out_offset >> uint(2), bias_u); + } +#endif // HAS_BIAS +} +#endif // IM2COL_REDUCED + +#elif defined(DATA_TYPE_FP32) + +#ifdef IM2COL_GENERIC +/** This kernel performs a reshaping of the input tensor to a tensor used to perform convolution using GEMM. + * + * @note The data type must be passed at compile time using "#define DATA_TYPE_FP32" + * @note In case biases will be added to the convolution "#define HAS_BIAS" has to be passed to append the final matrix with 1 in each row. + * + * @param[in] src_ptr Pointer to the source tensor. 
Supported data types: F32 + * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes) + * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes) + * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] src_step_z src_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor + * @param[out] dst_ptr Pointer to the destination tensor. Supported data types: same as @p src_ptr + * @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes) + * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes) + * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor + * @param[in] filter_depth The depth of the used filter + * @param[in] src_stride_w Stride of the source tensor in W dimension (in bytes). + * @param[in] dst_stride_w Stride of the destination tensor in W dimension (in bytes). 
+ */
+void main(void)
+{
+    uint xc    = gl_GlobalInvocationID.x;                // x coordinate in the convolved tensor
+    uint yc    = gl_GlobalInvocationID.y;                // y coordinate in the convolved tensor
+    uint ch    = gl_GlobalInvocationID.z % filter_depth; // input feature map
+    uint batch = gl_GlobalInvocationID.z / filter_depth; // the batch
+
+    // Calculate input indeces
+    uint xi           = xc * uint(STRIDE_X) - uint(PAD_X);
+    uint yi           = yc * uint(STRIDE_Y) - uint(PAD_Y);
+    uint input_offset = (src_offset_first_element_in_bytes + (ch * src_stride_z) + (batch * src_stride_w)) >> uint(2);
+
+    // Calculate output indeces
+    uint xo            = ch * uint(KERNEL_WIDTH) * uint(KERNEL_HEIGHT);
+    uint yo            = xc + yc * uint(CONVOLVED_WIDTH); // Index of the convolution
+    uint output_offset = (dst_offset_first_element_in_bytes + (yo * dst_stride_y) + (batch * dst_stride_w) + xo) >> uint(2);
+
+    // Linearize convolution elements
+    for(uint y = yi, y_e = yi + uint(KERNEL_HEIGHT); y < y_e; ++y)
+    {
+        for(uint x = xi, x_e = xi + uint(KERNEL_WIDTH); x < x_e; ++x)
+        {
+#if PAD_X == 0 && PAD_Y == 0
+            output_offset = input_offset + ((x * src_stride_x + y * src_stride_y) >> uint(2));
+            STORE4(dst, output_offset, LOAD4(src, input_offset));
+#else // PAD_X == 0 && PAD_Y == 0
+            // NOTE(review): x/y are uint, so 'x < 0' is always false; out-of-bounds padding is caught
+            // by 'x >= SRC_WIDTH' via unsigned wraparound of the PAD subtraction — confirm intended.
+            if(x < 0 || x >= SRC_WIDTH || y < 0 || y >= SRC_HEIGHT)
+            {
+                STORE4(dst, output_offset, 0.0f);
+            }
+            else
+            {
+                // Fix: unbalanced parenthesis — '(... ) >> uint(2));' did not compile; match the #if branch.
+                output_offset = input_offset + ((x * src_stride_x + y * src_stride_y) >> uint(2));
+                STORE4(dst, output_offset, LOAD4(src, input_offset));
+            }
+#endif // PAD_X == 0 && PAD_Y == 0
+        }
+    }
+
+#ifdef HAS_BIAS
+    if(ch == (uint(KERNEL_DEPTH) - 1))
+    {
+        STORE4(dst, output_offset, 1.0f);
+    }
+#endif // HAS_BIAS
+}
+#endif // IM2COL_GENERIC
+
+#ifdef IM2COL_REDUCED
+/** This kernel reshapes the tensor's low three dimensions to single row for GEMM operation
+ *
+ * @note The data type must be passed at compile time using "#define DATA_TYPE_FP32"
+ * @note In case biases will be added in late stage, "#define HAS_BIAS" has to be passed to append the final
matrix with 1 in each row. + * + * @param[in] src_ptr Pointer to the source tensor. Supported data types: F32 + * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes) + * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes) + * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] src_step_z src_stride_z * number of elements along Y processed per workitem(in bytes) + * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor + * @param[out] dst_ptr Pointer to the destination tensor. Same as @p src_ptr + * @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes) + * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor + * @param[in] width The width of the input tensor + * @param[in] height The height of the input tensor + */ +void main(void) +{ + uvec3 pos = uvec3(gl_GlobalInvocationID.xyz); + uvec3 size = uvec3(gl_WorkGroupSize.xyz); + Tensor3D src = CONVERT_TO_TENSOR3D_STRUCT(src); + Vector dst = CONVERT_TO_VECTOR_STRUCT_NO_STEP(dst); + uint image_size = width * height; + uint tmp_out_offset = dst.current_offset + (((pos.x + pos.y * width + pos.z * image_size) * dst.stride_x) >> 2); + + STORE4(dst, tmp_out_offset, LOAD4(src, src.current_offset)); + +#ifdef HAS_BIAS + // If it is the last thread in the 3 dimensional workgroup + if(pos.x == (size.x - 1) && pos.y == (size.y - 1) && pos.z == (size.z - 1)) + { + tmp_out_offset += (dst.stride_x >> uint(2)); + STORE4(dst, tmp_out_offset, 1.f); + } +#endif // HAS_BIAS +} +#endif // IM2COL_REDUCED + +#ifdef COL2IM +/** This 
kernel performs a reshaping of the output of the convolution layer. + * + * @note The data type must be passed at compile time using "#define DATA_TYPE_FP32" + * + * @param[in] src_ptr Pointer to the source tensor. Supported data types: F32 + * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes) + * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes) + * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] src_step_z src_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor + * @param[out] dst_ptr Pointer to the destination tensor. Supported data types: same as @p src_ptr + * @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes) + * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes) + * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in bytes) + * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor + * @param[in] dst_stride_w Stride of the destination tensor in W dimension (in bytes) + */ +void main(void) +{ + uvec2 pos = uvec2(gl_GlobalInvocationID.xy); + Image src = CONVERT_TO_IMAGE_STRUCT(src); + Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT(dst); + + uint idx = pos.x * dst.stride_z + (pos.y / width) * dst.stride_y + (pos.y % 
width) * dst.stride_x; + uint tmp_out_offset = dst.current_offset + (idx >> 2); + + STORE4(dst, tmp_out_offset, LOAD4(src, src.current_offset)); +} +#endif // COL2IM + +#else // DATA_TYPE_FP16 +#error Data type not supported +#endif // DATA_TYPE_FP16 diff --git a/src/core/GLES_COMPUTE/cs_shaders/direct_convolution1x1.cs b/src/core/GLES_COMPUTE/cs_shaders/direct_convolution1x1.cs new file mode 100644 index 0000000000..3a31cb80a7 --- /dev/null +++ b/src/core/GLES_COMPUTE/cs_shaders/direct_convolution1x1.cs @@ -0,0 +1,275 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +layout(local_size_x = LOCAL_SIZE_X, local_size_y = LOCAL_SIZE_Y, local_size_z = LOCAL_SIZE_Z) in; + +#include "helpers.h" + +layout(std140) uniform shader_params +{ + TENSOR3D_PARAM_DECLARATION(src); + TENSOR3D_PARAM_DECLARATION(dst); + TENSOR3D_PARAM_DECLARATION(weights); +#ifdef BIAS + VECTOR_PARAM_DECLARATION(biases); +#endif /* BIAS */ + uint weights_stride_w; + uint weights_depth; +}; + +#if defined(DATA_TYPE_FP32) +precision highp float; + +BUFFER_DECLARATION(src, 1, float, readonly); +BUFFER_DECLARATION(dst, 2, float, writeonly); +BUFFER_DECLARATION(weights, 3, float, readonly); +#ifdef BIAS +BUFFER_DECLARATION(biases, 4, float, readonly); +#endif /* BIAS */ + +/** This kernel performs a direct convolution to convolve the low three dimensions. + * + * @note The data type must be passed at compile time using "#define DATA_TYPE_FP32" + * @note The convolution stride x must be passed at compile time using "#define STRIDE_X" e.g. "#define STRIDE_X 1" + * @note In case biases will be added to the convolution "#define HAS_BIAS" has to be passed to append the final matrix with 1 in each row. + * + * @param[in] src_ptr Pointer to the source tensor. Supported data types: F32 + * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes) + * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes) + * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] src_step_z src_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor + * @param[out] dst_ptr Pointer to the destination tensor. 
Supported data types: same as @p src_ptr
+ * @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes)
+ * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes)
+ * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes)
+ * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in bytes)
+ * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor
+ * @param[in] weights_ptr Pointer to the weights tensor. Supported data types: same as @p src_ptr
+ * @param[in] weights_stride_x Stride of the weights tensor in X dimension (in bytes)
+ * @param[in] weights_step_x weights_stride_x * number of elements along X processed per workitem(in bytes)
+ * @param[in] weights_stride_y Stride of the weights tensor in Y dimension (in bytes)
+ * @param[in] weights_step_y weights_stride_y * number of elements along Y processed per workitem(in bytes)
+ * @param[in] weights_stride_z Stride of the weights tensor in Z dimension (in bytes)
+ * @param[in] weights_step_z weights_stride_z * number of elements along Z processed per workitem(in bytes)
+ * @param[in] weights_offset_first_element_in_bytes The offset of the first element in the weights tensor
+ * @param[in] biases_ptr Pointer to the biases tensor.
Same as @p src_ptr + * @param[in] biases_stride_x Stride of the biases tensor in X dimension (in bytes) + * @param[in] biases_step_x biases_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] biases_offset_first_element_in_bytes The offset of the first element in the biases tensor + * @param[in] weights_stride_w Stride of the weights tensor in the 4th dimension + * @param[in] weights_depth The third dimensions of the weights tensors + */ +void main() +{ + Image src = CONVERT_TO_IMAGE_STRUCT(src); + Tensor3D weights = CONVERT_TO_TENSOR3D_STRUCT_NO_STEP(weights); + Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT(dst); + +#ifdef BIAS + Vector biases = CONVERT_TO_VECTOR_STRUCT_NO_STEP(biases); +#endif /* BIAS */ + + float pixels = CONVERT(0, float); + uint z_index = gl_GlobalInvocationID.z; + weights.current_offset += z_index * weights_stride_w >> 2; + float temp; + float temp_weight; + + for(int d = 0; d < int(weights_depth); ++d) + { + temp = LOAD4(src, CURRENT_OFFSET(src)); + temp_weight = LOAD4(weights, CURRENT_OFFSET(weights)); + pixels += temp * temp_weight; + + src.current_offset += (src_stride_z >> 2); + weights.current_offset += (weights_stride_z >> 2); + } + +#ifdef BIAS + pixels += LOAD4(biases, vector_offset(biases, int(z_index))); +#endif /* BIAS */ + + STORE4(dst, CURRENT_OFFSET(dst), pixels); +} +#elif defined(DATA_TYPE_FP16) +precision mediump float; + +BUFFER_DECLARATION(src, 1, uvec4, readonly); +BUFFER_DECLARATION(dst, 2, uvec4, writeonly); +BUFFER_DECLARATION(weights, 3, uint, readonly); +#ifdef BIAS +BUFFER_DECLARATION(biases, 4, uint, readonly); +#endif /* BIAS */ + +#if STRIDE_X == 2 +#define CONVOLVE(s, w) convolve_stride2(s, w) +#elif STRIDE_X == 1 /* STRIDE_X == 1 */ +#define CONVOLVE(s, w) convolve_stride1(s, w) +#else /* STRIDE_X not equals 1 or 2 */ +#error STRIDE_X larger than 2 is not supported +#endif /* STRIDE_X == 2 */ + +vec4[2] convolve_stride1(Image src, float w) +{ + uvec4 packed_s; + vec4 s[2]; + + 
GC_LOAD1_2D_OFFSET(packed_s, src, 0, 0); + + s[0] = vec4(unpackHalf2x16(packed_s.x), unpackHalf2x16(packed_s.y)); + s[1] = vec4(unpackHalf2x16(packed_s.z), unpackHalf2x16(packed_s.w)); + + s[0] *= w; + s[1] *= w; + + return s; +} + +vec4[2] convolve_stride2(Image src, float w) +{ + uvec4 packed_s; + vec4 s[2]; + vec4 r[2]; + + GC_LOAD1_2D_OFFSET(packed_s, src, 0, 0); + s[0] = vec4(unpackHalf2x16(packed_s.x), unpackHalf2x16(packed_s.y)); + s[1] = vec4(unpackHalf2x16(packed_s.z), unpackHalf2x16(packed_s.w)); + + r[0] = vec4(s[0].xz, s[1].xz); + + GC_LOAD1_2D_OFFSET(packed_s, src, 8, 0); + s[0] = vec4(unpackHalf2x16(packed_s.x), unpackHalf2x16(packed_s.y)); + s[1] = vec4(unpackHalf2x16(packed_s.z), unpackHalf2x16(packed_s.w)); + + r[1] = vec4(s[0].xz, s[1].xz); + + r[0] *= w; + r[1] *= w; + + return r; +} + +/** This kernel performs a direct convolution to convolve the low three dimensions. + * + * @note The data type must be passed at compile time using "#define DATA_TYPE_FP16" + * @note The convolution stride x must be passed at compile time using "#define STRIDE_X" e.g. "#define STRIDE_X 1" + * @note In case biases will be added to the convolution "#define HAS_BIAS" has to be passed to append the final matrix with 1 in each row. + * + * @param[in] src_ptr Pointer to the source tensor. 
Supported data types: F16
+ * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)
+ * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
+ * @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes)
+ * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
+ * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes)
+ * @param[in] src_step_z src_stride_z * number of elements along Z processed per workitem(in bytes)
+ * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor
+ * @param[out] dst_ptr Pointer to the destination tensor. Supported data types: same as @p src_ptr
+ * @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes)
+ * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes)
+ * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes)
+ * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in bytes)
+ * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor
+ * @param[in] weights_ptr Pointer to the weights tensor.
Supported data types: same as @p src_ptr + * @param[in] weights_stride_x Stride of the weights tensor in X dimension (in bytes) + * @param[in] weights_step_x weights_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] weights_stride_y Stride of the weights tensor in Y dimension (in bytes) + * @param[in] weights_step_y weights_stride_y * number of elements along y processed per workitem(in bytes) + * @param[in] weights_stride_z Stride of the weights tensor in Z dimension (in bytes) + * @param[in] weights_step_z weights_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] weights_offset_first_element_in_bytes The offset of the first element in the weights tensor + * @param[in] biases_ptr Pointer to the biases tensor. Same as @p src_ptr + * @param[in] biases_stride_x Stride of the biases tensor in X dimension (in bytes) + * @param[in] biases_step_x biases_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] biases_offset_first_element_in_bytes The offset of the first element in the biases tensor + * @param[in] weights_stride_w Stride of the weights tensor in the 4th dimension + * @param[in] weights_depth The third dimensions of the weights tensors + */ +void main() +{ + Image src = GC_CONVERT_TO_IMAGE_STRUCT(src); + Tensor3D weights = GC_CONVERT_TO_TENSOR3D_STRUCT_NO_STEP(weights); + Tensor3D dst = GC_CONVERT_TO_TENSOR3D_STRUCT(dst); + +#ifdef BIAS + Vector biases = GC_CONVERT_TO_VECTOR_STRUCT_NO_STEP(biases); +#endif /* BIAS */ + + vec4 pixels[2]; + pixels[0] = vec4(0.f); + pixels[1] = vec4(0.f); + + uint z_index = gl_GlobalInvocationID.z; + + weights.current_offset += z_index * weights_stride_w; + + uint packed_w; + float w; + + for(int d = 0; d < int(weights_depth); ++d) + { + GC_LOAD1_3D_OFFSET(packed_w, weights, 0, 0, 0); + w = unpackHalf2x16(packed_w).x; + + vec4 r[2] = CONVOLVE(src, w); + pixels[0] += r[0]; + pixels[1] += r[1]; + + src.current_offset += 
src_stride_z; + weights.current_offset += weights_stride_z; + } + +#ifdef BIAS + uint packed_b; + float b; + + GC_LOAD1_1D_OFFSET(packed_b, biases, z_index); + + if(z_index % uint(2) == uint(0)) + { + b = unpackHalf2x16(packed_b).x; + } + else + { + b = unpackHalf2x16(packed_b).y; + } + + pixels[0] += vec4(b); + pixels[1] += vec4(b); +#endif /* BIAS */ + + uvec4 packed_d; + packed_d = uvec4(packHalf2x16(pixels[0].xy), packHalf2x16(pixels[0].zw), + packHalf2x16(pixels[1].xy), packHalf2x16(pixels[1].zw)); + GC_STORE1_3D_OFFSET(packed_d, dst, 0, 0, 0); +} +#else /* DATA_TYPE_FP32 */ +#error Data type not supported +#endif /* DATA_TYPE_FP32 */ diff --git a/src/core/GLES_COMPUTE/cs_shaders/direct_convolution3x3.cs b/src/core/GLES_COMPUTE/cs_shaders/direct_convolution3x3.cs new file mode 100644 index 0000000000..67b92cb8cf --- /dev/null +++ b/src/core/GLES_COMPUTE/cs_shaders/direct_convolution3x3.cs @@ -0,0 +1,1583 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +layout(local_size_x = LOCAL_SIZE_X, local_size_y = LOCAL_SIZE_Y, local_size_z = LOCAL_SIZE_Z) in; + +#include "helpers.h" + +layout(std140) uniform shader_params +{ + TENSOR3D_PARAM_DECLARATION(src); + TENSOR3D_PARAM_DECLARATION(dst); + TENSOR3D_PARAM_DECLARATION(weights); +#ifdef BIAS + VECTOR_PARAM_DECLARATION(biases); +#endif /* BIAS */ + uint weights_stride_w; + uint weights_depth; +}; + +#define LOAD12(r, name, offset) \ + r.x = LOAD4(name, offset); \ + r.y = LOAD4(name, offset + uint(1)); \ + r.z = LOAD4(name, offset + uint(2)) + +#define LOAD3X3(r, name) \ + r[0] = LOAD4(name, tensor3D_offset(name, 0, 0, 0)); \ + r[1] = LOAD4(name, tensor3D_offset(name, 1, 0, 0)); \ + r[2] = LOAD4(name, tensor3D_offset(name, 2, 0, 0)); \ + r[3] = LOAD4(name, tensor3D_offset(name, 0, 1, 0)); \ + r[4] = LOAD4(name, tensor3D_offset(name, 1, 1, 0)); \ + r[5] = LOAD4(name, tensor3D_offset(name, 2, 1, 0)); \ + r[6] = LOAD4(name, tensor3D_offset(name, 0, 2, 0)); \ + r[7] = LOAD4(name, tensor3D_offset(name, 1, 2, 0)); \ + r[8] = LOAD4(name, tensor3D_offset(name, 2, 2, 0)) + +#if defined(PROCESS_1_ELEMENT) +BUFFER_DECLARATION(src, 1, float, readonly); +BUFFER_DECLARATION(dst, 2, float, writeonly); +BUFFER_DECLARATION(weights, 3, float, readonly); +#ifdef BIAS +BUFFER_DECLARATION(biases, 4, float, readonly); +#endif /* BIAS */ + +/** This kernel performs a direct convolution to convolve the low three dimensions. + * + * @note The data type must be passed at compile time using "#define DATA_TYPE_FP32" + * @note If biases are used then "define HAS_BIAS" has to be passed at compile time + * + * @param[in] src_ptr Pointer to the source tensor. 
Supported data types: F32
+ * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes)
+ * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
+ * @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes)
+ * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
+ * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes)
+ * @param[in] src_step_z src_stride_z * number of elements along Z processed per workitem(in bytes)
+ * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor
+ * @param[out] dst_ptr Pointer to the destination tensor. Supported data types: same as @p src_ptr
+ * @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes)
+ * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes)
+ * @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes)
+ * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes)
+ * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in bytes)
+ * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes)
+ * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor
+ * @param[in] weights_ptr Pointer to the weights tensor.
Supported data types: same as @p src_ptr + * @param[in] weights_stride_x Stride of the weights tensor in X dimension (in bytes) + * @param[in] weights_step_x weights_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] weights_stride_y Stride of the weights tensor in Y dimension (in bytes) + * @param[in] weights_step_y weights_stride_y * number of elements along y processed per workitem(in bytes) + * @param[in] weights_stride_z Stride of the weights tensor in Z dimension (in bytes) + * @param[in] weights_step_z weights_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] weights_offset_first_element_in_bytes The offset of the first element in the weights tensor + * @param[in] biases_ptr Pointer to the biases tensor. Same as @p src_ptr + * @param[in] biases_stride_x Stride of the biases tensor in X dimension (in bytes) + * @param[in] biases_step_x biases_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] biases_offset_first_element_in_bytes The offset of the first element in the biases tensor + * @param[in] weights_stride_w Stride of the weights tensor in the 4th dimension + * @param[in] weights_depth The third dimensions of the weights tensors + */ +void main() +{ + Image src = CONVERT_TO_IMAGE_STRUCT(src); + Tensor3D weights = CONVERT_TO_TENSOR3D_STRUCT_NO_STEP(weights); + Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT(dst); + +#ifdef BIAS + Vector biases = CONVERT_TO_VECTOR_STRUCT_NO_STEP(biases); +#endif /* BIAS */ + + float pixels = CONVERT(0, float); + + uint z_index = gl_GlobalInvocationID.z; + + weights.current_offset += z_index * weights_stride_w >> 2; + + for(int d = 0; d < int(weights_depth); ++d) + { + vec3 temp; + vec3 w; + + LOAD12(temp, src, offset(src, 0, 0)); + LOAD12(w, weights, tensor3D_offset(weights, 0, 0, 0)); + + pixels += temp.x * w[0] + temp.y * w[1] + temp.z * w[2]; + + LOAD12(temp, src, offset(src, 0, 1)); + LOAD12(w, weights, 
tensor3D_offset(weights, 0, 1, 0)); + + pixels += temp.x * w[0] + temp.y * w[1] + temp.z * w[2]; + + LOAD12(temp, src, offset(src, 0, 2)); + LOAD12(w, weights, tensor3D_offset(weights, 0, 2, 0)); + + pixels += temp.x * w[0] + temp.y * w[1] + temp.z * w[2]; + + src.current_offset += src_stride_z >> 2; + weights.current_offset += weights_stride_z >> 2; + } + +#ifdef BIAS + pixels += LOAD4(biases, vector_offset(biases, int(z_index))); +#endif /* BIAS */ + + STORE4(dst, CURRENT_OFFSET(dst), pixels); +} +#elif defined(PROCESS_8_ELEMENT) +BUFFER_DECLARATION(src, 1, vec4, readonly); +BUFFER_DECLARATION(dst, 2, vec4, writeonly); +BUFFER_DECLARATION(weights, 3, float, readonly); +#ifdef BIAS +BUFFER_DECLARATION(biases, 4, float, readonly); +#endif /* BIAS */ + +#if STRIDE_X == 2 +#define CONVOLVE1x3(offset, w) convolve1x3_stride2(offset, w) +#elif STRIDE_X == 1 /* STRIDE_X == 1 */ +#define CONVOLVE1x3(offset, w) convolve1x3_stride1(offset, w) +#else /* STRIDE_X not equals 1 or 2 */ +#error STRIDE_X larger than 2 is not supported +#endif /* STRIDE_X == 2 */ + +vec4[2] convolve1x3_stride1(uint offset, vec3 w) +{ + vec4 middle; + vec4 right; + vec4 tmp[3]; + vec4 r[2]; + + LOAD3(tmp, src, offset); + + middle = vec4(tmp[0].yzw, tmp[1].x); + right = vec4(tmp[0].zw, tmp[1].xy); + + r[0] = tmp[0] * w[0] + middle * w[1] + right * w[2]; + + middle = vec4(tmp[1].yzw, tmp[2].x); + right = vec4(tmp[1].zw, tmp[2].xy); + + r[1] = tmp[1] * w[0] + middle * w[1] + right * w[2]; + + return r; +} + +vec4[2] convolve1x3_stride2(uint offset, vec3 w) +{ + vec4 left; + vec4 middle; + vec4 right; + vec4 tmp[3]; + vec4 r[2]; + + LOAD3(tmp, src, offset); + + left = vec4(tmp[0].xz, tmp[1].xz); + middle = vec4(tmp[0].yw, tmp[1].yw); + right = vec4(tmp[0].z, tmp[1].xz, tmp[2].x); + + r[0] = left * w[0] + middle * w[1] + right * w[2]; + + LOAD2(tmp, src, offset + ((uint(3) * src_stride_x) >> 2)); + + left = vec4(tmp[2].xz, tmp[0].xz); + middle = vec4(tmp[2].yw, tmp[0].yw); + right = vec4(tmp[2].z, 
tmp[0].xz, tmp[1].x); + + r[1] = left * w[0] + middle * w[1] + right * w[2]; + + return r; +} + +/** An optimized direct convolution 3x3 OpenGL ES compute shader for process 8 elements at once + * + * @note This OpenGL ES shader works with stride_x = 1 and 2 + * @note The data type must be passed at compile time using "#define DATA_TYPE_FP32" + * @note If biases are used then "define HAS_BIAS" has to be passed at compile time + * + * @param[in] src_ptr Pointer to the source tensor. Supported data types: F32 + * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes) + * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes) + * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] src_step_z src_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor + * @param[out] dst_ptr Pointer to the destination tensor. Supported data types: same as @p src_ptr + * @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes) + * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes) + * @param[in] dst_step_y dst_stride_y * number of elements along Z processed per workitem(in bytes) + * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in bytes) + * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor + * @param[out] weights_ptr Pointer to the weights tensor. 
Supported data types: same as @p src_ptr + * @param[in] weights_stride_x Stride of the weights tensor in X dimension (in bytes) + * @param[in] weights_step_x weights_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] weights_stride_y Stride of the weights tensor in Y dimension (in bytes) + * @param[in] weights_step_y weights_stride_y * number of elements along y processed per workitem(in bytes) + * @param[in] weights_stride_z Stride of the weights tensor in Z dimension (in bytes) + * @param[in] weights_step_z weights_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] weights_offset_first_element_in_bytes The offset of the first element in the weights tensor + * @param[in] biases_ptr Pointer to the biases tensor. Same as @p src_ptr + * @param[in] biases_stride_x Stride of the biases tensor in X dimension (in bytes) + * @param[in] biases_step_x biases_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] biases_offset_first_element_in_bytes The offset of the first element in the biases tensor + * @param[in] weights_stride_w Stride of the weights tensor in the 4th dimension + * @param[in] weights_depth The third dimensions of the weights tensors + */ +void main() +{ + Image src = CONVERT_TO_IMAGE_STRUCT(src); + Tensor3D weights = CONVERT_TO_TENSOR3D_STRUCT_NO_STEP(weights); + Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT(dst); + +#ifdef BIAS + Vector biases = CONVERT_TO_VECTOR_STRUCT_NO_STEP(biases); +#endif /* BIAS */ + + vec4 pixels[2]; + pixels[0] = vec4(0); + pixels[1] = vec4(0); + + uint z_index = gl_GlobalInvocationID.z; + + weights.current_offset += z_index * weights_stride_w >> 2; + + for(int d = 0; d < int(weights_depth); ++d) + { + // load 3 weights once + vec3 w; + vec4 r[2]; + + // first line + LOAD3(w, weights, tensor3D_offset(weights, 0, 0, 0)); + + r = CONVOLVE1x3(src.current_offset >> uint(2), w); + pixels[0] += r[0]; + pixels[1] += r[1]; + + // second 
line + LOAD3(w, weights, tensor3D_offset(weights, 0, 1, 0)); + + r = CONVOLVE1x3((src.current_offset + (src_stride_y >> 2)) >> uint(2), w); + pixels[0] += r[0]; + pixels[1] += r[1]; + + // third line + LOAD3(w, weights, tensor3D_offset(weights, 0, 2, 0)); + + r = CONVOLVE1x3((src.current_offset + (src_stride_y >> 1)) >> uint(2), w); + pixels[0] += r[0]; + pixels[1] += r[1]; + + src.current_offset += src_stride_z >> 2; + weights.current_offset += weights_stride_z >> 2; + } + +#ifdef BIAS + float b; + LOAD1(b, biases, vector_offset(biases, int(z_index))); + pixels[0] += vec4(b); + pixels[1] += vec4(b); +#endif /* BIAS */ + + STORE2(dst, dst.current_offset >> uint(2), pixels); +} +#elif defined(PROCESS_4_ELEMENT) +BUFFER_DECLARATION(src, 1, vec4, readonly); +BUFFER_DECLARATION(dst, 2, vec4, writeonly); +BUFFER_DECLARATION(weights, 3, float, readonly); +#ifdef BIAS +BUFFER_DECLARATION(biases, 4, float, readonly); +#endif /* BIAS */ + +#if STRIDE_X == 2 +#define CONVOLVE1x3(offset, w) convolve1x3_stride2(offset, w) +#elif STRIDE_X == 1 /* STRIDE_X == 1 */ +#define CONVOLVE1x3(offset, w) convolve1x3_stride1(offset, w) +#else /* STRIDE_X not equals 1 or 2 */ +#error STRIDE_X larger than 2 is not supported +#endif /* STRIDE_X == 2 */ + +vec4 convolve1x3_stride1(uint offset, vec3 w) +{ + vec4 tmp[2]; + vec4 middle; + vec4 right; + + LOAD2(tmp, src, offset); + + middle = vec4(tmp[0].yzw, tmp[1].x); + right = vec4(tmp[0].zw, tmp[1].xy); + + tmp[1] = tmp[0] * w[0] + middle * w[1] + right * w[2]; + + return tmp[1]; +} + +vec4 convolve1x3_stride2(uint offset, vec3 w) +{ + vec4 left; + vec4 middle; + vec4 right; + + vec4 tmp[3]; + + LOAD3(tmp, src, offset); + + left = vec4(tmp[0].xz, tmp[1].xz); + middle = vec4(tmp[0].yw, tmp[1].yw); + right = vec4(tmp[0].z, tmp[1].xz, tmp[2].x); + + tmp[0] = left * w[0] + middle * w[1] + right * w[2]; + + return tmp[0]; +} + +/** An optimized direct convolution 3x3 OpenGL ES compute shader for process 4 elements at once + * + * @note This OpenGL 
ES shader works with stride_x = 1 and 2 + * @note The data type must be passed at compile time using "#define DATA_TYPE_FP32" + * @note If biases are used then "define HAS_BIAS" has to be passed at compile time + * + * @param[in] src_ptr Pointer to the source tensor. Supported data types: F32 + * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes) + * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes) + * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] src_step_z src_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor + * @param[out] dst_ptr Pointer to the destination tensor. Supported data types: same as @p src_ptr + * @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes) + * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes) + * @param[in] dst_step_y dst_stride_y * number of elements along Z processed per workitem(in bytes) + * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in bytes) + * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor + * @param[out] weights_ptr Pointer to the weights tensor. 
Supported data types: same as @p src_ptr + * @param[in] weights_stride_x Stride of the weights tensor in X dimension (in bytes) + * @param[in] weights_step_x weights_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] weights_stride_y Stride of the weights tensor in Y dimension (in bytes) + * @param[in] weights_step_y weights_stride_y * number of elements along y processed per workitem(in bytes) + * @param[in] weights_stride_z Stride of the weights tensor in Z dimension (in bytes) + * @param[in] weights_step_z weights_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] weights_offset_first_element_in_bytes The offset of the first element in the weights tensor + * @param[in] biases_ptr Pointer to the biases tensor. Same as @p src_ptr + * @param[in] biases_stride_x Stride of the biases tensor in X dimension (in bytes) + * @param[in] biases_step_x biases_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] biases_offset_first_element_in_bytes The offset of the first element in the biases tensor + * @param[in] weights_stride_w Stride of the weights tensor in the 4th dimension + * @param[in] weights_depth The third dimensions of the weights tensors + */ +void main() +{ + Image src = CONVERT_TO_IMAGE_STRUCT(src); + Tensor3D weights = CONVERT_TO_TENSOR3D_STRUCT_NO_STEP(weights); + Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT(dst); + +#ifdef BIAS + Vector biases = CONVERT_TO_VECTOR_STRUCT_NO_STEP(biases); +#endif /* BIAS */ + + vec4 pixels; + pixels = vec4(0); + + uint z_index = gl_GlobalInvocationID.z; + + weights.current_offset += z_index * weights_stride_w >> 2; + + for(int d = 0; d < int(weights_depth); ++d) + { + // load 3 weights once + vec3 w; + + // first line + LOAD3(w, weights, tensor3D_offset(weights, 0, 0, 0)); + + pixels += CONVOLVE1x3(src.current_offset >> uint(2), w); + + // second line + LOAD3(w, weights, tensor3D_offset(weights, 0, 1, 0)); + + pixels += 
CONVOLVE1x3((src.current_offset + (src_stride_y >> 2)) >> uint(2), w); + + // third line + LOAD3(w, weights, tensor3D_offset(weights, 0, 2, 0)); + + pixels += CONVOLVE1x3((src.current_offset + (src_stride_y >> 1)) >> uint(2), w); + + src.current_offset += src_stride_z >> 2; + weights.current_offset += weights_stride_z >> 2; + } + +#ifdef BIAS + float b; + LOAD1(b, biases, vector_offset(biases, int(z_index))); + pixels += vec4(b); +#endif /* BIAS */ + + STORE1(dst, dst.current_offset >> uint(2), pixels); +} +#elif defined(PROCESS_X_4ELEMENTS_Y_3ELEMENTS) +BUFFER_DECLARATION(src, 1, vec4, readonly); +BUFFER_DECLARATION(dst, 2, vec4, writeonly); +BUFFER_DECLARATION(weights, 3, float, readonly); +#ifdef BIAS +BUFFER_DECLARATION(biases, 4, float, readonly); +#endif /* BIAS */ + +#define CONVOLVE1x3(left, middle, right, w) convolve1x3_stride1(left, middle, right, w) + +vec4 convolve1x3_stride1(vec4 left, vec4 middle, vec4 right, vec3 w) +{ + vec4 r; + + r = left * w[0] + middle * w[1] + right * w[2]; + + return r; +} + +/** An optimized direct convolution 3x3 OpenGL ES compute shader for process 4x3 elements at once + * + * @note This OpenGL ES shader works with stride_x = 1 and 2 + * @note The data type must be passed at compile time using "#define DATA_TYPE_FP32" + * @note If biases are used then "define HAS_BIAS" has to be passed at compile time + * + * @param[in] src_ptr Pointer to the source tensor. 
Supported data types: F32 + * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes) + * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes) + * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] src_step_z src_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor + * @param[out] dst_ptr Pointer to the destination tensor. Supported data types: same as @p src_ptr + * @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes) + * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes) + * @param[in] dst_step_y dst_stride_y * number of elements along Z processed per workitem(in bytes) + * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in bytes) + * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor + * @param[out] weights_ptr Pointer to the weights tensor. 
Supported data types: same as @p src_ptr + * @param[in] weights_stride_x Stride of the weights tensor in X dimension (in bytes) + * @param[in] weights_step_x weights_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] weights_stride_y Stride of the weights tensor in Y dimension (in bytes) + * @param[in] weights_step_y weights_stride_y * number of elements along y processed per workitem(in bytes) + * @param[in] weights_stride_z Stride of the weights tensor in Z dimension (in bytes) + * @param[in] weights_step_z weights_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] weights_offset_first_element_in_bytes The offset of the first element in the weights tensor + * @param[in] biases_ptr Pointer to the biases tensor. Same as @p src_ptr + * @param[in] biases_stride_x Stride of the biases tensor in X dimension (in bytes) + * @param[in] biases_step_x biases_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] biases_offset_first_element_in_bytes The offset of the first element in the biases tensor + * @param[in] weights_stride_w Stride of the weights tensor in the 4th dimension + * @param[in] weights_depth The third dimensions of the weights tensors + */ +void main() +{ + Image src = CONVERT_TO_IMAGE_STRUCT(src); + Tensor3D weights = CONVERT_TO_TENSOR3D_STRUCT_NO_STEP(weights); + Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT(dst); + +#ifdef BIAS + Vector biases = CONVERT_TO_VECTOR_STRUCT_NO_STEP(biases); +#endif /* BIAS */ + + vec4 pixels[3]; + pixels[0] = vec4(0); + pixels[1] = vec4(0); + pixels[2] = vec4(0); + + uint z_index = gl_GlobalInvocationID.z; + + weights.current_offset += z_index * weights_stride_w >> 2; + + for(int d = 0; d < int(weights_depth); ++d) + { + // load 3 weights once + vec3 w[3]; + + LOAD3(w[0], weights, tensor3D_offset(weights, 0, 0, 0)); + LOAD3(w[1], weights, tensor3D_offset(weights, 0, 1, 0)); + LOAD3(w[2], weights, tensor3D_offset(weights, 0, 2, 
0)); + + vec4 s[2]; + vec4 middle; + vec4 right; + // first line + LOAD2(s, src, src.current_offset >> uint(2)); + middle = vec4(s[0].yzw, s[1].x); + right = vec4(s[0].zw, s[1].xy); + pixels[0] += CONVOLVE1x3(s[0], middle, right, w[0]); + + // second line + LOAD2(s, src, (src.current_offset + (src_stride_y >> 2)) >> uint(2)); + middle = vec4(s[0].yzw, s[1].x); + right = vec4(s[0].zw, s[1].xy); + pixels[0] += CONVOLVE1x3(s[0], middle, right, w[1]); + pixels[1] += CONVOLVE1x3(s[0], middle, right, w[0]); + + // third line + LOAD2(s, src, (src.current_offset + (src_stride_y >> 1)) >> uint(2)); + middle = vec4(s[0].yzw, s[1].x); + right = vec4(s[0].zw, s[1].xy); + pixels[0] += CONVOLVE1x3(s[0], middle, right, w[2]); + pixels[1] += CONVOLVE1x3(s[0], middle, right, w[1]); + pixels[2] += CONVOLVE1x3(s[0], middle, right, w[0]); + + // forth line + LOAD2(s, src, (src.current_offset + (uint(3) * (src_stride_y >> 2))) >> uint(2)); + middle = vec4(s[0].yzw, s[1].x); + right = vec4(s[0].zw, s[1].xy); + pixels[1] += CONVOLVE1x3(s[0], middle, right, w[2]); + pixels[2] += CONVOLVE1x3(s[0], middle, right, w[1]); + + // fifth line + LOAD2(s, src, (src.current_offset + (src_stride_y)) >> uint(2)); + middle = vec4(s[0].yzw, s[1].x); + right = vec4(s[0].zw, s[1].xy); + pixels[2] += CONVOLVE1x3(s[0], middle, right, w[2]); + + src.current_offset += src_stride_z >> 2; + weights.current_offset += weights_stride_z >> 2; + } + +#ifdef BIAS + float b; + LOAD1(b, biases, vector_offset(biases, int(z_index))); + + pixels[0] += vec4(b); + pixels[1] += vec4(b); + pixels[2] += vec4(b); +#endif /* BIAS */ + + STORE1(dst, dst.current_offset >> uint(2), pixels[0]); + STORE1(dst, (dst.current_offset + (dst_stride_y >> 2)) >> uint(2), pixels[1]); + STORE1(dst, (dst.current_offset + (dst_stride_y >> 1)) >> uint(2), pixels[2]); +} +#elif defined(PROCESS_X_8ELEMENTS_Y_3ELEMENTS_FP16) +precision mediump float; + +BUFFER_DECLARATION(src, 1, uvec4, readonly); +BUFFER_DECLARATION(dst, 2, uvec4, writeonly); 
+BUFFER_DECLARATION(weights, 3, uint, readonly); +#ifdef BIAS +BUFFER_DECLARATION(biases, 4, uint, readonly); +#endif /* BIAS */ + +#define CONVOLVE1x3(s, w) convolve1x3_stride1(s, w) + +vec4[2] convolve1x3_stride1(vec4 tmp[3], vec3 w) +{ + vec4 middle; + vec4 right; + vec4 r[2]; + + middle = vec4(tmp[0].yzw, tmp[1].x); + right = vec4(tmp[0].zw, tmp[1].xy); + + r[0] = tmp[0] * w[0] + middle * w[1] + right * w[2]; + + middle = vec4(tmp[1].yzw, tmp[2].x); + right = vec4(tmp[1].zw, tmp[2].xy); + + r[1] = tmp[1] * w[0] + middle * w[1] + right * w[2]; + + return r; +} + +vec4[3] load_and_unpack(uint offset) +{ + uvec4 packed_s[2]; + vec4 s[3]; + + LOAD1(packed_s[0], src, offset); + LOAD1(packed_s[1], src, offset + uint(1)); + ; + + s[0] = vec4(unpackHalf2x16(packed_s[0].x), unpackHalf2x16(packed_s[0].y)); + s[1] = vec4(unpackHalf2x16(packed_s[0].z), unpackHalf2x16(packed_s[0].w)); + s[2] = vec4(unpackHalf2x16(packed_s[1].x), unpackHalf2x16(packed_s[1].y)); + + return s; +} + +/** An optimized direct convolution 3x3 OpenGL ES compute shader for process 8x3 elements at once + * + * @note This OpenGL ES shader works with stride_x = 1 and 2 + * @note The data type must be passed at compile time using "#define DATA_TYPE_FP16" + * @note If biases are used then "define HAS_BIAS" has to be passed at compile time + * + * @param[in] src_ptr Pointer to the source tensor. 
Supported data types: F16 + * @param[in]  src_stride_x                          Stride of the source tensor in X dimension (in bytes) + * @param[in]  src_step_x                            src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in]  src_stride_y                          Stride of the source tensor in Y dimension (in bytes) + * @param[in]  src_step_y                            src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in]  src_stride_z                          Stride of the source tensor in Z dimension (in bytes) + * @param[in]  src_step_z                            src_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in]  src_offset_first_element_in_bytes     The offset of the first element in the source tensor + * @param[out] dst_ptr                               Pointer to the destination tensor. Supported data types: same as @p src_ptr + * @param[in]  dst_stride_x                          Stride of the destination tensor in X dimension (in bytes) + * @param[in]  dst_step_x                            dst_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in]  dst_stride_y                          Stride of the destination tensor in Y dimension (in bytes) + * @param[in]  dst_step_y                            dst_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in]  dst_stride_z                          Stride of the destination tensor in Z dimension (in bytes) + * @param[in]  dst_step_z                            dst_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in]  dst_offset_first_element_in_bytes     The offset of the first element in the destination tensor + * @param[out] weights_ptr                           Pointer to the weights tensor. 
Supported data types: same as @p src_ptr + * @param[in] weights_stride_x Stride of the weights tensor in X dimension (in bytes) + * @param[in] weights_step_x weights_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] weights_stride_y Stride of the weights tensor in Y dimension (in bytes) + * @param[in] weights_step_y weights_stride_y * number of elements along y processed per workitem(in bytes) + * @param[in] weights_stride_z Stride of the weights tensor in Z dimension (in bytes) + * @param[in] weights_step_z weights_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] weights_offset_first_element_in_bytes The offset of the first element in the weights tensor + * @param[in] biases_ptr Pointer to the biases tensor. Same as @p src_ptr + * @param[in] biases_stride_x Stride of the biases tensor in X dimension (in bytes) + * @param[in] biases_step_x biases_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] biases_offset_first_element_in_bytes The offset of the first element in the biases tensor + * @param[in] weights_stride_w Stride of the weights tensor in the 4th dimension + * @param[in] weights_depth The third dimensions of the weights tensors + */ +void main() +{ + Image src = CONVERT_TO_IMAGE_STRUCT_FP16(src); + Tensor3D weights = CONVERT_TO_TENSOR3D_STRUCT_NO_STEP_FP16(weights); + Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT_FP16(dst); + +#ifdef BIAS + Vector biases = CONVERT_TO_VECTOR_STRUCT_NO_STEP_FP16(biases); +#endif /* BIAS */ + + uvec2 packed_d[2]; + uvec4 vd; + + vec4 pixels[3][2]; + int i, j; + for(i = 0; i < 3; i++) + { + for(j = 0; j < 2; j++) + { + pixels[i][j] = vec4(0); + } + } + + uint z_index = gl_GlobalInvocationID.z; + + weights.current_offset += z_index * weights_stride_w; + + for(int d = 0; d < int(weights_depth); ++d) + { + // load 3 weights once + uvec2 packed_w[3]; + + LOAD2(packed_w[0], weights, tensor3D_offset_fp16(weights, 0, 0, 0) >> 2); + 
LOAD2(packed_w[1], weights, tensor3D_offset_fp16(weights, 0, 1, 0) >> 2); + LOAD2(packed_w[2], weights, tensor3D_offset_fp16(weights, 0, 2, 0) >> 2); + + vec3 w[3]; + w[0] = vec3(unpackHalf2x16(packed_w[0].x), unpackHalf2x16(packed_w[0].y).x); + w[1] = vec3(unpackHalf2x16(packed_w[1].x), unpackHalf2x16(packed_w[1].y).x); + w[2] = vec3(unpackHalf2x16(packed_w[2].x), unpackHalf2x16(packed_w[2].y).x); + + uvec4 packed_s[2]; + vec4 s[3]; + vec4 r[2]; + uint offset; + // first line + offset = src.current_offset >> uint(4); + s = load_and_unpack(offset); + + r = CONVOLVE1x3(s, w[0]); + pixels[0][0] += r[0]; + pixels[0][1] += r[1]; + + // second line + offset = (src.current_offset + src_stride_y) >> uint(4); + s = load_and_unpack(offset); + + r = CONVOLVE1x3(s, w[1]); + pixels[0][0] += r[0]; + pixels[0][1] += r[1]; + r = CONVOLVE1x3(s, w[0]); + pixels[1][0] += r[0]; + pixels[1][1] += r[1]; + + // third line + offset = (src.current_offset + (src_stride_y << 1)) >> uint(4); + s = load_and_unpack(offset); + + r = CONVOLVE1x3(s, w[2]); + pixels[0][0] += r[0]; + pixels[0][1] += r[1]; + r = CONVOLVE1x3(s, w[1]); + pixels[1][0] += r[0]; + pixels[1][1] += r[1]; + r = CONVOLVE1x3(s, w[0]); + pixels[2][0] += r[0]; + pixels[2][1] += r[1]; + + // forth line + offset = (src.current_offset + uint(3) * (src_stride_y)) >> uint(4); + s = load_and_unpack(offset); + + r = CONVOLVE1x3(s, w[2]); + pixels[1][0] += r[0]; + pixels[1][1] += r[1]; + r = CONVOLVE1x3(s, w[1]); + pixels[2][0] += r[0]; + pixels[2][1] += r[1]; + + // fifth line + offset = (src.current_offset + (src_stride_y << 2)) >> uint(4); + s = load_and_unpack(offset); + + r = CONVOLVE1x3(s, w[2]); + pixels[2][0] += r[0]; + pixels[2][1] += r[1]; + + src.current_offset += src_stride_z; + weights.current_offset += weights_stride_z; + } + +#ifdef BIAS + uint packed_b; + float b; + LOAD1(packed_b, biases, vector_offset_fp16(biases, int(z_index)) >> 2); + + if(z_index % uint(2) == uint(0)) + { + b = unpackHalf2x16(packed_b).x; + } + 
else + { + b = unpackHalf2x16(packed_b).y; + } + + for(i = 0; i < 3; i++) + { + for(j = 0; j < 2; j++) + { + pixels[i][j] += vec4(b); + } + } +#endif /* BIAS */ + + packed_d[0] = uvec2(packHalf2x16(pixels[0][0].xy), packHalf2x16(pixels[0][0].zw)); + packed_d[1] = uvec2(packHalf2x16(pixels[0][1].xy), packHalf2x16(pixels[0][1].zw)); + vd = uvec4(packed_d[0], packed_d[1]); + STORE1(dst, dst.current_offset >> uint(4), vd); + + packed_d[0] = uvec2(packHalf2x16(pixels[1][0].xy), packHalf2x16(pixels[1][0].zw)); + packed_d[1] = uvec2(packHalf2x16(pixels[1][1].xy), packHalf2x16(pixels[1][1].zw)); + vd = uvec4(packed_d[0], packed_d[1]); + STORE1(dst, (dst.current_offset + dst_stride_y) >> uint(4), vd); + + packed_d[0] = uvec2(packHalf2x16(pixels[2][0].xy), packHalf2x16(pixels[2][0].zw)); + packed_d[1] = uvec2(packHalf2x16(pixels[2][1].xy), packHalf2x16(pixels[2][1].zw)); + vd = uvec4(packed_d[0], packed_d[1]); + STORE1(dst, (dst.current_offset + (dst_stride_y << 1)) >> uint(4), vd); +} +#elif defined(PROCESS_X_4ELEMENTS_FP16) +precision mediump float; + +BUFFER_DECLARATION(src, 1, uvec2, readonly); +BUFFER_DECLARATION(dst, 2, uvec2, writeonly); +BUFFER_DECLARATION(weights, 3, uint, readonly); +#ifdef BIAS +BUFFER_DECLARATION(biases, 4, uint, readonly); +#endif /* BIAS */ + +#if STRIDE_X == 2 +#define CONVOLVE1x3(s, w) convolve1x3_stride2(s, w) +#define LOAD_AND_UNPACK(offset) load_and_unpack_stride2(offset) +#elif STRIDE_X == 1 /* STRIDE_X == 1 */ +#define CONVOLVE1x3(s, w) convolve1x3_stride1(s, w) +#define LOAD_AND_UNPACK(offset) load_and_unpack_stride1(offset) +#else /* STRIDE_X not equals 1 or 2 */ +#error STRIDE_X larger than 2 is not supported +#endif /* STRIDE_X == 2 */ + +vec4 convolve1x3_stride1(vec4 tmp[2], vec3 w) +{ + vec4 middle; + vec4 right; + vec4 r; + + middle = vec4(tmp[0].yzw, tmp[1].x); + right = vec4(tmp[0].zw, tmp[1].xy); + + r = tmp[0] * w[0] + middle * w[1] + right * w[2]; + + return r; +} + +vec4 convolve1x3_stride2(vec4 tmp[3], vec3 w) +{ + vec4 
left; + vec4 middle; + vec4 right; + vec4 r; + + left = vec4(tmp[0].xz, tmp[1].xz); + middle = vec4(tmp[0].yw, tmp[1].yw); + right = vec4(tmp[0].z, tmp[1].xz, tmp[2].x); + + r = left * w[0] + middle * w[1] + right * w[2]; + + return r; +} + +vec4[2] load_and_unpack_stride1(uint offset) +{ + uvec2 packed_s[2]; + vec4 s[2]; + + LOAD1(packed_s[0], src, offset); + LOAD1(packed_s[1], src, offset + uint(1)); + + s[0] = vec4(unpackHalf2x16(packed_s[0].x), unpackHalf2x16(packed_s[0].y)); + s[1] = vec4(unpackHalf2x16(packed_s[1].x), unpackHalf2x16(packed_s[1].y)); + + return s; +} + +vec4[3] load_and_unpack_stride2(uint offset) +{ + uvec2 packed_s[3]; + vec4 s[3]; + + LOAD1(packed_s[0], src, offset); + LOAD1(packed_s[1], src, offset + uint(1)); + LOAD1(packed_s[2], src, offset + uint(2)); + + s[0] = vec4(unpackHalf2x16(packed_s[0].x), unpackHalf2x16(packed_s[0].y)); + s[1] = vec4(unpackHalf2x16(packed_s[1].x), unpackHalf2x16(packed_s[1].y)); + s[2] = vec4(unpackHalf2x16(packed_s[2].x), unpackHalf2x16(packed_s[2].y)); + + return s; +} + +/** An optimized direct convolution 3x3 OpenGL ES compute shader for process 4 elements at once + * + * @note This OpenGL ES shader works with stride_x = 1 and 2 + * @note The data type must be passed at compile time using "#define DATA_TYPE_FP16" + * @note If biases are used then "define HAS_BIAS" has to be passed at compile time + * + * @param[in] src_ptr Pointer to the source tensor. 
Supported data types: F16 + * @param[in]  src_stride_x                          Stride of the source tensor in X dimension (in bytes) + * @param[in]  src_step_x                            src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in]  src_stride_y                          Stride of the source tensor in Y dimension (in bytes) + * @param[in]  src_step_y                            src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in]  src_stride_z                          Stride of the source tensor in Z dimension (in bytes) + * @param[in]  src_step_z                            src_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in]  src_offset_first_element_in_bytes     The offset of the first element in the source tensor + * @param[out] dst_ptr                               Pointer to the destination tensor. Supported data types: same as @p src_ptr + * @param[in]  dst_stride_x                          Stride of the destination tensor in X dimension (in bytes) + * @param[in]  dst_step_x                            dst_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in]  dst_stride_y                          Stride of the destination tensor in Y dimension (in bytes) + * @param[in]  dst_step_y                            dst_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in]  dst_stride_z                          Stride of the destination tensor in Z dimension (in bytes) + * @param[in]  dst_step_z                            dst_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in]  dst_offset_first_element_in_bytes     The offset of the first element in the destination tensor + * @param[out] weights_ptr                           Pointer to the weights tensor. 
Supported data types: same as @p src_ptr + * @param[in] weights_stride_x Stride of the weights tensor in X dimension (in bytes) + * @param[in] weights_step_x weights_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] weights_stride_y Stride of the weights tensor in Y dimension (in bytes) + * @param[in] weights_step_y weights_stride_y * number of elements along y processed per workitem(in bytes) + * @param[in] weights_stride_z Stride of the weights tensor in Z dimension (in bytes) + * @param[in] weights_step_z weights_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] weights_offset_first_element_in_bytes The offset of the first element in the weights tensor + * @param[in] biases_ptr Pointer to the biases tensor. Same as @p src_ptr + * @param[in] biases_stride_x Stride of the biases tensor in X dimension (in bytes) + * @param[in] biases_step_x biases_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] biases_offset_first_element_in_bytes The offset of the first element in the biases tensor + * @param[in] weights_stride_w Stride of the weights tensor in the 4th dimension + * @param[in] weights_depth The third dimensions of the weights tensors + */ +void main() +{ + Image src = CONVERT_TO_IMAGE_STRUCT_FP16(src); + Tensor3D weights = CONVERT_TO_TENSOR3D_STRUCT_NO_STEP_FP16(weights); + Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT_FP16(dst); + +#ifdef BIAS + Vector biases = CONVERT_TO_VECTOR_STRUCT_NO_STEP_FP16(biases); +#endif /* BIAS */ + + uvec2 packed_d; + + vec4 pixels = vec4(0); + + uint z_index = gl_GlobalInvocationID.z; + + weights.current_offset += z_index * weights_stride_w; + + for(int d = 0; d < int(weights_depth); ++d) + { + // load 3 weights once + uvec2 packed_w[3]; + + LOAD2(packed_w[0], weights, tensor3D_offset_fp16(weights, 0, 0, 0) >> 2); + LOAD2(packed_w[1], weights, tensor3D_offset_fp16(weights, 0, 1, 0) >> 2); + LOAD2(packed_w[2], weights, 
tensor3D_offset_fp16(weights, 0, 2, 0) >> 2); + + vec3 w[3]; + w[0] = vec3(unpackHalf2x16(packed_w[0].x), unpackHalf2x16(packed_w[0].y).x); + w[1] = vec3(unpackHalf2x16(packed_w[1].x), unpackHalf2x16(packed_w[1].y).x); + w[2] = vec3(unpackHalf2x16(packed_w[2].x), unpackHalf2x16(packed_w[2].y).x); + +#if STRIDE_X == 2 + vec4 s[3]; +#elif STRIDE_X == 1 /* STRIDE_X == 1 */ + vec4 s[2]; +#else /* STRIDE_X not equals 1 or 2 */ +#error STRIDE_X larger than 2 is not supported +#endif /* STRIDE_X == 2 */ + vec4 r; + uint offset; + // first line + offset = src.current_offset >> uint(3); + s = LOAD_AND_UNPACK(offset); + + pixels += CONVOLVE1x3(s, w[0]); + + // second line + offset = (src.current_offset + src_stride_y) >> uint(3); + s = LOAD_AND_UNPACK(offset); + + pixels += CONVOLVE1x3(s, w[1]); + + // third line + offset = (src.current_offset + (src_stride_y << 1)) >> uint(3); + s = LOAD_AND_UNPACK(offset); + + pixels += CONVOLVE1x3(s, w[2]); + + src.current_offset += src_stride_z; + weights.current_offset += weights_stride_z; + } + +#ifdef BIAS + uint packed_b; + float b; + LOAD1(packed_b, biases, vector_offset_fp16(biases, int(z_index)) >> 2); + + if(z_index % uint(2) == uint(0)) + { + b = unpackHalf2x16(packed_b).x; + } + else + { + b = unpackHalf2x16(packed_b).y; + } + + pixels += vec4(b); +#endif /* BIAS */ + + packed_d = uvec2(packHalf2x16(pixels.xy), packHalf2x16(pixels.zw)); + STORE1(dst, dst.current_offset >> uint(3), packed_d); +} +#elif defined(PROCESS_X_4ELEMENTS_Y_3ELEMENTS_FP16) +precision mediump float; + +BUFFER_DECLARATION(src, 1, uvec2, readonly); +BUFFER_DECLARATION(dst, 2, uvec2, writeonly); +BUFFER_DECLARATION(weights, 3, uint, readonly); +#ifdef BIAS +BUFFER_DECLARATION(biases, 4, uint, readonly); +#endif /* BIAS */ + +#define CONVOLVE1x3(s, w) convolve1x3_stride1(s, w) + +vec4 convolve1x3_stride1(vec4 tmp[2], vec3 w) +{ + vec4 middle; + vec4 right; + vec4 r; + + middle = vec4(tmp[0].yzw, tmp[1].x); + right = vec4(tmp[0].zw, tmp[1].xy); + + r = tmp[0] 
* w[0] + middle * w[1] + right * w[2]; + + return r; +} + +vec4[2] load_and_unpack(uint offset) +{ + uvec2 packed_s[2]; + vec4 s[2]; + + LOAD1(packed_s[0], src, offset); + LOAD1(packed_s[1], src, offset + uint(1)); + + s[0] = vec4(unpackHalf2x16(packed_s[0].x), unpackHalf2x16(packed_s[0].y)); + s[1] = vec4(unpackHalf2x16(packed_s[1].x), unpackHalf2x16(packed_s[1].y)); + + return s; +} + +/** An optimized direct convolution 3x3 OpenGL ES compute shader for process 4x3 elements at once + * + * @note This OpenGL ES shader works with stride_x = 1 and 2 + * @note The data type must be passed at compile time using "#define DATA_TYPE_FP16" + * @note If biases are used then "define HAS_BIAS" has to be passed at compile time + * + * @param[in] src_ptr Pointer to the source tensor. Supported data types: F16 + * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes) + * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes) + * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] src_step_z src_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor + * @param[out] dst_ptr Pointer to the destination tensor. 
Supported data types: same as @p src_ptr + * @param[in]  dst_stride_x                          Stride of the destination tensor in X dimension (in bytes) + * @param[in]  dst_step_x                            dst_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in]  dst_stride_y                          Stride of the destination tensor in Y dimension (in bytes) + * @param[in]  dst_step_y                            dst_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in]  dst_stride_z                          Stride of the destination tensor in Z dimension (in bytes) + * @param[in]  dst_step_z                            dst_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in]  dst_offset_first_element_in_bytes     The offset of the first element in the destination tensor + * @param[out] weights_ptr                           Pointer to the weights tensor. Supported data types: same as @p src_ptr + * @param[in]  weights_stride_x                      Stride of the weights tensor in X dimension (in bytes) + * @param[in]  weights_step_x                        weights_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in]  weights_stride_y                      Stride of the weights tensor in Y dimension (in bytes) + * @param[in]  weights_step_y                        weights_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in]  weights_stride_z                      Stride of the weights tensor in Z dimension (in bytes) + * @param[in]  weights_step_z                        weights_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in]  weights_offset_first_element_in_bytes The offset of the first element in the weights tensor + * @param[in]  biases_ptr                            Pointer to the biases tensor. 
Same as @p src_ptr + * @param[in] biases_stride_x Stride of the biases tensor in X dimension (in bytes) + * @param[in] biases_step_x biases_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] biases_offset_first_element_in_bytes The offset of the first element in the biases tensor + * @param[in] weights_stride_w Stride of the weights tensor in the 4th dimension + * @param[in] weights_depth The third dimensions of the weights tensors + */ +void main() +{ + Image src = CONVERT_TO_IMAGE_STRUCT_FP16(src); + Tensor3D weights = CONVERT_TO_TENSOR3D_STRUCT_NO_STEP_FP16(weights); + Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT_FP16(dst); + +#ifdef BIAS + Vector biases = CONVERT_TO_VECTOR_STRUCT_NO_STEP_FP16(biases); +#endif /* BIAS */ + + uvec2 packed_d; + + vec4 pixels[3]; + int i; + + for(i = 0; i < 3; i++) + { + pixels[i] = vec4(0); + } + + uint z_index = gl_GlobalInvocationID.z; + + weights.current_offset += z_index * weights_stride_w; + + for(int d = 0; d < int(weights_depth); ++d) + { + // load 3 weights once + uvec2 packed_w[3]; + + LOAD2(packed_w[0], weights, tensor3D_offset_fp16(weights, 0, 0, 0) >> 2); + LOAD2(packed_w[1], weights, tensor3D_offset_fp16(weights, 0, 1, 0) >> 2); + LOAD2(packed_w[2], weights, tensor3D_offset_fp16(weights, 0, 2, 0) >> 2); + + vec3 w[3]; + w[0] = vec3(unpackHalf2x16(packed_w[0].x), unpackHalf2x16(packed_w[0].y).x); + w[1] = vec3(unpackHalf2x16(packed_w[1].x), unpackHalf2x16(packed_w[1].y).x); + w[2] = vec3(unpackHalf2x16(packed_w[2].x), unpackHalf2x16(packed_w[2].y).x); + + vec4 s[2]; + vec4 r; + uint offset; + // first line + offset = src.current_offset >> uint(3); + s = load_and_unpack(offset); + + pixels[0] += CONVOLVE1x3(s, w[0]); + + // second line + offset = (src.current_offset + src_stride_y) >> uint(3); + s = load_and_unpack(offset); + + pixels[0] += CONVOLVE1x3(s, w[1]); + pixels[1] += CONVOLVE1x3(s, w[0]); + + // third line + offset = (src.current_offset + (src_stride_y << 1)) >> uint(3); + s = 
load_and_unpack(offset); + + pixels[0] += CONVOLVE1x3(s, w[2]); + pixels[1] += CONVOLVE1x3(s, w[1]); + pixels[2] += CONVOLVE1x3(s, w[0]); + + // forth line + offset = (src.current_offset + uint(3) * (src_stride_y)) >> uint(3); + s = load_and_unpack(offset); + + pixels[1] += CONVOLVE1x3(s, w[2]); + pixels[2] += CONVOLVE1x3(s, w[1]); + + // fifth line + offset = (src.current_offset + (src_stride_y << 2)) >> uint(3); + s = load_and_unpack(offset); + + pixels[2] += CONVOLVE1x3(s, w[2]); + + src.current_offset += src_stride_z; + weights.current_offset += weights_stride_z; + } + +#ifdef BIAS + uint packed_b; + float b; + LOAD1(packed_b, biases, vector_offset_fp16(biases, int(z_index)) >> 2); + + if(z_index % uint(2) == uint(0)) + { + b = unpackHalf2x16(packed_b).x; + } + else + { + b = unpackHalf2x16(packed_b).y; + } + + for(i = 0; i < 3; i++) + { + pixels[i] += vec4(b); + } +#endif /* BIAS */ + + packed_d = uvec2(packHalf2x16(pixels[0].xy), packHalf2x16(pixels[0].zw)); + STORE1(dst, dst.current_offset >> uint(3), packed_d); + + packed_d = uvec2(packHalf2x16(pixels[1].xy), packHalf2x16(pixels[1].zw)); + STORE1(dst, (dst.current_offset + dst_stride_y) >> uint(3), packed_d); + + packed_d = uvec2(packHalf2x16(pixels[2].xy), packHalf2x16(pixels[2].zw)); + STORE1(dst, (dst.current_offset + (dst_stride_y << 1)) >> uint(3), packed_d); +} +#elif defined(PROCESS_X_4ELEMENTS_Y_4ELEMENTS_FP16) +precision mediump float; + +BUFFER_DECLARATION(src, 1, uvec2, readonly); +BUFFER_DECLARATION(dst, 2, uvec2, writeonly); +BUFFER_DECLARATION(weights, 3, uint, readonly); +#ifdef BIAS +BUFFER_DECLARATION(biases, 4, uint, readonly); +#endif /* BIAS */ + +#define CONVOLVE1x3(s, w) convolve1x3_stride1(s, w) + +vec4 convolve1x3_stride1(vec4 tmp[2], vec3 w) +{ + vec4 middle; + vec4 right; + vec4 r; + + middle = vec4(tmp[0].yzw, tmp[1].x); + right = vec4(tmp[0].zw, tmp[1].xy); + + r = tmp[0] * w[0] + middle * w[1] + right * w[2]; + + return r; +} + +vec4[2] load_and_unpack(uint offset) +{ + uvec2 
packed_s[2]; + vec4 s[2]; + + LOAD1(packed_s[0], src, offset); + LOAD1(packed_s[1], src, offset + uint(1)); + + s[0] = vec4(unpackHalf2x16(packed_s[0].x), unpackHalf2x16(packed_s[0].y)); + s[1] = vec4(unpackHalf2x16(packed_s[1].x), unpackHalf2x16(packed_s[1].y)); + + return s; +} + +/** An optimized direct convolution 3x3 OpenGL ES compute shader for process 4x4 elements at once + * + * @note This OpenGL ES shader works with stride_x = 1 and 2 + * @note The data type must be passed at compile time using "#define DATA_TYPE_FP16" + * @note If biases are used then "define HAS_BIAS" has to be passed at compile time + * + * @param[in] src_ptr Pointer to the source tensor. Supported data types: F16 + * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes) + * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes) + * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] src_step_z src_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor + * @param[out] dst_ptr Pointer to the destination tensor. 
Supported data types: same as @p src_ptr + * @param[in]  dst_stride_x                          Stride of the destination tensor in X dimension (in bytes) + * @param[in]  dst_step_x                            dst_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in]  dst_stride_y                          Stride of the destination tensor in Y dimension (in bytes) + * @param[in]  dst_step_y                            dst_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in]  dst_stride_z                          Stride of the destination tensor in Z dimension (in bytes) + * @param[in]  dst_step_z                            dst_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in]  dst_offset_first_element_in_bytes     The offset of the first element in the destination tensor + * @param[out] weights_ptr                           Pointer to the weights tensor. Supported data types: same as @p src_ptr + * @param[in]  weights_stride_x                      Stride of the weights tensor in X dimension (in bytes) + * @param[in]  weights_step_x                        weights_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in]  weights_stride_y                      Stride of the weights tensor in Y dimension (in bytes) + * @param[in]  weights_step_y                        weights_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in]  weights_stride_z                      Stride of the weights tensor in Z dimension (in bytes) + * @param[in]  weights_step_z                        weights_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in]  weights_offset_first_element_in_bytes The offset of the first element in the weights tensor + * @param[in]  biases_ptr                            Pointer to the biases tensor. 
Same as @p src_ptr + * @param[in] biases_stride_x Stride of the biases tensor in X dimension (in bytes) + * @param[in] biases_step_x biases_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] biases_offset_first_element_in_bytes The offset of the first element in the biases tensor + * @param[in] weights_stride_w Stride of the weights tensor in the 4th dimension + * @param[in] weights_depth The third dimensions of the weights tensors + */ +void main() +{ + Image src = CONVERT_TO_IMAGE_STRUCT_FP16(src); + Tensor3D weights = CONVERT_TO_TENSOR3D_STRUCT_NO_STEP_FP16(weights); + Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT_FP16(dst); + +#ifdef BIAS + Vector biases = CONVERT_TO_VECTOR_STRUCT_NO_STEP_FP16(biases); +#endif /* BIAS */ + + uvec2 packed_d; + + vec4 pixels[4]; + int i; + + for(i = 0; i < 4; i++) + { + pixels[i] = vec4(0); + } + + uint z_index = gl_GlobalInvocationID.z; + + weights.current_offset += z_index * weights_stride_w; + + for(int d = 0; d < int(weights_depth); ++d) + { + // load 3 weights once + uvec2 packed_w[3]; + + LOAD2(packed_w[0], weights, tensor3D_offset_fp16(weights, 0, 0, 0) >> 2); + LOAD2(packed_w[1], weights, tensor3D_offset_fp16(weights, 0, 1, 0) >> 2); + LOAD2(packed_w[2], weights, tensor3D_offset_fp16(weights, 0, 2, 0) >> 2); + + vec3 w[3]; + w[0] = vec3(unpackHalf2x16(packed_w[0].x), unpackHalf2x16(packed_w[0].y).x); + w[1] = vec3(unpackHalf2x16(packed_w[1].x), unpackHalf2x16(packed_w[1].y).x); + w[2] = vec3(unpackHalf2x16(packed_w[2].x), unpackHalf2x16(packed_w[2].y).x); + + vec4 s[2]; + vec4 r; + uint offset; + // first line + offset = src.current_offset >> uint(3); + s = load_and_unpack(offset); + + pixels[0] += CONVOLVE1x3(s, w[0]); + + // second line + offset = (src.current_offset + src_stride_y) >> uint(3); + s = load_and_unpack(offset); + + pixels[0] += CONVOLVE1x3(s, w[1]); + pixels[1] += CONVOLVE1x3(s, w[0]); + + // third line + offset = (src.current_offset + (src_stride_y << 1)) >> uint(3); + s = 
load_and_unpack(offset); + + pixels[0] += CONVOLVE1x3(s, w[2]); + pixels[1] += CONVOLVE1x3(s, w[1]); + pixels[2] += CONVOLVE1x3(s, w[0]); + + // forth line + offset = (src.current_offset + uint(3) * (src_stride_y)) >> uint(3); + s = load_and_unpack(offset); + + pixels[1] += CONVOLVE1x3(s, w[2]); + pixels[2] += CONVOLVE1x3(s, w[1]); + pixels[3] += CONVOLVE1x3(s, w[0]); + + // fifth line + offset = (src.current_offset + (src_stride_y << 2)) >> uint(3); + s = load_and_unpack(offset); + + pixels[2] += CONVOLVE1x3(s, w[2]); + pixels[3] += CONVOLVE1x3(s, w[1]); + + // sixth line + offset = (src.current_offset + uint(5) * (src_stride_y)) >> uint(3); + s = load_and_unpack(offset); + + pixels[3] += CONVOLVE1x3(s, w[2]); + + src.current_offset += src_stride_z; + weights.current_offset += weights_stride_z; + } + +#ifdef BIAS + uint packed_b; + float b; + LOAD1(packed_b, biases, vector_offset_fp16(biases, int(z_index)) >> 2); + + if(z_index % uint(2) == uint(0)) + { + b = unpackHalf2x16(packed_b).x; + } + else + { + b = unpackHalf2x16(packed_b).y; + } + + for(i = 0; i < 4; i++) + { + pixels[i] += vec4(b); + } +#endif /* BIAS */ + + packed_d = uvec2(packHalf2x16(pixels[0].xy), packHalf2x16(pixels[0].zw)); + STORE1(dst, dst.current_offset >> uint(3), packed_d); + + packed_d = uvec2(packHalf2x16(pixels[1].xy), packHalf2x16(pixels[1].zw)); + STORE1(dst, (dst.current_offset + dst_stride_y) >> uint(3), packed_d); + + packed_d = uvec2(packHalf2x16(pixels[2].xy), packHalf2x16(pixels[2].zw)); + STORE1(dst, (dst.current_offset + (dst_stride_y << 1)) >> uint(3), packed_d); + + packed_d = uvec2(packHalf2x16(pixels[3].xy), packHalf2x16(pixels[3].zw)); + STORE1(dst, (dst.current_offset + uint(3) * (dst_stride_y)) >> uint(3), packed_d); +} +#elif defined(PROCESS_X_4ELEMENTS_Y_3ELEMENTS_Z_2ELEMENTS_FP16) +precision mediump float; + +BUFFER_DECLARATION(src, 1, uvec2, readonly); +BUFFER_DECLARATION(dst, 2, uvec2, writeonly); +BUFFER_DECLARATION(weights, 3, uint, readonly); +#ifdef BIAS 
+BUFFER_DECLARATION(biases, 4, uint, readonly); +#endif /* BIAS */ + +#define CONVOLVE1x3(s, w) convolve1x3_stride1(s, w) + +vec4 convolve1x3_stride1(vec4 tmp[2], vec3 w) +{ + vec4 middle; + vec4 right; + vec4 r; + + middle = vec4(tmp[0].yzw, tmp[1].x); + right = vec4(tmp[0].zw, tmp[1].xy); + + r = tmp[0] * w[0] + middle * w[1] + right * w[2]; + + return r; +} + +vec4[2] load_and_unpack(uint offset) +{ + uvec2 packed_s[2]; + vec4 s[2]; + + LOAD1(packed_s[0], src, offset); + LOAD1(packed_s[1], src, offset + uint(1)); + + s[0] = vec4(unpackHalf2x16(packed_s[0].x), unpackHalf2x16(packed_s[0].y)); + s[1] = vec4(unpackHalf2x16(packed_s[1].x), unpackHalf2x16(packed_s[1].y)); + + return s; +} + +/** An optimized direct convolution 3x3 OpenGL ES compute shader for process 4x3x2 elements at once + * + * @note This OpenGL ES shader works with stride_x = 1 and 2 + * @note The data type must be passed at compile time using "#define DATA_TYPE_FP16" + * @note If biases are used then "define HAS_BIAS" has to be passed at compile time + * + * @param[in] src_ptr Pointer to the source tensor. Supported data types: F16 + * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes) + * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes) + * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] src_step_z src_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor + * @param[out] dst_ptr Pointer to the destination tensor. 
Supported data types: same as @p src_ptr + * @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes) + * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes) + * @param[in] dst_step_y dst_stride_y * number of elements along Z processed per workitem(in bytes) + * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in bytes) + * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor + * @param[out] weights_ptr Pointer to the weights tensor. Supported data types: same as @p src_ptr + * @param[in] weights_stride_x Stride of the weights tensor in X dimension (in bytes) + * @param[in] weights_step_x weights_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] weights_stride_y Stride of the weights tensor in Y dimension (in bytes) + * @param[in] weights_step_y weights_stride_y * number of elements along y processed per workitem(in bytes) + * @param[in] weights_stride_z Stride of the weights tensor in Z dimension (in bytes) + * @param[in] weights_step_z weights_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] weights_offset_first_element_in_bytes The offset of the first element in the weights tensor + * @param[in] biases_ptr Pointer to the biases tensor. 
Same as @p src_ptr + * @param[in] biases_stride_x Stride of the biases tensor in X dimension (in bytes) + * @param[in] biases_step_x biases_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] biases_offset_first_element_in_bytes The offset of the first element in the biases tensor + * @param[in] weights_stride_w Stride of the weights tensor in the 4th dimension + * @param[in] weights_depth The third dimensions of the weights tensors + */ +void main() +{ + Image src = CONVERT_TO_IMAGE_STRUCT_FP16(src); + Tensor3D weights = CONVERT_TO_TENSOR3D_STRUCT_NO_STEP_FP16(weights); + Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT_FP16(dst); + +#ifdef BIAS + Vector biases = CONVERT_TO_VECTOR_STRUCT_NO_STEP_FP16(biases); +#endif /* BIAS */ + + uvec2 packed_d; + + vec4 pixels[3]; + int i; + + uint z_base_index = gl_GlobalInvocationID.z << 1; + + // store orginal src current offset + uint s_offset = src.current_offset; + + weights.current_offset += z_base_index * weights_stride_w; + + for(int z = 0; z < 2; ++z) + { + uint z_index = z_base_index + uint(z); + + src.current_offset = s_offset; + //weights.current_offset = z_index * weights_stride_w; + + for(i = 0; i < 3; i++) + { + pixels[i] = vec4(0); + } + + for(int d = 0; d < int(weights_depth); ++d) + { + // load 3 weights once + uvec2 packed_w[3]; + + LOAD2(packed_w[0], weights, tensor3D_offset_fp16(weights, 0, 0, 0) >> 2); + LOAD2(packed_w[1], weights, tensor3D_offset_fp16(weights, 0, 1, 0) >> 2); + LOAD2(packed_w[2], weights, tensor3D_offset_fp16(weights, 0, 2, 0) >> 2); + + vec3 w[3]; + w[0] = vec3(unpackHalf2x16(packed_w[0].x), unpackHalf2x16(packed_w[0].y).x); + w[1] = vec3(unpackHalf2x16(packed_w[1].x), unpackHalf2x16(packed_w[1].y).x); + w[2] = vec3(unpackHalf2x16(packed_w[2].x), unpackHalf2x16(packed_w[2].y).x); + + vec4 s[2]; + vec4 r; + uint offset; + // first line + offset = src.current_offset >> uint(3); + s = load_and_unpack(offset); + + pixels[0] += CONVOLVE1x3(s, w[0]); + + // second 
line + offset = (src.current_offset + src_stride_y) >> uint(3); + s = load_and_unpack(offset); + + pixels[0] += CONVOLVE1x3(s, w[1]); + pixels[1] += CONVOLVE1x3(s, w[0]); + + // third line + offset = (src.current_offset + (src_stride_y << 1)) >> uint(3); + s = load_and_unpack(offset); + + pixels[0] += CONVOLVE1x3(s, w[2]); + pixels[1] += CONVOLVE1x3(s, w[1]); + pixels[2] += CONVOLVE1x3(s, w[0]); + + // forth line + offset = (src.current_offset + uint(3) * (src_stride_y)) >> uint(3); + s = load_and_unpack(offset); + + pixels[1] += CONVOLVE1x3(s, w[2]); + pixels[2] += CONVOLVE1x3(s, w[1]); + + // fifth line + offset = (src.current_offset + (src_stride_y << 2)) >> uint(3); + s = load_and_unpack(offset); + + pixels[2] += CONVOLVE1x3(s, w[2]); + + src.current_offset += src_stride_z; + weights.current_offset += weights_stride_z; + } + +#ifdef BIAS + uint packed_b; + float b; + LOAD1(packed_b, biases, vector_offset_fp16(biases, int(z_index)) >> 2); + + if(z_index % uint(2) == uint(0)) + { + b = unpackHalf2x16(packed_b).x; + } + else + { + b = unpackHalf2x16(packed_b).y; + } + + for(i = 0; i < 3; i++) + { + pixels[i] += vec4(b); + } +#endif /* BIAS */ + + packed_d = uvec2(packHalf2x16(pixels[0].xy), packHalf2x16(pixels[0].zw)); + STORE1(dst, dst.current_offset >> uint(3), packed_d); + + packed_d = uvec2(packHalf2x16(pixels[1].xy), packHalf2x16(pixels[1].zw)); + STORE1(dst, (dst.current_offset + dst_stride_y) >> uint(3), packed_d); + + packed_d = uvec2(packHalf2x16(pixels[2].xy), packHalf2x16(pixels[2].zw)); + STORE1(dst, (dst.current_offset + (dst_stride_y << 1)) >> uint(3), packed_d); + + dst.current_offset += dst_stride_z; + } +} +#endif /* PROCESS_1_ELEMENT */ diff --git a/src/core/GLES_COMPUTE/cs_shaders/direct_convolution5x5.cs b/src/core/GLES_COMPUTE/cs_shaders/direct_convolution5x5.cs new file mode 100644 index 0000000000..4fdbf0d19e --- /dev/null +++ b/src/core/GLES_COMPUTE/cs_shaders/direct_convolution5x5.cs @@ -0,0 +1,313 @@ +/* + * Copyright (c) 2017 ARM 
Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +layout(local_size_x = LOCAL_SIZE_X, local_size_y = LOCAL_SIZE_Y, local_size_z = LOCAL_SIZE_Z) in; + +#include "helpers.h" + +layout(std140) uniform shader_params +{ + TENSOR3D_PARAM_DECLARATION(src); + TENSOR3D_PARAM_DECLARATION(dst); + TENSOR3D_PARAM_DECLARATION(weights); +#ifdef BIAS + VECTOR_PARAM_DECLARATION(biases); +#endif /* BIAS */ + uint weights_stride_w; + uint weights_depth; +}; + +#ifdef DATA_TYPE_FP32 + +precision highp float; + +BUFFER_DECLARATION(src, 1, float, readonly); +BUFFER_DECLARATION(dst, 2, float, writeonly); +BUFFER_DECLARATION(weights, 3, float, readonly); +#ifdef BIAS +BUFFER_DECLARATION(biases, 4, float, readonly); +#endif /* BIAS */ + +#define LOAD20(r, name, offset) \ + r[0] = LOAD4(name, offset); \ + r[1] = LOAD4(name, offset + uint(1)); \ + r[2] = LOAD4(name, offset + uint(2)); \ + r[3] = LOAD4(name, offset + uint(3)); \ + r[4] = LOAD4(name, offset + uint(4)) + +/** This kernel performs a direct convolution to convolve the low three dimensions. + * + * @note The data type must be passed at compile time using "#define DATA_TYPE_FP32" + * @note If biases are used then "define HAS_BIAS" has to be passed at compile time + * + * @param[in] src_ptr Pointer to the source tensor. Supported data types: F32 + * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes) + * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes) + * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] src_step_z src_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor + * @param[out] dst_ptr Pointer to the destination tensor. 
Supported data types: same as @p src_ptr + * @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes) + * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes) + * @param[in] dst_step_y dst_stride_y * number of elements along Z processed per workitem(in bytes) + * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in bytes) + * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor + * @param[out] weights_ptr Pointer to the weights tensor. Supported data types: same as @p src_ptr + * @param[in] weights_stride_x Stride of the weights tensor in X dimension (in bytes) + * @param[in] weights_step_x weights_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] weights_stride_y Stride of the weights tensor in Y dimension (in bytes) + * @param[in] weights_step_y weights_stride_y * number of elements along y processed per workitem(in bytes) + * @param[in] weights_stride_z Stride of the weights tensor in Z dimension (in bytes) + * @param[in] weights_step_z weights_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] weights_offset_first_element_in_bytes The offset of the first element in the weights tensor + * @param[in] biases_ptr Pointer to the biases tensor. 
Same as @p src_ptr + * @param[in] biases_stride_x Stride of the biases tensor in X dimension (in bytes) + * @param[in] biases_step_x biases_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] biases_offset_first_element_in_bytes The offset of the first element in the biases tensor + * @param[in] weights_stride_w Stride of the weights tensor in the 4th dimension + * @param[in] weights_depth The third dimensions of the weights tensors + */ +void main() +{ + Image src = CONVERT_TO_IMAGE_STRUCT(src); + Tensor3D weights = CONVERT_TO_TENSOR3D_STRUCT_NO_STEP(weights); + Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT(dst); + +#ifdef BIAS + Vector biases = CONVERT_TO_VECTOR_STRUCT_NO_STEP(biases); +#endif /* BIAS */ + + float pixels = CONVERT(0, float); + uint z_index = gl_GlobalInvocationID.z; + weights.current_offset += z_index * weights_stride_w >> 2; + float temp[5]; + float temp_weight[5]; + + for(int d = 0; d < int(weights_depth); ++d) + { + LOAD20(temp, src, offset(src, 0, 0)); + LOAD20(temp_weight, weights, tensor3D_offset(weights, 0, 0, 0)); + pixels += temp[0] * temp_weight[0] + temp[1] * temp_weight[1] + temp[2] * temp_weight[2] + temp[3] * temp_weight[3] + temp[4] * temp_weight[4]; + + LOAD20(temp, src, offset(src, 0, 1)); + LOAD20(temp_weight, weights, tensor3D_offset(weights, 0, 1, 0)); + pixels += temp[0] * temp_weight[0] + temp[1] * temp_weight[1] + temp[2] * temp_weight[2] + temp[3] * temp_weight[3] + temp[4] * temp_weight[4]; + + LOAD20(temp, src, offset(src, 0, 2)); + LOAD20(temp_weight, weights, tensor3D_offset(weights, 0, 2, 0)); + pixels += temp[0] * temp_weight[0] + temp[1] * temp_weight[1] + temp[2] * temp_weight[2] + temp[3] * temp_weight[3] + temp[4] * temp_weight[4]; + + LOAD20(temp, src, offset(src, 0, 3)); + LOAD20(temp_weight, weights, tensor3D_offset(weights, 0, 3, 0)); + pixels += temp[0] * temp_weight[0] + temp[1] * temp_weight[1] + temp[2] * temp_weight[2] + temp[3] * temp_weight[3] + temp[4] * temp_weight[4]; + 
+ LOAD20(temp, src, offset(src, 0, 4)); + LOAD20(temp_weight, weights, tensor3D_offset(weights, 0, 4, 0)); + pixels += temp[0] * temp_weight[0] + temp[1] * temp_weight[1] + temp[2] * temp_weight[2] + temp[3] * temp_weight[3] + temp[4] * temp_weight[4]; + + src.current_offset += (src_stride_z >> 2); + weights.current_offset += (weights_stride_z >> 2); + } + +#ifdef BIAS + pixels += LOAD4(biases, vector_offset(biases, int(z_index))); +#endif /* BIAS */ + + STORE4(dst, CURRENT_OFFSET(dst), pixels); +} + +#elif defined(DATA_TYPE_FP16) + +precision mediump float; + +BUFFER_DECLARATION(src, 1, uvec2, readonly); +BUFFER_DECLARATION(dst, 2, uvec2, writeonly); +BUFFER_DECLARATION(weights, 3, uint, readonly); +#ifdef BIAS +BUFFER_DECLARATION(biases, 4, uint, readonly); +#endif /* BIAS */ + +#if STRIDE_X == 1 +#define LOAD_SRC(src, row) load_src_stride1(src, row) +#define CONVOLVE1x5(src, weight) convolve1x5_stride1(src, weight) +#elif STRIDE_X == 2 /* STRIDE_X == 1 */ +#define LOAD_SRC(src, row) load_src_stride2(src, row) +#define CONVOLVE1x5(src, weight) convolve1x5_stride2(src, weight) +#else /* STRDIDE_X == 1 */ +#error STRIDE_X larger than 2 is not supported +#endif /* STRIDE_X == 1 */ + +vec4[2] load_src_stride1(Image src, int row) +{ + uvec2 packed[2]; + vec4 ret[2]; + + GC_LOAD2_2D_OFFSET(packed, src, 0, row); + + ret[0] = vec4(unpackHalf2x16(packed[0].x), unpackHalf2x16(packed[0].y)); + ret[1] = vec4(unpackHalf2x16(packed[1].x), unpackHalf2x16(packed[1].y)); + + return ret; +} + +vec4[3] load_src_stride2(Image src, int row) +{ + uvec2 packed[3]; + vec4 ret[3]; + + GC_LOAD3_2D_OFFSET(packed, src, 0, row); + + ret[0] = vec4(unpackHalf2x16(packed[0].x), unpackHalf2x16(packed[0].y)); + ret[1] = vec4(unpackHalf2x16(packed[1].x), unpackHalf2x16(packed[1].y)); + ret[2] = vec4(unpackHalf2x16(packed[2].x), unpackHalf2x16(packed[2].y)); + + return ret; +} + +vec2[3] load_weight(Tensor3D weights, int row) +{ + uvec3 packed_w; + vec2 ret[3]; + + GC_LOAD3_3D_OFFSET(packed_w, 
weights, 0, row, 0); + + ret[0] = vec2(unpackHalf2x16(packed_w[0])); + ret[1] = vec2(unpackHalf2x16(packed_w[1])); + ret[2] = vec2(unpackHalf2x16(packed_w[2])); + + return ret; +} + +// output 4 element per thread +vec4 convolve1x5_stride1(vec4 tmp[2], vec2 w[3]) +{ + vec4 src0 = tmp[0]; + vec4 src1 = vec4(tmp[0].yzw, tmp[1].x); + vec4 src2 = vec4(tmp[0].zw, tmp[1].xy); + vec4 src3 = vec4(tmp[0].w, tmp[1].xyz); + vec4 src4 = tmp[1]; + vec4 ret = src0 * w[0].x + src1 * w[0].y + src2 * w[1].x + src3 * w[1].y + src4 * w[2].x; + + return ret; +} + +vec4 convolve1x5_stride2(vec4 tmp[3], vec2 w[3]) +{ + vec4 src0 = vec4(tmp[0].xz, tmp[1].xz); + vec4 src1 = vec4(tmp[0].yw, tmp[1].yw); + vec4 src2 = vec4(tmp[0].z, tmp[1].xz, tmp[2].x); + vec4 src3 = vec4(tmp[0].w, tmp[1].yw, tmp[2].y); + vec4 src4 = vec4(tmp[1].x, tmp[1].z, tmp[2].xz); + vec4 ret = src0 * w[0].x + src1 * w[0].y + src2 * w[1].x + src3 * w[1].y + src4 * w[2].x; + + return ret; +} + +/** This kernel performs a direct convolution to convolve the low three dimensions. + * + * @note The data type must be passed at compile time using "#define DATA_TYPE_FP16" + * @note If biases are used then "define HAS_BIAS" has to be passed at compile time + * + * @param[in] src_ptr Pointer to the source tensor. 
Supported data types: F16 + * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes) + * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes) + * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] src_step_z src_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor + * @param[out] dst_ptr Pointer to the destination tensor. Supported data types: same as @p src_ptr + * @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes) + * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes) + * @param[in] dst_step_y dst_stride_y * number of elements along Z processed per workitem(in bytes) + * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in bytes) + * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor + * @param[out] weights_ptr Pointer to the weights tensor. 
Supported data types: same as @p src_ptr + * @param[in] weights_stride_x Stride of the weights tensor in X dimension (in bytes) + * @param[in] weights_step_x weights_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] weights_stride_y Stride of the weights tensor in Y dimension (in bytes) + * @param[in] weights_step_y weights_stride_y * number of elements along y processed per workitem(in bytes) + * @param[in] weights_stride_z Stride of the weights tensor in Z dimension (in bytes) + * @param[in] weights_step_z weights_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] weights_offset_first_element_in_bytes The offset of the first element in the weights tensor + * @param[in] biases_ptr Pointer to the biases tensor. Same as @p src_ptr + * @param[in] biases_stride_x Stride of the biases tensor in X dimension (in bytes) + * @param[in] biases_step_x biases_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] biases_offset_first_element_in_bytes The offset of the first element in the biases tensor + * @param[in] weights_stride_w Stride of the weights tensor in the 4th dimension + * @param[in] weights_depth The third dimensions of the weights tensors + */ +void main() +{ + Image src = GC_CONVERT_TO_IMAGE_STRUCT(src); + Tensor3D weights = GC_CONVERT_TO_TENSOR3D_STRUCT_NO_STEP(weights); + Tensor3D dst = GC_CONVERT_TO_TENSOR3D_STRUCT(dst); + +#ifdef BIAS + Vector biases = GC_CONVERT_TO_VECTOR_STRUCT_NO_STEP(biases); +#endif /* BIAS */ + + vec4 res = vec4(0); + vec2 w[3]; + vec4 s[STRIDE_X + 1]; + uvec2 packed_d; + uint z_index = gl_GlobalInvocationID.z; + + weights.current_offset += z_index * weights_stride_w; + + for(int d = 0; d < int(weights_depth); ++d) + { + for(int row = 0; row < 5; row++) + { + w = load_weight(weights, row); + s = LOAD_SRC(src, row); + res += CONVOLVE1x5(s, w); + } + + src.current_offset += src_stride_z; + weights.current_offset += weights_stride_z; + 
} + +#ifdef BIAS + uint packed_b; + float b; + + GC_LOAD1_1D_OFFSET(packed_b, biases, z_index); + b = (z_index % uint(2) == uint(0)) ? unpackHalf2x16(packed_b).x : unpackHalf2x16(packed_b).y; + res += vec4(b); +#endif /* BIAS */ + + packed_d = uvec2(packHalf2x16(res.xy), packHalf2x16(res.zw)); + GC_STORE1_3D_OFFSET(packed_d, dst, 0, 0, 0); +} + +#else /* DATA_TYPE_FP16 */ +#error Data type not supported +#endif /* DATA_TYPE_FP16 */ diff --git a/src/core/GLES_COMPUTE/cs_shaders/dropout.cs b/src/core/GLES_COMPUTE/cs_shaders/dropout.cs new file mode 100644 index 0000000000..54e08b1306 --- /dev/null +++ b/src/core/GLES_COMPUTE/cs_shaders/dropout.cs @@ -0,0 +1,204 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +layout(local_size_x = LOCAL_SIZE_X, local_size_y = LOCAL_SIZE_Y, local_size_z = LOCAL_SIZE_Z) in; + +#include "helpers.h" + +layout(std140) uniform shader_params +{ + TENSOR3D_PARAM_DECLARATION(src); + TENSOR3D_PARAM_DECLARATION(mask); + TENSOR3D_PARAM_DECLARATION(dst); +}; + +uint hash(uint x) +{ + x += (x << 10u); + x ^= (x >> 6u); + x += (x << 3u); + x ^= (x >> 11u); + x += (x << 15u); + return x; +} + +uint hash(uvec3 v) +{ + return hash(v.x ^ hash(v.y) ^ hash(v.z)); +} + +float float_construct(uint m) +{ + const uint ieee_mantissa = 0x007FFFFFu; + const uint ieee_one = 0x3F800000u; + + m &= ieee_mantissa; + m |= ieee_one; + + float f = uintBitsToFloat(m); + return f - 1.0; +} + +float rand(vec3 v, float seed) +{ + return float_construct(hash(floatBitsToUint(v + seed))); +} + +#ifdef DATA_TYPE_FP32 + +precision highp float; + +BUFFER_DECLARATION(src, 1, float, readonly); +BUFFER_DECLARATION(mask, 2, float, ); +BUFFER_DECLARATION(dst, 3, float, writeonly); + +/** Dropout is used to improve over-fit on neural networks. + * + * @note The data type must be passed at compile time using "#define DATA_TYPE_FP32" + * + * @param[in] src_ptr Pointer to the source tensor. Supported data types: F32 + * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes) + * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes) + * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] src_step_z src_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor + * @param[out] mask_ptr Pointer to the mask tensor. 
Supported data types: same as @p src_ptr + * @param[in] mask_stride_x Stride of the mask tensor in X dimension (in bytes) + * @param[in] mask_step_x mask_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] mask_stride_y Stride of the mask tensor in Y dimension (in bytes) + * @param[in] mask_step_y mask_stride_y * number of elements along y processed per workitem(in bytes) + * @param[in] mask_stride_z Stride of the mask tensor in Z dimension (in bytes) + * @param[in] mask_step_z mask_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] mask_offset_first_element_in_bytes The offset of the first element in the mask tensor + * @param[out] dst_ptr Pointer to the destination tensor. Supported data types: same as @p src_ptr + * @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes) + * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes) + * @param[in] dst_step_y dst_stride_y * number of elements along Z processed per workitem(in bytes) + * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in bytes) + * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor + */ +void main(void) +{ + Tensor3D src = GC_CONVERT_TO_TENSOR3D_STRUCT(src); + Tensor3D mask = GC_CONVERT_TO_TENSOR3D_STRUCT(mask); + Tensor3D dst = GC_CONVERT_TO_TENSOR3D_STRUCT(dst); + + float random = 0.f; + float inputv = 0.f; + float maskv = 0.f; + float outputv = 0.f; + +#ifdef FORWARD + random = rand(vec3(gl_GlobalInvocationID.xyz), SEED); + maskv = (random > RATIO) ? 
1.f : 0.f; + GC_STORE1_3D_OFFSET(maskv, mask, 0, 0, 0); +#else /* FORWARD */ + GC_LOAD1_3D_OFFSET(maskv, mask, 0, 0, 0); +#endif /* FORWARD */ + + GC_LOAD1_3D_OFFSET(inputv, src, 0, 0, 0); + outputv = maskv * inputv * float(SCALE); + GC_STORE1_3D_OFFSET(outputv, dst, 0, 0, 0); +} + +#elif defined(DATA_TYPE_FP16) + +precision mediump float; + +BUFFER_DECLARATION(src, 1, uint, readonly); +BUFFER_DECLARATION(mask, 2, uint, ); +BUFFER_DECLARATION(dst, 3, uint, writeonly); + +/** Dropout is used to improve over-fit on neural networks. + * + * @note The data type must be passed at compile time using "#define DATA_TYPE_FP16" + * + * @param[in] src_ptr Pointer to the source tensor. Supported data types: F16 + * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes) + * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes) + * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] src_step_z src_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor + * @param[out] mask_ptr Pointer to the mask tensor. 
Supported data types: same as @p src_ptr + * @param[in] mask_stride_x Stride of the mask tensor in X dimension (in bytes) + * @param[in] mask_step_x mask_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] mask_stride_y Stride of the mask tensor in Y dimension (in bytes) + * @param[in] mask_step_y mask_stride_y * number of elements along y processed per workitem(in bytes) + * @param[in] mask_stride_z Stride of the mask tensor in Z dimension (in bytes) + * @param[in] mask_step_z mask_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] mask_offset_first_element_in_bytes The offset of the first element in the mask tensor + * @param[out] dst_ptr Pointer to the destination tensor. Supported data types: same as @p src_ptr + * @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes) + * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes) + * @param[in] dst_step_y dst_stride_y * number of elements along Z processed per workitem(in bytes) + * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in bytes) + * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor + */ +void main(void) +{ + Tensor3D src = GC_CONVERT_TO_TENSOR3D_STRUCT(src); + Tensor3D mask = GC_CONVERT_TO_TENSOR3D_STRUCT(mask); + Tensor3D dst = GC_CONVERT_TO_TENSOR3D_STRUCT(dst); + + float random1 = 0.f; + float random2 = 0.f; + uint inputv = uint(0); + uint outputv = uint(0); + uint maskv = uint(0); + vec2 input_vec = vec2(0, 0); + vec2 output_vec = vec2(0, 0); + vec2 mask_vec = vec2(0, 0); + +#ifdef FORWARD + random1 = rand(vec3(gl_GlobalInvocationID.xyz), SEED); + random2 = rand(vec3(float(gl_GlobalInvocationID.x) + 
0.5f, gl_GlobalInvocationID.yz), SEED); + mask_vec.x = (random1 > RATIO) ? 1.f : 0.f; + mask_vec.y = (random2 > RATIO) ? 1.f : 0.f; + maskv = packHalf2x16(mask_vec); + GC_STORE1_3D_OFFSET(maskv, mask, 0, 0, 0); +#else /* FORWARD */ + GC_LOAD1_3D_OFFSET(maskv, mask, 0, 0, 0); + mask_vec = unpackHalf2x16(maskv); +#endif /* FORWARD */ + + GC_LOAD1_3D_OFFSET(inputv, src, 0, 0, 0); + + input_vec = unpackHalf2x16(inputv); + output_vec = mask_vec * input_vec * float(SCALE); + outputv = packHalf2x16(output_vec); + + GC_STORE1_3D_OFFSET(outputv, dst, 0, 0, 0); +} + +#else /* DATA_TYPE_FP32 */ + +#endif /* DATA_TYPE_FP32 */ diff --git a/src/core/GLES_COMPUTE/cs_shaders/fill_border.cs b/src/core/GLES_COMPUTE/cs_shaders/fill_border.cs new file mode 100644 index 0000000000..01a39866c7 --- /dev/null +++ b/src/core/GLES_COMPUTE/cs_shaders/fill_border.cs @@ -0,0 +1,553 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +layout(local_size_x = LOCAL_SIZE_X, local_size_y = LOCAL_SIZE_Y, local_size_z = LOCAL_SIZE_Z) in; +#include "helpers.h" + +#if defined(DATA_TYPE_FP32) +#ifdef FILL_IMAGE_BORDERS_REPLICATE +BUFFER_DECLARATION(buf, 1, float, restrict); +layout(std140) uniform shader_params +{ + TENSOR3D_PARAM_DECLARATION(buf); + uint width; + uint height; + int start_pos_x; + int start_pos_y; +}; + +/** Fill N pixel of the padding edge of a single channel image by replicating the closest valid pixel. + * + * @attention The border size for top, bottom, left, right needs to be passed at the compile time. + * e.g. BORDER_SIZE_TOP=0 BORDER_SIZE_BOTTOM=2 BORDER_SIZE_LEFT=0 BORDER_SIZE_RIGHT=2 + * + * @param[in,out] buf_ptr Pointer to the source image. 
Supported data types: F32 + * @param[in] buf_stride_x Stride of the source image in X dimension (in bytes) + * @param[in] buf_step_x buf_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] buf_stride_y Stride of the source image in Y dimension (in bytes) + * @param[in] buf_step_y buf_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] buf_stride_z Stride between images if batching images (in bytes) + * @param[in] buf_step_z buf_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] buf_offset_first_element_in_bytes The offset of the first element in the source image + * @param[in] width Width of the valid region of the image + * @param[in] height Height of the valid region of the image + * @param[in] start_pos_x X coordinate indicating the start point of the valid region + * @param[in] start_pos_y Y coordinate indicating the start point of the valid region + */ +void main() +{ + Image buf = CONVERT_TENSOR3D_TO_IMAGE_STRUCT_NO_STEP(buf); + + // Update pointer to point to the starting point of the valid region + buf.current_offset = uint(int(buf.current_offset) + ((start_pos_y * int(buf_stride_y) + start_pos_x * int(buf_stride_x)) >> 2)); + + int total_width = BORDER_SIZE_LEFT + int(width) + BORDER_SIZE_RIGHT; + int gid0 = int(gl_GlobalInvocationID.x); + int gidH = gid0 - total_width; + int gidW = gid0 - BORDER_SIZE_LEFT; + + if(gidH >= 0) + { + // Handle left border + float left_val = LOAD4(buf, offset(buf, 0, gidH)); + for(int i = -BORDER_SIZE_LEFT; i < 0; ++i) + { + STORE4(buf, offset(buf, i, gidH), left_val); + } + // Handle right border + float right_val = LOAD4(buf, offset(buf, int(width) - 1, gidH)); + for(int i = 0; i < BORDER_SIZE_RIGHT; ++i) + { + STORE4(buf, offset(buf, int(width) + i, gidH), right_val); + } + } + else + { + // Get value for corners + int val_idx = gidW; + if(gidW < 0 || gidW > (int(width) - 1)) + { + val_idx = gidW < 0 ? 
0 : int(width) - 1; + } + + // Handle top border + float top_val = LOAD4(buf, offset(buf, val_idx, 0)); + for(int i = -BORDER_SIZE_TOP; i < 0; ++i) + { + STORE4(buf, offset(buf, gidW, i), top_val); + } + // Handle bottom border + float bottom_val = LOAD4(buf, offset(buf, val_idx, int(height) - 1)); + for(int i = 0; i < BORDER_SIZE_BOTTOM; ++i) + { + STORE4(buf, offset(buf, gidW, int(height) + i), bottom_val); + } + } +} +#endif /* FILL_IMAGE_BORDERS_REPLICATE */ + +#ifdef FILL_IMAGE_BORDERS_CONSTANT +BUFFER_DECLARATION(buf, 1, float, writeonly); +layout(std140) uniform shader_params +{ + TENSOR3D_PARAM_DECLARATION(buf); + uint width; + uint height; + int start_pos_x; + int start_pos_y; + float constant_value; +}; + +/** Fill N pixels of the padding edge of a single channel image with a constant value. + * + * @attention The border size for top, bottom, left, right needs to be passed at the compile time. + * e.g. BORDER_SIZE_TOP=0 BORDER_SIZE_BOTTOM=2 BORDER_SIZE_LEFT=0 BORDER_SIZE_RIGHT=2 + * + * @param[out] buf_ptr Pointer to the source image. 
Supported data types: F32 + * @param[in] buf_stride_x Stride of the source image in X dimension (in bytes) + * @param[in] buf_step_x buf_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] buf_stride_y Stride of the source image in Y dimension (in bytes) + * @param[in] buf_step_y buf_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] buf_offset_first_element_in_bytes The offset of the first element in the source image + * @param[in] width Width of the valid region of the image + * @param[in] height Height of the valid region of the image + * @param[in] start_pos_x X coordinate indicating the start point of the valid region + * @param[in] start_pos_y Y coordinate indicating the start point of the valid region + * @param[in] constant_value Constant value to use to fill the edges + */ +void main() +{ + Image buf = CONVERT_TENSOR3D_TO_IMAGE_STRUCT_NO_STEP(buf); + + // Update pointer to point to the starting point of the valid region + buf.current_offset = uint(int(buf.current_offset) + ((start_pos_y * int(buf_stride_y) + start_pos_x * int(buf_stride_x)) >> 2)); + + int total_width = BORDER_SIZE_LEFT + int(width) + BORDER_SIZE_RIGHT; + int gid0 = int(gl_GlobalInvocationID.x); + int gidH = gid0 - total_width; + int gidW = gid0 - BORDER_SIZE_LEFT; + + if(gidH >= 0) + { + // Handle left border + for(int i = -BORDER_SIZE_LEFT; i < 0; ++i) + { + STORE1(buf, offset(buf, i, gidH), constant_value); + } + // Handle right border + for(int i = 0; i < BORDER_SIZE_RIGHT; ++i) + { + STORE1(buf, offset(buf, int(width) + i, gidH), constant_value); + } + } + else + { + // Handle top border + for(int i = -BORDER_SIZE_TOP; i < 0; ++i) + { + STORE1(buf, offset(buf, gidW, i), constant_value); + } + // Handle bottom border + for(int i = 0; i < BORDER_SIZE_BOTTOM; ++i) + { + STORE1(buf, offset(buf, gidW, int(height) + i), constant_value); + } + } +} +#endif /* FILL_IMAGE_BORDERS_CONSTANT */ + +#elif defined(DATA_TYPE_FP16) 
+precision mediump float; + +#ifdef FILL_IMAGE_BORDERS_REPLICATE +BUFFER_DECLARATION(buf, 1, uint, restrict); +layout(std140) uniform shader_params +{ + TENSOR3D_PARAM_DECLARATION(buf); + uint width; + uint height; + int start_pos_x; + int start_pos_y; +}; + +void set_replicate(uint offset, int pos, uint replicate_value) +{ + uint packed_b; + LOAD1(packed_b, buf, offset); + + vec2 b = unpackHalf2x16(packed_b); + vec2 c = unpackHalf2x16(replicate_value); + + if(pos % 2 == 0) + { + b.x = c.y; + } + else + { + b.y = c.x; + } + + packed_b = packHalf2x16(b); + + STORE1(buf, offset, packed_b); +} + +/** Fill N pixel of the padding edge of a single channel image by replicating the closest valid pixel. + * + * @attention The border size for top, bottom, left, right needs to be passed at the compile time. + * e.g. BORDER_SIZE_TOP=0 BORDER_SIZE_BOTTOM=2 BORDER_SIZE_LEFT=0 BORDER_SIZE_RIGHT=2 + * + * @param[in,out] buf_ptr Pointer to the source image. Supported data types: F16 + * @param[in] buf_stride_x Stride of the source image in X dimension (in bytes) + * @param[in] buf_step_x buf_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] buf_stride_y Stride of the source image in Y dimension (in bytes) + * @param[in] buf_step_y buf_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] buf_stride_z Stride between images if batching images (in bytes) + * @param[in] buf_step_z buf_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] buf_offset_first_element_in_bytes The offset of the first element in the source image + * @param[in] width Width of the valid region of the image + * @param[in] height Height of the valid region of the image + * @param[in] start_pos_x X coordinate indicating the start point of the valid region + * @param[in] start_pos_y Y coordinate indicating the start point of the valid region + */ +void main() +{ + Image buf = 
CONVERT_TENSOR3D_TO_IMAGE_STRUCT_NO_STEP_FP16(buf); + + // Update pointer to point to the starting point of the valid region + buf.current_offset = uint(buf.current_offset + uint(start_pos_y) * buf_stride_y + uint(start_pos_x) * buf_stride_x); + + int total_width = BORDER_SIZE_LEFT + int(width) + BORDER_SIZE_RIGHT; + int gid0 = int(gl_GlobalInvocationID.x); + int gidH = gid0 - total_width; + int gidW = gid0 - BORDER_SIZE_LEFT; + + if(gidH >= 0) + { + // Handle left border + uint left_val; + LOAD1(left_val, buf, offset_fp16(buf, 0, gidH) >> uint(2)); + for(int i = -BORDER_SIZE_LEFT; i < 0; ++i) + { + uint offset = offset_fp16(buf, i, gidH) >> 2; + int pos = i + BORDER_SIZE_LEFT; + if(i == -1) + { + if(pos % 2 == 0) + { + set_replicate(offset, pos, left_val); + } + } + else + { + if(pos % 2 == 0) + { + vec2 a = unpackHalf2x16(left_val); + uint b = packHalf2x16(a.xx); + STORE1(buf, offset, b); + } + } + } + // Handle right border + uint right_val; + LOAD1(right_val, buf, offset_fp16(buf, int(width) - 1, gidH) >> uint(2)); + for(int i = 0; i < BORDER_SIZE_RIGHT; ++i) + { + uint offset = offset_fp16(buf, int(width) + i, gidH) >> 2; + int pos = i + BORDER_SIZE_LEFT + int(width); + + if(i == 0) + { + if(pos % 2 == 0) + { + vec2 a = unpackHalf2x16(right_val); + uint b = packHalf2x16(a.yy); + STORE1(buf, offset, b); + } + else + { + set_replicate(offset, pos, right_val); + } + } + else + { + if(pos % 2 == 0) + { + vec2 a = unpackHalf2x16(right_val); + uint b = packHalf2x16(a.yy); + STORE1(buf, offset, b); + } + } + } + } + else + { + // Get value for corners + int val_idx = gidW; + if(gidW < 0 || (gidW > (int(width) - 1))) + { + val_idx = gidW < 0 ? 
0 : (int(width) - 1); + } + + // Handle top border + uint top_val; + LOAD1(top_val, buf, offset_fp16(buf, val_idx, 0) >> uint(2)); + for(int i = -BORDER_SIZE_TOP; i < 0; ++i) + { + uint offset = offset_fp16(buf, gidW, i) >> 2; + + if(gid0 % 2 == 0) + { + if(gidW == (int(width) - 1)) + { + vec2 a = unpackHalf2x16(top_val); + uint b = packHalf2x16(a.xx); + STORE1(buf, offset, b); + } + else + { + if(gidW < 0) + { + vec2 a = unpackHalf2x16(top_val); + uint b; + if(BORDER_SIZE_LEFT % 2 == 0) + { + b = packHalf2x16(a.xx); + } + else + { + b = packHalf2x16(a.yy); + } + STORE1(buf, offset, b); + } + else if(gidW >= int(width)) + { + vec2 a = unpackHalf2x16(top_val); + uint b; + if((BORDER_SIZE_LEFT + int(width)) % 2 == 0) + { + b = packHalf2x16(a.yy); + } + STORE1(buf, offset, b); + } + else + { + STORE1(buf, offset, top_val); + } + } + } + } + // Handle bottom border + uint bottom_val; + LOAD1(bottom_val, buf, offset_fp16(buf, val_idx, int(height) - 1) >> uint(2)); + for(int i = 0; i < BORDER_SIZE_BOTTOM; ++i) + { + uint offset = offset_fp16(buf, gidW, int(height) + i) >> 2; + + if(gid0 % 2 == 0) + { + if(gidW == (int(width) - 1)) + { + vec2 a = unpackHalf2x16(bottom_val); + uint b = packHalf2x16(a.xx); + STORE1(buf, offset, b); + } + else + { + if(gidW < 0) + { + vec2 a = unpackHalf2x16(bottom_val); + uint b; + if(BORDER_SIZE_LEFT % 2 == 0) + { + b = packHalf2x16(a.xx); + } + else + { + b = packHalf2x16(a.yy); + } + STORE1(buf, offset, b); + } + else if(gidW >= int(width)) + { + vec2 a = unpackHalf2x16(bottom_val); + uint b; + if((BORDER_SIZE_LEFT + int(width)) % 2 == 0) + { + b = packHalf2x16(a.yy); + } + STORE1(buf, offset, b); + } + else + { + STORE1(buf, offset, bottom_val); + } + } + } + } + } +} +#endif /* FILL_IMAGE_BORDERS_REPLICATE */ + +#ifdef FILL_IMAGE_BORDERS_CONSTANT +BUFFER_DECLARATION(buf, 1, uint, restrict); + +layout(std140) uniform shader_params +{ + TENSOR3D_PARAM_DECLARATION(buf); + uint width; + uint height; + int start_pos_x; + int start_pos_y; + 
float constant_value; +}; + +void set_constant(uint offset, int pos) +{ + uint packed_b; + LOAD1(packed_b, buf, offset); + + vec2 b = unpackHalf2x16(packed_b); + + if(pos % 2 == 0) + { + b.x = constant_value; + } + else + { + b.y = constant_value; + } + + packed_b = packHalf2x16(b); + + STORE1(buf, offset, packed_b); +} + +/** Fill N pixels of the padding edge of a single channel image with a constant value. + * + * @attention The border size for top, bottom, left, right needs to be passed at the compile time. + * e.g. BORDER_SIZE_TOP=0 BORDER_SIZE_BOTTOM=2 BORDER_SIZE_LEFT=0 BORDER_SIZE_RIGHT=2 + * + * @param[out] buf_ptr Pointer to the source image. Supported data types: F16 + * @param[in] buf_stride_x Stride of the source image in X dimension (in bytes) + * @param[in] buf_step_x buf_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] buf_stride_y Stride of the source image in Y dimension (in bytes) + * @param[in] buf_step_y buf_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] buf_offset_first_element_in_bytes The offset of the first element in the source image + * @param[in] width Width of the valid region of the image + * @param[in] height Height of the valid region of the image + * @param[in] start_pos_x X coordinate indicating the start point of the valid region + * @param[in] start_pos_y Y coordinate indicating the start point of the valid region + * @param[in] constant_value Constant value to use to fill the edges + */ +void main() +{ + Image buf = CONVERT_TENSOR3D_TO_IMAGE_STRUCT_NO_STEP_FP16(buf); + + int total_width = BORDER_SIZE_LEFT + int(width) + BORDER_SIZE_RIGHT; + int gid0 = int(gl_GlobalInvocationID.x); + int gidH = gid0 - total_width; + int gidW = gid0 - BORDER_SIZE_LEFT; + + // Update pointer to point to the starting point of the valid region + buf.current_offset = uint(int(buf.current_offset) + ((start_pos_y * int(buf_stride_y) + start_pos_x * int(buf_stride_x)))); + + vec2 
b = vec2(constant_value, constant_value); + + uint packed_b = packHalf2x16(b); + + if(gidH >= 0) + { + // Handle left border + for(int i = -BORDER_SIZE_LEFT; i < 0; ++i) + { + uint offset = offset_fp16(buf, i, gidH) >> 2; + int pos = i + BORDER_SIZE_LEFT; + + if(i == -1) + { + if(pos % 2 == 0) + { + set_constant(offset, pos); + } + } + else + { + if(pos % 2 == 0) + { + STORE1(buf, offset, packed_b); + } + } + } + // Handle right border + for(int i = 0; i < BORDER_SIZE_RIGHT; ++i) + { + uint offset = offset_fp16(buf, int(width) + i, gidH) >> 2; + int pos = i + BORDER_SIZE_LEFT + int(width); + + if(i == 0) + { + if(pos % 2 == 0) + { + STORE1(buf, offset, packed_b); + } + else + { + set_constant(offset, pos); + } + } + else + { + if(pos % 2 == 0) + { + STORE1(buf, offset, packed_b); + } + } + } + } + else + { + // Handle top border + for(int i = -BORDER_SIZE_TOP; i < 0; ++i) + { + uint offset = offset_fp16(buf, gidW, i) >> 2; + + if(gid0 % 2 == 0) + { + STORE1(buf, offset, packed_b); + } + } + // Handle bottom border + for(int i = 0; i < BORDER_SIZE_BOTTOM; ++i) + { + uint offset = offset_fp16(buf, gidW, int(height) + i) >> 2; + + if(gid0 % 2 == 0) + { + STORE1(buf, offset, packed_b); + } + } + } +} +#endif /* FILL_IMAGE_BORDERS_CONSTANT */ +#endif /* DATA_TYPE_FP32 */ diff --git a/src/core/GLES_COMPUTE/cs_shaders/gemm.cs b/src/core/GLES_COMPUTE/cs_shaders/gemm.cs new file mode 100755 index 0000000000..3313b88718 --- /dev/null +++ b/src/core/GLES_COMPUTE/cs_shaders/gemm.cs @@ -0,0 +1,623 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +layout(local_size_x = LOCAL_SIZE_X, local_size_y = LOCAL_SIZE_Y, local_size_z = LOCAL_SIZE_Z) in; +#include "helpers.h" + +#if defined(DATA_TYPE_FP32) +#define LOAD8(r, name, offset) \ + r.x = LOAD4(name, offset); \ + r.y = LOAD4(name, offset + uint(1)) + +#define LOAD16(r, name, offset) \ + r.x = LOAD4(name, offset); \ + r.y = LOAD4(name, offset + uint(1)); \ + r.z = LOAD4(name, offset + uint(2)); \ + r.w = LOAD4(name, offset + uint(3)) + +#define STORE16(name, offset, r) \ + STORE4(name, offset, r.x); \ + STORE4(name, offset + uint(1), r.y); \ + STORE4(name, offset + uint(2), r.z); \ + STORE4(name, offset + uint(3), r.w) + +#ifdef GEMM_TRANSPOSE1xW +BUFFER_DECLARATION(src, 1, float, readonly); +BUFFER_DECLARATION(dst, 2, float, writeonly); + +layout(std140) uniform shader_params +{ + IMAGE_PARAM_DECLARATION(src); + IMAGE_PARAM_DECLARATION(dst); +}; + +/** This OpenGL ES kernel computes the "vector" 1x4 transposition of input matrix + * + * @param[in] src_ptr Pointer to the source matrix. 
Supported data types: F32 + * @param[in] src_stride_x Stride of the source matrix in X dimension (in bytes) + * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src_stride_y Stride of the source matrix in Y dimension (in bytes) + * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source matrix + * @param[out] dst_ptr Pointer to the destination matrix Supported data types: same as @p src_ptr + * @param[in] dst_stride_x Stride of the destination matrix in X dimension (in bytes) + * @param[in] dst_step_x dst_gx_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] dst_stride_y Stride of the destination matrix in Y dimension (in bytes) + * @param[in] dst_step_y dst_gx_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination matrix + */ +void main(void) +{ + /* Compute address for Matrix B - source */ + Image src = CONVERT_TO_IMAGE_STRUCT(src); + Image dst = CONVERT_TO_IMAGE_STRUCT(dst); + + /* Compute address for Matrix B transposed - destination. X and Y are swapped */ + uint dst_addr_in_bytes = (gl_GlobalInvocationID.y * uint(16) + gl_GlobalInvocationID.x * dst.stride_y + dst.offset_first_element_in_bytes) >> 2; + vec4 b0; + LOAD16(b0, src, offset(src, 0, 0)); + STORE16(dst, dst_addr_in_bytes, b0); +} +#endif /* GEMM_TRANSPOSE1xW */ + +#ifdef GEMM_INTERLEAVE4x4 +BUFFER_DECLARATION(src, 1, float, readonly); +BUFFER_DECLARATION(dst, 2, float, writeonly); + +layout(std140) uniform shader_params +{ + IMAGE_PARAM_DECLARATION(src); + IMAGE_PARAM_DECLARATION(dst); +}; + +/** This OpenGLES kernel reshapes the input matrix interleaving the values + * + * @param[in] src_ptr Pointer to the source matrix. 
Supported data types: F32 + * @param[in] src_stride_x Stride of the source matrix in X dimension (in bytes) + * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src_stride_y Stride of the source matrix in Y dimension (in bytes) + * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source matrix + * @param[out] dst_ptr Pointer to the destination matrix Supported data types: same as @p src_ptr + * @param[in] dst_stride_x Stride of the destination matrix in X dimension (in bytes) + * @param[in] dst_step_x dst_gx_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] dst_stride_y Stride of the destination matrix in Y dimension (in bytes) + * @param[in] dst_step_y dst_gx_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination matrix + */ +void main(void) +{ + /* Compute source and destination addresses */ + Image src = CONVERT_TO_IMAGE_STRUCT(src); + Image dst = CONVERT_TO_IMAGE_STRUCT(dst); + + int i; + int j; + + for(i = 0; i < 4; ++i) + { + for(j = 0; j < 4; ++j) + { + float res = LOAD4(src, offset(src, i, j)); + uint ofset0 = CURRENT_OFFSET(dst) + uint(i * 4 + j); + STORE4(dst, ofset0, res); + } + } +} +#endif /* GEMM_INTERLEAVE4x4 */ + +#ifdef GEMM_ACCUMULATE_BIASES +BUFFER_DECLARATION(accum, 1, float, restrict); +BUFFER_DECLARATION(biases, 2, float, readonly); + +layout(std140) uniform shader_params +{ + IMAGE_PARAM_DECLARATION(accum); + VECTOR_PARAM_DECLARATION(biases); +}; + +/** This kernel accumulates each row with the biases vector + * + * @param[in, out] accum_ptr Pointer to the accumulate tensor. 
Supported data type: F32 + * @param[in] accum_stride_x Stride of the accmulate tensor in X dimension (in bytes) + * @param[in] accum_step_x accum_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] accum_stride_y Stride of the accumlulate tensor in Y dimension (in bytes) + * @param[in] accum_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] accum_offset_first_element_in_bytes The offset of the first element in the accumulate tensor + * @param[in] biases_ptr Pointer to the biases vector. Same as @p accum_ptr + * @param[in] biases_stride_x Stride of the destination tensor in X dimension (in bytes) + * @param[in] biases_step_x dst_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] biases_offset_first_element_in_bytes The offset of the first element in the destination tensor + */ +void main(void) +{ + Image accum = CONVERT_TO_IMAGE_STRUCT(accum); + Vector biases = CONVERT_TO_VECTOR_STRUCT(biases); + + for(int i = 0; i < 16; ++i) + { + float accum_value = LOAD4(accum, CURRENT_OFFSET(accum) + uint(i)); + float biases_value = LOAD4(biases, CURRENT_OFFSET(biases) + uint(i)); + accum_value = biases_value + accum_value; + + // Store result in the accummulate buffer + STORE4(accum, CURRENT_OFFSET(accum) + uint(i), accum_value); + } +} +#endif /* GEMM_ACCUMULATE_BIASES */ + +#ifdef GEMM_MM_INTERLEAVED_TRANSPOSED /* unvalidate */ +BUFFER_DECLARATION(src0, 1, float, readonly); +BUFFER_DECLARATION(src1, 2, float, readonly); +BUFFER_DECLARATION(dst, 3, float, writeonly); + +layout(std140) uniform shader_params +{ + IMAGE_PARAM_DECLARATION(src0); + IMAGE_PARAM_DECLARATION(src1); + IMAGE_PARAM_DECLARATION(dst); +}; + +/** This OpenGL ES kernel is optimised for Midgard. 
It computes the matrix multiplication between matrix A (src0) and matrix B (src1) + * Matrix A and matrix B must be reshaped respectively with @ref gemm_interleave4x4_32bit and @ref gemm_transpose1x4 before running the matrix multiplication + * + * @attention The width of matrix B and the alpha's value need to be passed at compile time using WIDTH_MATRIX_B and ALPHA + * + * @param[in] src0_ptr Pointer to the source matrix. Supported data types: F32 + * @param[in] src0_stride_x Stride of the source matrix in X dimension (in bytes) + * @param[in] src0_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src0_stride_y Stride of the source matrix in Y dimension (in bytes) + * @param[in] src0_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src0_offset_first_element_in_bytes The offset of the first element in the source matrix + * @param[in] src1_ptr Pointer to the source matrix. Supported data types: same as @p src0_ptr + * @param[in] src1_stride_x Stride of the source matrix in X dimension (in bytes) + * @param[in] src1_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src1_stride_y Stride of the source matrix in Y dimension (in bytes) + * @param[in] src1_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src1_offset_first_element_in_bytes The offset of the first element in the source matrix + * @param[out] dst_ptr Pointer to the destination matrix Supported data types: same as @p src0_ptr + * @param[in] dst_stride_x Stride of the destination matrix in X dimension (in bytes) + * @param[in] dst_step_x dst_gx_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] dst_stride_y Stride of the destination matrix in Y dimension (in bytes) + * @param[in] dst_step_y dst_gx_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] 
dst_offset_first_element_in_bytes The offset of the first element in the destination matrix + */ +void main() +{ + Image src0 = CONVERT_TO_IMAGE_STRUCT(src0); + Image src1 = CONVERT_TO_IMAGE_STRUCT(src1); + Image dst = CONVERT_TO_IMAGE_STRUCT(dst); + + /* Compute address for matrix A and B */ + src0.current_offset = (src0.offset_first_element_in_bytes + (uint(gl_GlobalInvocationID.y) * uint(src0.stride_y))) >> uint(2); + src1.current_offset = (src1.offset_first_element_in_bytes + (uint(gl_GlobalInvocationID.x) * uint(src1.stride_y))) >> uint(2); + + /* Compute end row address for matrix B */ + int end_row_mtx_b = int(src1.current_offset) + int(COLS_B); + + /* Reset accumulators */ + vec4 c00 = vec4(0.0f); + vec4 c10 = vec4(0.0f); + vec4 c20 = vec4(0.0f); + vec4 c30 = vec4(0.0f); + + // FIXME: loop unrolling really needed for GLES? + for(; int(src1.current_offset) <= (end_row_mtx_b - 8); src0.current_offset += uint(8), src1.current_offset += uint(8)) + { + /* Load values from matrix A (interleaved) and matrix B (transposed) */ + vec4 a0; + vec4 b0; + LOAD16(a0, src0, src0.current_offset); + LOAD16(b0, src1, src1.current_offset); + + c00 += vec4(a0.x) * b0; + c10 += vec4(a0.y) * b0; + c20 += vec4(a0.z) * b0; + c30 += vec4(a0.w) * b0; + + /* Load values from matrix A (interleaved) and matrix B (transposed) */ + LOAD16(a0, src0, src0.current_offset + uint(4)); + LOAD16(b0, src1, src1.current_offset + uint(4)); + + c00 += vec4(a0.x) * b0; + c10 += vec4(a0.y) * b0; + c20 += vec4(a0.z) * b0; + c30 += vec4(a0.w) * b0; + } + + for(; int(src1.current_offset) < end_row_mtx_b; src0.current_offset += uint(4), src1.current_offset += uint(4)) + { + /* Load values from matrix A (interleaved) and matrix B (transposed) */ + vec4 a0; + vec4 b0; + LOAD16(a0, src0, src0.current_offset); + LOAD16(b0, src1, src1.current_offset); + + c00 += vec4(a0.x) * b0; + c10 += vec4(a0.y) * b0; + c20 += vec4(a0.z) * b0; + c30 += vec4(a0.w) * b0; + } + + /* Multiply by the weight of matrix product */ 
+ c00 = c00 * vec4(ALPHA); + c10 = c10 * vec4(ALPHA); + c20 = c20 * vec4(ALPHA); + c30 = c30 * vec4(ALPHA); + + /* Store 4x4 block */ + STORE16(dst, offset(dst, 0, 0), c00); + STORE16(dst, offset(dst, 0, 1), c10); + STORE16(dst, offset(dst, 0, 2), c20); + STORE16(dst, offset(dst, 0, 3), c30); +} +#endif /* GEMM_MM_INTERLEAVED_TRANSPOSED */ + +#ifdef GEMM_MM_FLOATING_POINT +BUFFER_DECLARATION(src0, 1, float, readonly); +BUFFER_DECLARATION(src1, 2, float, readonly); +BUFFER_DECLARATION(dst, 3, float, writeonly); + +layout(std140) uniform shader_params +{ + IMAGE_PARAM_DECLARATION(src0); + IMAGE_PARAM_DECLARATION(src1); + IMAGE_PARAM_DECLARATION(dst); +}; + +/** This OpenGL ES kernel computes the matrix multiplication between matrix A (src0) and matrix B (src1) + * Matrix A and matrix B must be reshaped respectively with @ref gemm_interleave4x4_32bit and @ref gemm_transpose1x4 before running the matrix multiplication + * + * @attention The width of matrix B and the alpha's value need to be passed at compile time using WIDTH_MATRIX_B and ALPHA + * + * @param[in] src0_ptr Pointer to the source matrix. Supported data types: F32 + * @param[in] src0_stride_x Stride of the source matrix in X dimension (in bytes) + * @param[in] src0_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src0_stride_y Stride of the source matrix in Y dimension (in bytes) + * @param[in] src0_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src0_offset_first_element_in_bytes The offset of the first element in the source matrix + * @param[in] src1_ptr Pointer to the source matrix. 
Supported data types: same as @p src0_ptr + * @param[in] src1_stride_x Stride of the source matrix in X dimension (in bytes) + * @param[in] src1_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src1_stride_y Stride of the source matrix in Y dimension (in bytes) + * @param[in] src1_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src1_offset_first_element_in_bytes The offset of the first element in the source matrix + * @param[out] dst_ptr Pointer to the destination matrix Supported data types: same as @p src0_ptr + * @param[in] dst_stride_x Stride of the destination matrix in X dimension (in bytes) + * @param[in] dst_step_x dst_gx_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] dst_stride_y Stride of the destination matrix in Y dimension (in bytes) + * @param[in] dst_step_y dst_gx_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination matrix + */ +void main() +{ + Image src0 = CONVERT_TO_IMAGE_STRUCT(src0); + Image src1 = CONVERT_TO_IMAGE_STRUCT(src1); + Image dst = CONVERT_TO_IMAGE_STRUCT(dst); + + int idx = int(gl_GlobalInvocationID.x) * int(NUM_ELEMS_PROCESSED_PER_THREAD_X); + /* Compute the address for the vector A and matrix B */ + src0.current_offset = (src0_offset_first_element_in_bytes + uint(gl_GlobalInvocationID.y) * src0_stride_y * uint(NUM_ELEMS_PROCESSED_PER_THREAD_Y)) >> uint(2); + src1.current_offset = (src1_offset_first_element_in_bytes + uint(idx * 4)) >> uint(2); + + /* Compute end row address for matrix A */ + int end_row_vec_a = int(src0.current_offset) + ((COLS_A * 4) >> 2); + + /* Reset accumulators */ + vec4 acc0 = vec4(0.0f); +#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1 + vec4 acc1 = vec4(0.0f); +#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1 +#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2 + vec4 acc2 = 
vec4(0.0f); +#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2 +#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3 + vec4 acc3 = vec4(0.0f); +#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3 + + for(; int(src0.current_offset) <= (end_row_vec_a - 2); src0.current_offset += uint(2), src1.current_offset += uint((2 * int(src1_stride_y)) >> 2)) + { + vec2 a0; + LOAD8(a0, src0, src0.current_offset); +#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1 + vec2 a1; + LOAD8(a1, src0, src0.current_offset + (src0_stride_y >> uint(2))); +#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1 +#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2 + vec2 a2; + LOAD8(a2, src0, src0.current_offset + ((uint(2) * src0_stride_y) >> uint(2))); +#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2 +#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3 + vec2 a3; + LOAD8(a3, src0, src0.current_offset + ((uint(3) * src0_stride_y) >> uint(2))); +#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3 + + vec4 b0; + vec4 b1; + LOAD16(b0, src1, src1.current_offset); + LOAD16(b1, src1, src1.current_offset + (src1_stride_y >> uint(2))); + + acc0 += b0 * vec4(a0.x); + acc0 += b1 * vec4(a0.y); +#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1 + acc1 += b0 * vec4(a1.x); + acc1 += b1 * vec4(a1.y); +#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1 +#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2 + acc2 += b0 * vec4(a2.x); + acc2 += b1 * vec4(a2.y); +#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2 +#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3 + acc3 += b0 * vec4(a3.x); + acc3 += b1 * vec4(a3.y); +#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3 + } + + for(; int(src0.current_offset) < end_row_vec_a; src0.current_offset += uint(1), src1.current_offset += uint(int(src1_stride_y) >> 2)) + { + // Load values from matrix A + float a0; + a0 = LOAD4(src0, src0.current_offset); +#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1 + float a1; + a1 = LOAD4(src0, src0.current_offset + ((uint(1) * src0_stride_y) >> uint(2))); +#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1 +#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2 + 
float a2; + a2 = LOAD4(src0, src0.current_offset + ((uint(2) * src0_stride_y) >> uint(2))); +#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2 +#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3 + float a3; + a3 = LOAD4(src0, src0.current_offset + ((uint(3) * src0_stride_y) >> uint(2))); +#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3 + + vec4 b0; + LOAD16(b0, src1, src1.current_offset); + + acc0 += b0 * vec4(a0); +#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1 + acc1 += b0 * vec4(a1); +#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1 +#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2 + acc2 += b0 * vec4(a2); +#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2 +#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3 + acc3 += b0 * vec4(a3); +#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3 + } + + /* Multiply by the weight of vector-matrix product */ + acc0 = acc0 * vec4(ALPHA); + STORE16(dst, offset(dst, 0, 0), acc0); +#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1 + acc1 = acc1 * vec4(ALPHA); + STORE16(dst, offset(dst, 0, 1), acc1); +#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 1 +#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2 + acc2 = acc2 * vec4(ALPHA); + STORE16(dst, offset(dst, 0, 2), acc2); +#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 2 +#if NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3 + acc3 = acc3 * vec4(ALPHA); + STORE16(dst, offset(dst, 0, 3), acc3); +#endif // NUM_ELEMS_PROCESSED_PER_THREAD_Y > 3 +} +#endif /* GEMM_MM_FLOATING_POINT */ + +#ifdef GEMM_MATRIXADDITION +BUFFER_DECLARATION(src, 1, float, readonly); +BUFFER_DECLARATION(dst, 2, float, restrict); + +layout(std140) uniform shader_params +{ + IMAGE_PARAM_DECLARATION(src); + IMAGE_PARAM_DECLARATION(dst); +}; + +/** This OpenGL ES kernel performs the in-place matrix addition between 2 matrices taking into account that the second matrix might be weighted by a scalar value beta: + * + * @attention The beta's value need to be passed at compile time using BETA + * + * @param[in] src_ptr Pointer to the source matrix. 
Supported data types: F32 + * @param[in] src_stride_x Stride of the source matrix in X dimension (in bytes) + * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src_stride_y Stride of the source matrix in Y dimension (in bytes) + * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source matrix + * @param[out] dst_ptr Pointer to the destination matrix Supported data types: same as @p src_ptr + * @param[in] dst_stride_x Stride of the destination matrix in X dimension (in bytes) + * @param[in] dst_step_x dst_gx_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] dst_stride_y Stride of the destination matrix in Y dimension (in bytes) + * @param[in] dst_step_y dst_gx_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination matrix + */ +void main(void) +{ + /* Compute source and destination addresses */ + Image src = CONVERT_TO_IMAGE_STRUCT(src); + Image dst = CONVERT_TO_IMAGE_STRUCT(dst); + + /* Load values from A x B */ + vec4 alpha_ab; + vec4 c; + vec4 out1; + + LOAD16(alpha_ab, dst, dst.current_offset); + LOAD16(c, src, src.current_offset); + + /* Computes alpha * axb + beta * c */ + out1 = alpha_ab + vec4(BETA * c); + + /* Store final result in axb matrix */ + STORE16(dst, dst.current_offset, out1); +} +#endif /* GEMM_MATRIXADDITION */ +#elif defined(DATA_TYPE_FP16) +precision mediump float; +#ifdef GEMM_MM_FLOATING_POINT +BUFFER_DECLARATION(src0, 1, uint, readonly); +BUFFER_DECLARATION(src1, 2, uvec2, readonly); +BUFFER_DECLARATION(dst, 3, uvec2, writeonly); + +layout(std140) uniform shader_params +{ + IMAGE_PARAM_DECLARATION(src0); + IMAGE_PARAM_DECLARATION(src1); + IMAGE_PARAM_DECLARATION(dst); +}; + +/** This OpenGL ES 
kernel computes the matrix multiplication between matrix A (src0) and matrix B (src1) + * Matrix A and matrix B must be reshaped respectively with @ref gemm_interleave4x4_32bit and @ref gemm_transpose1x4 before running the matrix multiplication + * + * @attention The width of matrix B and the alpha's value need to be passed at compile time using WIDTH_MATRIX_B and ALPHA + * + * @param[in] src0_ptr Pointer to the source matrix. Supported data types: F16 + * @param[in] src0_stride_x Stride of the source matrix in X dimension (in bytes) + * @param[in] src0_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src0_stride_y Stride of the source matrix in Y dimension (in bytes) + * @param[in] src0_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src0_offset_first_element_in_bytes The offset of the first element in the source matrix + * @param[in] src1_ptr Pointer to the source matrix. Supported data types: same as @p src0_ptr + * @param[in] src1_stride_x Stride of the source matrix in X dimension (in bytes) + * @param[in] src1_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src1_stride_y Stride of the source matrix in Y dimension (in bytes) + * @param[in] src1_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src1_offset_first_element_in_bytes The offset of the first element in the source matrix + * @param[out] dst_ptr Pointer to the destination matrix Supported data types: same as @p src0_ptr + * @param[in] dst_stride_x Stride of the destination matrix in X dimension (in bytes) + * @param[in] dst_step_x dst_gx_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] dst_stride_y Stride of the destination matrix in Y dimension (in bytes) + * @param[in] dst_step_y dst_gx_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] 
dst_offset_first_element_in_bytes The offset of the first element in the destination matrix + */ +void main() +{ + Image src0 = GC_CONVERT_TO_IMAGE_STRUCT(src0); + Image src1 = GC_CONVERT_TO_IMAGE_STRUCT(src1); + Image dst = GC_CONVERT_TO_IMAGE_STRUCT(dst); + + int idx = int(gl_GlobalInvocationID.x) * int(NUM_ELEMS_PROCESSED_PER_THREAD_X); + /* Compute the address for the vector A and matrix B */ + src0.current_offset = (src0_offset_first_element_in_bytes + uint(gl_GlobalInvocationID.y) * src0_stride_y * uint(NUM_ELEMS_PROCESSED_PER_THREAD_Y)); + src1.current_offset = src1_offset_first_element_in_bytes + uint(idx) * src1_stride_x; + + /* Compute end row address for matrix A */ + uint end_row_vec_a = src0.current_offset + uint(COLS_A << 1); + + /* Reset accumulators */ + vec4 acc0 = vec4(0.0f); + + for(; src0.current_offset < (end_row_vec_a - uint(2)); src0.current_offset += uint(2 * 2), src1.current_offset += uint(2) * src1_stride_y) + { + uint packed_a0; + vec2 a0; + + GC_LOAD1_2D_OFFSET(packed_a0, src0, 0, 0); + a0 = vec2(unpackHalf2x16(packed_a0)); + + uvec2 packed_b0; + uvec2 packed_b1; + vec4 b0; + vec4 b1; + + GC_LOAD1_2D_OFFSET(packed_b0, src1, 0, 0); + GC_LOAD1_2D_OFFSET(packed_b1, src1, 0, 1); + + b0 = vec4(unpackHalf2x16(packed_b0.x), unpackHalf2x16(packed_b0.y)); + b1 = vec4(unpackHalf2x16(packed_b1.x), unpackHalf2x16(packed_b1.y)); + + acc0 += b0 * vec4(a0.x); + acc0 += b1 * vec4(a0.y); + } + + for(; src0.current_offset < end_row_vec_a; src0.current_offset += uint(2 * 2), src1.current_offset += src1_stride_y) + { + uint packed_a0; + vec2 a0; + + GC_LOAD1_2D_OFFSET(packed_a0, src0, 0, 0); + a0 = vec2(unpackHalf2x16(packed_a0)); + + uvec2 packed_b0; + vec4 b0; + + GC_LOAD1_2D_OFFSET(packed_b0, src1, 0, 0); + + b0 = vec4(unpackHalf2x16(packed_b0.x), unpackHalf2x16(packed_b0.y)); + + acc0 += b0 * (a0.x); + } + + /* Multiply by the weight of vector-matrix product */ + acc0 = acc0 * vec4(ALPHA); + + uvec2 packed_d; + packed_d = uvec2(packHalf2x16(acc0.xy), 
packHalf2x16(acc0.zw)); + GC_STORE1_2D_OFFSET(packed_d, dst, 0, 0); +} +#endif /* GEMM_MM_FLOATING_POINT */ + +#ifdef GEMM_ACCUMULATE_BIASES +BUFFER_DECLARATION(accum, 1, uvec2, restrict); +BUFFER_DECLARATION(biases, 2, uvec2, readonly); + +layout(std140) uniform shader_params +{ + IMAGE_PARAM_DECLARATION(accum); + VECTOR_PARAM_DECLARATION(biases); +}; + +/** This kernel accumulates each row with the biases vector + * + * @param[in, out] accum_ptr Pointer to the accumulate tensor. Supported data type: F16 + * @param[in] accum_stride_x Stride of the accumulate tensor in X dimension (in bytes) + * @param[in] accum_step_x accum_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] accum_stride_y Stride of the accumulate tensor in Y dimension (in bytes) + * @param[in] accum_step_y accum_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] accum_offset_first_element_in_bytes The offset of the first element in the accumulate tensor + * @param[in] biases_ptr Pointer to the biases vector. 
Same as @p accum_ptr + * @param[in] biases_stride_x Stride of the destination tensor in X dimension (in bytes) + * @param[in] biases_step_x dst_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] biases_offset_first_element_in_bytes The offset of the first element in the destination tensor + */ +void main(void) +{ + Image accum = GC_CONVERT_TO_IMAGE_STRUCT(accum); + Vector biases = GC_CONVERT_TO_VECTOR_STRUCT(biases); + + vec4 u[2]; + uvec2 packed_s[2]; + GC_LOAD1_2D_OFFSET(packed_s[0], accum, 0, 0); + GC_LOAD1_1D_OFFSET(packed_s[1], biases, 0); + u[0] = vec4(unpackHalf2x16(packed_s[0].x), unpackHalf2x16(packed_s[0].y)); + u[1] = vec4(unpackHalf2x16(packed_s[1].x), unpackHalf2x16(packed_s[1].y)); + + vec4 tmp; + tmp = u[0] + u[1]; + packed_s[0] = uvec2(packHalf2x16(tmp.xy), packHalf2x16(tmp.zw)); + GC_STORE1_2D_OFFSET(packed_s[0], accum, 0, 0); +} +#endif /* GEMM_ACCUMULATE_BIASES */ +#else /* DATA_TYPE_F32 */ +#error Data type not supported +#endif /* DATA_TYPE_F32 */ diff --git a/src/core/GLES_COMPUTE/cs_shaders/helpers.h b/src/core/GLES_COMPUTE/cs_shaders/helpers.h new file mode 100644 index 0000000000..86dedf5a9c --- /dev/null +++ b/src/core/GLES_COMPUTE/cs_shaders/helpers.h @@ -0,0 +1,582 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef ARM_COMPUTE_HELPER_H +#define ARM_COMPUTE_HELPER_H + +#define CLAMP(x, min_val, max_val) min(max(x, min_val), max_val) + +#define VEC_DATA_TYPE_STR(type, size) type##size +#define VEC_DATA_TYPE(type, size) VEC_DATA_TYPE_STR(type, size) + +#define CONVERT(x, type) type(x) + +#define PACK(value, stype, dtype) \ + pack_##stype##_##dtype(value) + +#define UNPACK(value, stype, dtype) \ + unpack_##stype##_##dtype(value) + +#define BUFFER_DECLARATION(name, location, type, access) \ + layout(std430, binding = location) access buffer name##Buffer \ + { \ + type name##_ptr[]; \ + } + +#define VECTOR_PARAM_DECLARATION(name) \ + uint name##_stride_x; \ + uint name##_step_x; \ + uint name##_offset_first_element_in_bytes; \ + uint name##_buffer_data_type_size + +#define IMAGE_PARAM_DECLARATION(name) \ + uint name##_stride_x; \ + uint name##_step_x; \ + uint name##_stride_y; \ + uint name##_step_y; \ + uint name##_offset_first_element_in_bytes; \ + uint name##_buffer_data_type_size + +#define TENSOR3D_PARAM_DECLARATION(name) \ + uint name##_stride_x; \ + uint name##_step_x; \ + uint name##_stride_y; \ + uint name##_step_y; \ + uint name##_stride_z; \ + uint name##_step_z; \ + uint name##_offset_first_element_in_bytes; \ + uint name##_buffer_data_type_size + +/** Structure to hold Vector information */ +struct Vector +{ + uint current_offset; /**< Current offset of vector */ + uint offset_first_element_in_bytes; /**< The offset of the first element in the source image */ + uint 
stride_x; /**< Stride of the image in X dimension (in bytes) */ +}; + +/** Structure to hold Image information */ +struct Image +{ + uint current_offset; /**< Current offset of image */ + uint offset_first_element_in_bytes; /**< The offset of the first element in the source image */ + uint stride_x; /**< Stride of the image in X dimension (in bytes) */ + uint stride_y; /**< Stride of the image in Y dimension (in bytes) */ +}; + +/** Structure to hold 3D tensor information */ +struct Tensor3D +{ + uint current_offset; /**< Current offset of tensor */ + uint offset_first_element_in_bytes; /**< The offset of the first element in the source image */ + uint stride_x; /**< Stride of the image in X dimension (in bytes) */ + uint stride_y; /**< Stride of the image in Y dimension (in bytes) */ + uint stride_z; /**< Stride of the image in Z dimension (in bytes) */ +}; + +///////////////////////////////////////////////////////////// +// TODO: old to be removed + +#define CONVERT_TO_VECTOR_STRUCT(name) \ + update_vector_workitem_offset(name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x) + +#define CONVERT_TO_VECTOR_STRUCT_FP16(name) \ + update_vector_workitem_offset_fp16(name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x) + +#define CONVERT_TO_VECTOR_STRUCT_NO_STEP(name) \ + update_vector_workitem_offset(name##_offset_first_element_in_bytes, name##_stride_x, uint(0)) + +#define CONVERT_TO_VECTOR_STRUCT_NO_STEP_FP16(name) \ + update_vector_workitem_offset_fp16(name##_offset_first_element_in_bytes, name##_stride_x, uint(0)) + +#define CONVERT_TO_IMAGE_STRUCT(name) \ + update_image_workitem_offset(name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y) + +#define CONVERT_TO_IMAGE_STRUCT_FP16(name) \ + update_image_workitem_offset_fp16(name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y) + +#define CONVERT_TO_IMAGE_STRUCT_NO_STEP(name) \ + 
update_image_workitem_offset(name##_offset_first_element_in_bytes, name##_stride_x, uint(0), name##_stride_y, uint(0)) + +#define CONVERT_TO_IMAGE_STRUCT_NO_STEP_FP16(name) \ + update_image_workitem_offset_fp16(name##_offset_first_element_in_bytes, name##_stride_x, uint(0), name##_stride_y, uint(0)) + +#define CONVERT_TENSOR3D_TO_IMAGE_STRUCT_NO_STEP(name) \ + update_image_from_tensor3D_workitem_offset(name##_offset_first_element_in_bytes, name##_stride_x, uint(0), name##_stride_y, uint(0), name##_stride_z, name##_step_z) + +#define CONVERT_TENSOR3D_TO_IMAGE_STRUCT_NO_STEP_FP16(name) \ + update_image_from_tensor3D_workitem_offset_fp16(name##_offset_first_element_in_bytes, name##_stride_x, uint(0), name##_stride_y, uint(0), name##_stride_z, name##_step_z) + +#define CONVERT_TENSOR3D_TO_IMAGE_STRUCT(name) \ + update_image_from_tensor3D_workitem_offset(name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y, name##_stride_z, name##_step_z) + +#define CONVERT_TENSOR3D_TO_IMAGE_STRUCT_FP16(name) \ + update_image_from_tensor3D_workitem_offset_fp16(name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y, name##_stride_z, name##_step_z) + +#define CONVERT_TO_TENSOR3D_STRUCT(name) \ + update_tensor3D_workitem_offset(name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y, \ + name##_stride_z, name##_step_z) + +#define CONVERT_TO_TENSOR3D_STRUCT_FP16(name) \ + update_tensor3D_workitem_offset_fp16(name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y, \ + name##_stride_z, name##_step_z) + +#define CONVERT_TO_TENSOR3D_STRUCT_NO_STEP(name) \ + update_tensor3D_workitem_offset(name##_offset_first_element_in_bytes, name##_stride_x, uint(0), name##_stride_y, uint(0), name##_stride_z, uint(0)) + +#define CONVERT_TO_TENSOR3D_STRUCT_NO_STEP_FP16(name) \ + 
update_tensor3D_workitem_offset_fp16(name##_offset_first_element_in_bytes, name##_stride_x, uint(0), name##_stride_y, uint(0), name##_stride_z, uint(0)) + +// FIXME: Redesign the macros if different data types are supported. +#define LOAD4(name, offset) \ + name##_ptr[offset] + +#define STORE4(name, offset, value) \ + name##_ptr[offset] = value + +// Load 1 element, which size is determined by ssbo type. +#define LOAD1(r, name, offset) \ + r = name##_ptr[offset] + +#define STORE1(name, offset, value) \ + name##_ptr[offset] = value + +#define LOAD2(r, name, offset) \ + LOAD1(r[0], name, offset); \ + LOAD1(r[1], name, (offset) + uint(1)) + +#define STORE2(name, offset, value) \ + name##_ptr[offset] = value[0]; \ + name##_ptr[(offset) + uint(1)] = value[1] + +#define LOAD3(r, name, offset) \ + LOAD1(r[0], name, offset); \ + LOAD1(r[1], name, (offset) + uint(1)); \ + LOAD1(r[2], name, (offset) + uint(2)) + +#define CURRENT_OFFSET(name) \ + name.current_offset + +/** Wrap vector information into an Vector structure, and make the offset to be this workitem's position. + * + * @param[in] offset_first_element_in_bytes The offset of the first element in the source vector + * @param[in] stride_x Stride of the vector in X dimension (in bytes) + * @param[in] step_x stride_x * number of elements along X processed per workitem(in bytes) + * + * @return An vector object + */ +Vector update_vector_workitem_offset(uint offset_first_element_in_bytes, uint stride_x, uint step_x) +{ + Vector vector; + vector.offset_first_element_in_bytes = offset_first_element_in_bytes; + vector.stride_x = stride_x; + vector.current_offset = (vector.offset_first_element_in_bytes + gl_GlobalInvocationID.x * step_x) >> 2; + + return vector; +} + +/** Wrap vector information into an Vector structure, and make the offset to be this workitem's position. 
+ * + * @param[in] offset_first_element_in_bytes The offset of the first element in the source vector + * @param[in] stride_x Stride of the vector in X dimension (in bytes) + * @param[in] step_x stride_x * number of elements along X processed per workitem(in bytes) + * + * @return An vector object + */ +Vector update_vector_workitem_offset_fp16(uint offset_first_element_in_bytes, uint stride_x, uint step_x) +{ + Vector vector; + vector.offset_first_element_in_bytes = offset_first_element_in_bytes; + vector.stride_x = stride_x; + vector.current_offset = vector.offset_first_element_in_bytes + gl_GlobalInvocationID.x * step_x; + + return vector; +} + +/** Wrap image information into an Image structure, and make the offset to be this workitem's position. + * + * @param[in] offset_first_element_in_bytes The offset of the first element in the source image + * @param[in] stride_x Stride of the image in X dimension (in bytes) + * @param[in] step_x stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] stride_y Stride of the image in Y dimension (in bytes) + * @param[in] step_y stride_y * number of elements along Y processed per workitem(in bytes) + * + * @return An image object + */ +Image update_image_workitem_offset(uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y) +{ + Image img; + img.offset_first_element_in_bytes = offset_first_element_in_bytes; + img.stride_x = stride_x; + img.stride_y = stride_y; + img.current_offset = (img.offset_first_element_in_bytes + gl_GlobalInvocationID.x * step_x + gl_GlobalInvocationID.y * step_y) >> 2; + + return img; +} + +/** Wrap image information into an Image structure, and make the offset to be this workitem's position. 
+ * + * @param[in] offset_first_element_in_bytes The offset of the first element in the source image + * @param[in] stride_x Stride of the image in X dimension (in bytes) + * @param[in] step_x stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] stride_y Stride of the image in Y dimension (in bytes) + * @param[in] step_y stride_y * number of elements along Y processed per workitem(in bytes) + * + * @return An image object + */ +Image update_image_workitem_offset_fp16(uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y) +{ + Image img; + img.offset_first_element_in_bytes = offset_first_element_in_bytes; + img.stride_x = stride_x; + img.stride_y = stride_y; + img.current_offset = img.offset_first_element_in_bytes + gl_GlobalInvocationID.x * step_x + gl_GlobalInvocationID.y * step_y; + + return img; +} + +/** Wrap 3D tensor information into an image structure, and make the offset to be this workitem's position. + * + * @param[in] offset_first_element_in_bytes The offset of the first element in the source image + * @param[in] stride_x Stride of the image in X dimension (in bytes) + * @param[in] step_x stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] stride_y Stride of the image in Y dimension (in bytes) + * @param[in] step_y stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] stride_z Stride of the image in Z dimension (in bytes) + * @param[in] step_z stride_z * number of elements along Z processed per workitem(in bytes) + * + * @return A 2D Image object + */ +Image update_image_from_tensor3D_workitem_offset(uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y, uint stride_z, uint step_z) +{ + Image img; + img.offset_first_element_in_bytes = offset_first_element_in_bytes; + img.stride_x = stride_x; + img.stride_y = stride_y; + img.current_offset = (img.offset_first_element_in_bytes + 
gl_GlobalInvocationID.x * step_x + gl_GlobalInvocationID.y * step_y + gl_GlobalInvocationID.z * step_z) >> 2; + + return img; +} + +/** Wrap 3D tensor information into an image structure, and make the offset to be this workitem's position. + * + * @param[in] offset_first_element_in_bytes The offset of the first element in the source image + * @param[in] stride_x Stride of the image in X dimension (in bytes) + * @param[in] step_x stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] stride_y Stride of the image in Y dimension (in bytes) + * @param[in] step_y stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] stride_z Stride of the image in Z dimension (in bytes) + * @param[in] step_z stride_z * number of elements along Z processed per workitem(in bytes) + * + * @return A 2D Image object + */ +Image update_image_from_tensor3D_workitem_offset_fp16(uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y, uint stride_z, uint step_z) +{ + Image img; + img.offset_first_element_in_bytes = offset_first_element_in_bytes; + img.stride_x = stride_x; + img.stride_y = stride_y; + img.current_offset = img.offset_first_element_in_bytes + gl_GlobalInvocationID.x * step_x + gl_GlobalInvocationID.y * step_y + gl_GlobalInvocationID.z * step_z; + + return img; +} + +/** Wrap 3D tensor information into an tensor structure, and make the offset to be this workitem's position. 
+ * + * @param[in] offset_first_element_in_bytes The offset of the first element in the source image + * @param[in] stride_x Stride of the image in X dimension (in bytes) + * @param[in] step_x stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] stride_y Stride of the image in Y dimension (in bytes) + * @param[in] step_y stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] stride_z Stride of the image in Z dimension (in bytes) + * @param[in] step_z stride_z * number of elements along Z processed per workitem(in bytes) + * + * @return A 3D tensor object + */ +Tensor3D update_tensor3D_workitem_offset(uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y, uint stride_z, uint step_z) +{ + Tensor3D tensor; + tensor.offset_first_element_in_bytes = offset_first_element_in_bytes; + tensor.stride_x = stride_x; + tensor.stride_y = stride_y; + tensor.stride_z = stride_z; + tensor.current_offset = (tensor.offset_first_element_in_bytes + gl_GlobalInvocationID.x * step_x + gl_GlobalInvocationID.y * step_y + gl_GlobalInvocationID.z * step_z) >> 2; + + return tensor; +} + +/** Wrap 3D tensor information into an tensor structure, and make the offset to be this workitem's position. 
+ * + * @param[in] offset_first_element_in_bytes The offset of the first element in the source image + * @param[in] stride_x Stride of the image in X dimension (in bytes) + * @param[in] step_x stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] stride_y Stride of the image in Y dimension (in bytes) + * @param[in] step_y stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] stride_z Stride of the image in Z dimension (in bytes) + * @param[in] step_z stride_z * number of elements along Z processed per workitem(in bytes) + * + * @return A 3D tensor object + */ +Tensor3D update_tensor3D_workitem_offset_fp16(uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y, uint stride_z, uint step_z) +{ + Tensor3D tensor; + tensor.offset_first_element_in_bytes = offset_first_element_in_bytes; + tensor.stride_x = stride_x; + tensor.stride_y = stride_y; + tensor.stride_z = stride_z; + tensor.current_offset = tensor.offset_first_element_in_bytes + gl_GlobalInvocationID.x * step_x + gl_GlobalInvocationID.y * step_y + gl_GlobalInvocationID.z * step_z; + + return tensor; +} + +/** Get the pointer position of a Vector + * + * @param[in] vec Pointer to the starting position of the buffer + * @param[in] x Relative X position + */ +uint vector_offset(Vector vec, int x) +{ + return CONVERT(CONVERT(vec.current_offset << 2, int) + x * CONVERT(vec.stride_x, int), uint) >> 2; +} + +/** Get the pointer position of a Vector + * + * @param[in] vec Pointer to the starting position of the buffer + * @param[in] x Relative X position + */ +uint vector_offset_fp16(Vector vec, int x) +{ + return CONVERT(CONVERT(vec.current_offset, int) + x * CONVERT(vec.stride_x, int), uint); +} + +/** Get the pointer position of a Image + * + * @param[in] img Pointer to the starting position of the buffer + * @param[in] x Relative X position + * @param[in] y Relative Y position + */ +uint offset(Image img, int x, int 
y) +{ + return CONVERT(CONVERT(img.current_offset << 2, int) + x * CONVERT(img.stride_x, int) + y * CONVERT(img.stride_y, int), uint) >> 2; +} + +/** Get the pointer position of a Image + * + * @param[in] img Pointer to the starting position of the buffer + * @param[in] x Relative X position + * @param[in] y Relative Y position + */ +uint offset_fp16(Image img, int x, int y) +{ + return CONVERT(CONVERT(img.current_offset, int) + x * CONVERT(img.stride_x, int) + y * CONVERT(img.stride_y, int), uint); +} + +/** Get the pointer position of a Tensor3D + * + * @param[in] tensor Pointer to the starting postion of the buffer + * @param[in] x Relative X position + * @param[in] y Relative Y position + * @param[in] z Relative Z position + */ +uint tensor3D_offset(Tensor3D tensor, int x, int y, int z) +{ + return CONVERT(CONVERT(tensor.current_offset << 2, int) + x * CONVERT(tensor.stride_x, int) + y * CONVERT(tensor.stride_y, int) + z * CONVERT(tensor.stride_z, int), uint) >> 2; +} + +/** Get the pointer position of a Tensor3D + * + * @param[in] tensor Pointer to the starting postion of the buffer + * @param[in] x Relative X position + * @param[in] y Relative Y position + * @param[in] z Relative Z position + */ +uint tensor3D_offset_fp16(Tensor3D tensor, int x, int y, int z) +{ + return CONVERT(CONVERT(tensor.current_offset, int) + x * CONVERT(tensor.stride_x, int) + y * CONVERT(tensor.stride_y, int) + z * CONVERT(tensor.stride_z, int), uint); +} + +///////////////////////////////////////////////////////////// +// new one + +#define GC_CONVERT_TO_VECTOR_STRUCT(name) \ + gc_update_vector_workitem_offset(name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x) + +#define GC_CONVERT_TO_VECTOR_STRUCT_NO_STEP(name) \ + gc_update_vector_workitem_offset(name##_offset_first_element_in_bytes, name##_stride_x, uint(0)) + +#define GC_CONVERT_TO_IMAGE_STRUCT(name) \ + gc_update_image_workitem_offset(name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, 
name##_stride_y, name##_step_y) + +#define GC_CONVERT_TO_IMAGE_STRUCT_NO_STEP(name) \ + gc_update_image_workitem_offset(name##_offset_first_element_in_bytes, name##_stride_x, uint(0), name##_stride_y, uint(0)) + +#define GC_CONVERT_TO_TENSOR3D_STRUCT(name) \ + gc_update_tensor3D_workitem_offset(name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y, \ + name##_stride_z, name##_step_z) + +#define GC_CONVERT_TO_TENSOR3D_STRUCT_NO_STEP(name) \ + gc_update_tensor3D_workitem_offset(name##_offset_first_element_in_bytes, name##_stride_x, uint(0), name##_stride_y, uint(0), name##_stride_z, uint(0)) + +#define GC_CONVERT_TENSOR3D_TO_IMAGE_STRUCT(name) \ + gc_update_image_from_tensor3D_workitem_offset(name##_offset_first_element_in_bytes, name##_stride_x, name##_step_x, name##_stride_y, name##_step_y, name##_stride_z, name##_step_z) + +#define GC_CONVERT_TENSOR3D_TO_IMAGE_STRUCT_NO_STEP(name) \ + gc_update_image_from_tensor3D_workitem_offset(name##_offset_first_element_in_bytes, name##_stride_x, uint(0), name##_stride_y, uint(0), name##_stride_z, name##_step_z) + +Vector gc_update_vector_workitem_offset(uint offset_first_element_in_bytes, uint stride_x, uint step_x) +{ + Vector vector; + vector.offset_first_element_in_bytes = offset_first_element_in_bytes; + vector.stride_x = stride_x; + vector.current_offset = vector.offset_first_element_in_bytes + gl_GlobalInvocationID.x * step_x; + + return vector; +} + +Image gc_update_image_workitem_offset(uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y) +{ + Image img; + img.offset_first_element_in_bytes = offset_first_element_in_bytes; + img.stride_x = stride_x; + img.stride_y = stride_y; + img.current_offset = img.offset_first_element_in_bytes + gl_GlobalInvocationID.x * step_x + gl_GlobalInvocationID.y * step_y; + + return img; +} + +Tensor3D gc_update_tensor3D_workitem_offset(uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint 
stride_y, uint step_y, uint stride_z, uint step_z) +{ + Tensor3D tensor; + tensor.offset_first_element_in_bytes = offset_first_element_in_bytes; + tensor.stride_x = stride_x; + tensor.stride_y = stride_y; + tensor.stride_z = stride_z; + tensor.current_offset = tensor.offset_first_element_in_bytes + gl_GlobalInvocationID.x * step_x + gl_GlobalInvocationID.y * step_y + gl_GlobalInvocationID.z * step_z; + + return tensor; +} + +Image gc_update_image_from_tensor3D_workitem_offset(uint offset_first_element_in_bytes, uint stride_x, uint step_x, uint stride_y, uint step_y, uint stride_z, uint step_z) +{ + Image img; + img.offset_first_element_in_bytes = offset_first_element_in_bytes; + img.stride_x = stride_x; + img.stride_y = stride_y; + img.current_offset = img.offset_first_element_in_bytes + gl_GlobalInvocationID.x * step_x + gl_GlobalInvocationID.y * step_y + gl_GlobalInvocationID.z * step_z; + + return img; +} + +#define GC_CURRENT_OFFSET(name) \ + name.current_offset + +uint gc_vector_offset(Vector vec, int x) +{ + return CONVERT(CONVERT(vec.current_offset, int) + x * CONVERT(vec.stride_x, int), uint); +} + +uint gc_image_offset(Image img, int x, int y) +{ + return CONVERT(CONVERT(img.current_offset, int) + x * CONVERT(img.stride_x, int) + y * CONVERT(img.stride_y, int), uint); +} + +uint gc_tensor3D_offset(Tensor3D tensor, int x, int y, int z) +{ + return CONVERT(CONVERT(tensor.current_offset, int) + x * CONVERT(tensor.stride_x, int) + y * CONVERT(tensor.stride_y, int) + z * CONVERT(tensor.stride_z, int), uint); +} + +// load/store number of element depends on buffer type +#define GC_LOAD1(r, name, offset) \ + r = name##_ptr[offset] + +#define GC_LOAD2(r, name, offset) \ + GC_LOAD1(r[0], name, offset); \ + GC_LOAD1(r[1], name, (offset) + uint(1)) + +#define GC_LOAD3(r, name, offset) \ + GC_LOAD1(r[0], name, offset); \ + GC_LOAD1(r[1], name, (offset) + uint(1)); \ + GC_LOAD1(r[2], name, (offset) + uint(2)) + +#define GC_STORE1(value, name, offset) \ + 
name##_ptr[offset] = value + +#define GC_STORE2(value, name, offset) \ + GC_STORE1(value[0], name, offset); \ + GC_STORE1(value[1], name, (offset) + uint(1)) + +#define GC_STORE3(value, name, offset) \ + GC_STORE1(value[0], name, offset); \ + GC_STORE1(value[1], name, (offset) + uint(1)); \ + GC_STORE1(value[2], name, (offset) + uint(2)) + +// has to manually expand them since not supported by compiler +#define GC_LOAD1_1D_OFFSET(r, name, x) \ + GC_LOAD1(r, name, gc_vector_offset(name, int(x)) >> name##_buffer_data_type_size) + +#define GC_LOAD1_2D_OFFSET(r, name, x, y) \ + GC_LOAD1(r, name, gc_image_offset(name, int(x), int(y)) >> name##_buffer_data_type_size) + +#define GC_LOAD1_3D_OFFSET(r, name, x, y, z) \ + GC_LOAD1(r, name, gc_tensor3D_offset(name, int(x), int(y), int(z)) >> name##_buffer_data_type_size) + +#define GC_STORE1_1D_OFFSET(value, name, x) \ + GC_STORE1(value, name, gc_vector_offset(name, int(x)) >> name##_buffer_data_type_size) + +#define GC_STORE1_2D_OFFSET(value, name, x, y) \ + GC_STORE1(value, name, gc_image_offset(name, int(x), int(y)) >> name##_buffer_data_type_size) + +#define GC_STORE1_3D_OFFSET(value, name, x, y, z) \ + GC_STORE1(value, name, gc_tensor3D_offset(name, int(x), int(y), int(z)) >> name##_buffer_data_type_size) + +#define GC_LOAD2_1D_OFFSET(r, name, x) \ + GC_LOAD2(r, name, gc_vector_offset(name, int(x)) >> name##_buffer_data_type_size) + +#define GC_LOAD2_2D_OFFSET(r, name, x, y) \ + GC_LOAD2(r, name, gc_image_offset(name, int(x), int(y)) >> name##_buffer_data_type_size) + +#define GC_LOAD2_3D_OFFSET(r, name, x, y, z) \ + GC_LOAD2(r, name, gc_tensor3D_offset(name, int(x), int(y), int(z)) >> name##_buffer_data_type_size) + +#define GC_STORE2_1D_OFFSET(value, name, x) \ + GC_STORE2(value, name, gc_vector_offset(name, int(x)) >> name##_buffer_data_type_size) + +#define GC_STORE2_2D_OFFSET(value, name, x, y) \ + GC_STORE2(value, name, gc_image_offset(name, int(x), int(y)) >> name##_buffer_data_type_size) + +#define 
GC_STORE2_3D_OFFSET(value, name, x, y, z) \ + GC_STORE2(value, name, gc_tensor3D_offset(name, int(x), int(y), int(z)) >> name##_buffer_data_type_size) + +#define GC_LOAD3_1D_OFFSET(r, name, x) \ + GC_LOAD3(r, name, gc_vector_offset(name, int(x)) >> name##_buffer_data_type_size) + +#define GC_LOAD3_2D_OFFSET(r, name, x, y) \ + GC_LOAD3(r, name, gc_image_offset(name, int(x), int(y)) >> name##_buffer_data_type_size) + +#define GC_LOAD3_3D_OFFSET(r, name, x, y, z) \ + GC_LOAD3(r, name, gc_tensor3D_offset(name, int(x), int(y), int(z)) >> name##_buffer_data_type_size) + +///////////////////////////////////////////////////////////// + +#endif // _HELPER_H diff --git a/src/core/GLES_COMPUTE/cs_shaders/normalization_layer.cs b/src/core/GLES_COMPUTE/cs_shaders/normalization_layer.cs new file mode 100755 index 0000000000..5699340c14 --- /dev/null +++ b/src/core/GLES_COMPUTE/cs_shaders/normalization_layer.cs @@ -0,0 +1,157 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +layout(local_size_x = LOCAL_SIZE_X, local_size_y = LOCAL_SIZE_Y, local_size_z = LOCAL_SIZE_Z) in; + +#include "helpers.h" + +layout(std140) uniform shader_params +{ + TENSOR3D_PARAM_DECLARATION(src1); + TENSOR3D_PARAM_DECLARATION(src2); + TENSOR3D_PARAM_DECLARATION(dst); +}; + +BUFFER_DECLARATION(src1, 1, float, readonly); +BUFFER_DECLARATION(src2, 2, float, readonly); +BUFFER_DECLARATION(dst, 3, float, writeonly); + +#ifdef CROSS_MAP +/** Apply cross map normalization. + * + * @note Alpha parameter / norm_size should be given as a preprocessor argument using "#define COEFF x" + * @note BETA parameter in the normalization equation should be given as a preprocessor argument using "#define BETA x" + * @note KAPPA parameter in the normalization equation should be given as a preprocessor argument using "#define KAPPA x" + * @note Number of elements on the right or left side to normalize across should be given as a preprocessor argument using "#define RADIUS x" + * + * @param[in] src1_ptr Pointer to the first source tensor. 
Supported data types: F32 + * @param[in] src1_stride_x Stride of the first source tensor in X dimension (in bytes) + * @param[in] src1_step_x src1_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src1_stride_y Stride of the first source tensor in Y dimension (in bytes) + * @param[in] src1_step_y src1_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src1_stride_z Stride of the first source tensor in Z dimension (in bytes) + * @param[in] src1_step_z src1_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] src1_offset_first_element_in_bytes The offset of the first element in the first source tensor + * @param[in] src2_ptr Pointer to the second source tensor. Supported data types: Same as @p src1_ptr + * @param[in] src2_stride_x Stride of the second source tensor in X dimension (in bytes) + * @param[in] src2_step_x src2_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src2_stride_y Stride of the second source tensor in Y dimension (in bytes) + * @param[in] src2_step_y src2_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src2_stride_z Stride of the second source tensor in Z dimension (in bytes) + * @param[in] src2_step_z src2_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] src2_offset_first_element_in_bytes The offset of the second element in the second source tensor + * @param[out] dst_ptr Pointer to the destination tensor. 
Supported data types: Same as @p src1_ptr + * @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes) + * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes) + * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in bytes) + * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor + */ +void main(void) +{ + Tensor3D src1 = CONVERT_TO_TENSOR3D_STRUCT(src1); + Tensor3D src2 = CONVERT_TO_TENSOR3D_STRUCT(src2); + Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT(dst); + + float acc = 0.0; + + int num_of_slices = int(gl_NumWorkGroups.z * gl_WorkGroupSize.z); + int current_slice = int(gl_GlobalInvocationID.z); + + int left_slice = max(current_slice - int(RADIUS), int(0)); + int right_slice = min(current_slice + int(RADIUS), int(num_of_slices - 1)); + + for(int i = left_slice; i <= right_slice; i++) + { + acc += src2_ptr[tensor3D_offset(src2, 0, 0, i - current_slice)]; + } + + float normalized = pow(float(KAPPA) + float(COEFF) * acc, float(BETA)); + + float normalized_pixel = (src1_ptr[src1.current_offset]) / normalized; + + dst_ptr[dst.current_offset] = normalized_pixel; +} + +#elif defined(IN_MAP_1D) +/** Apply in map normalization. 
+ * + * @note Alpha parameter / norm_size should be given as a preprocessor argument using "#define COEFF x" + * @note BETA parameter in the normalization equation should be given as a preprocessor argument using "#define BETA x" + * @note KAPPA parameter in the normalization equation should be given as a preprocessor argument using "#define KAPPA x" + * @note Number of elements on the right or left side to normalize across should be given as a preprocessor argument using "#define RADIUS x" + * + * @param[in] src1_ptr Pointer to the first source tensor. Supported data types: F32 + * @param[in] src1_stride_x Stride of the first source tensor in X dimension (in bytes) + * @param[in] src1_step_x src1_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src1_stride_y Stride of the first source tensor in Y dimension (in bytes) + * @param[in] src1_step_y src1_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src1_stride_z Stride of the first source tensor in Z dimension (in bytes) + * @param[in] src1_step_z src1_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] src1_offset_first_element_in_bytes The offset of the first element in the first source tensor + * @param[in] src2_ptr Pointer to the second source tensor. 
Supported data types: Same as @p src1_ptr + * @param[in] src2_stride_x Stride of the second source tensor in X dimension (in bytes) + * @param[in] src2_step_x src2_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src2_stride_y Stride of the second source tensor in Y dimension (in bytes) + * @param[in] src2_step_y src2_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src2_stride_z Stride of the second source tensor in Z dimension (in bytes) + * @param[in] src2_step_z src2_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] src2_offset_first_element_in_bytes The offset of the first element in the second source tensor + * @param[out] dst_ptr Pointer to the destination tensor. Supported data types: Same as @p src1_ptr + * @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes) + * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes) + * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in bytes) + * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor + */ +void main(void) +{ + Tensor3D src1 = CONVERT_TO_TENSOR3D_STRUCT(src1); + Tensor3D src2 = CONVERT_TO_TENSOR3D_STRUCT(src2); + Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT(dst); + + float acc = 0.0; + + int num_of_items_x = int(gl_NumWorkGroups.x * gl_WorkGroupSize.x); + int current_pos = int(gl_GlobalInvocationID.x); + + int left_pos = max(current_pos - int(RADIUS), int(0)); + int right_pos = min(current_pos + int(RADIUS), int(num_of_items_x - 1)); + + for(int i = left_pos; i <= 
right_pos; i++) + { + acc += src2_ptr[tensor3D_offset(src2, i - current_pos, 0, 0)]; + } + + float normalized = pow(float(KAPPA) + float(COEFF) * acc, float(BETA)); + + float normalized_pixel = (src1_ptr[src1.current_offset]) / normalized; + + dst_ptr[dst.current_offset] = normalized_pixel; +} +#endif /*CROSS_MAP*/ diff --git a/src/core/GLES_COMPUTE/cs_shaders/pixelwise_mul_float.cs b/src/core/GLES_COMPUTE/cs_shaders/pixelwise_mul_float.cs new file mode 100644 index 0000000000..031687af0c --- /dev/null +++ b/src/core/GLES_COMPUTE/cs_shaders/pixelwise_mul_float.cs @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +layout(local_size_x = LOCAL_SIZE_X, local_size_y = LOCAL_SIZE_Y, local_size_z = LOCAL_SIZE_Z) in; +#include "helpers.h" + +layout(std140) uniform shader_params +{ + TENSOR3D_PARAM_DECLARATION(src1); + TENSOR3D_PARAM_DECLARATION(src2); + TENSOR3D_PARAM_DECLARATION(dst); +}; + +BUFFER_DECLARATION(src1, 1, float, readonly); +BUFFER_DECLARATION(src2, 2, float, readonly); +BUFFER_DECLARATION(dst, 3, float, writeonly); + +/** Performs a pixelwise multiplication with float scale of either integer or float inputs. + * + * @param[in] src1_ptr Pointer to the source image. Supported data types: F32 + * @param[in] src1_stride_x Stride of the source image in X dimension (in bytes) + * @param[in] src1_step_x src1_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src1_stride_y Stride of the source image in Y dimension (in bytes) + * @param[in] src1_step_y src1_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src1_stride_z Stride of the source image in Y dimension (in bytes) + * @param[in] src1_step_z src1_stride_z * number of elements along Y processed per workitem(in bytes) + * @param[in] src1_offset_first_element_in_bytes The offset of the first element in the source image + * @param[in] src2_ptr Pointer to the source image. 
Supported data types: Same as @p src1_ptr + * @param[in] src2_stride_x Stride of the source image in X dimension (in bytes) + * @param[in] src2_step_x src2_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src2_stride_y Stride of the source image in Y dimension (in bytes) + * @param[in] src2_step_y src2_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src2_stride_z Stride of the source image in Y dimension (in bytes) + * @param[in] src2_step_z src2_stride_z * number of elements along Y processed per workitem(in bytes) + * @param[in] src2_offset_first_element_in_bytes The offset of the first element in the source image + * @param[out] dst_ptr Pointer to the destination image. Supported data types: Same as @p src1_ptr + * @param[in] dst_stride_x Stride of the destination image in X dimension (in bytes) + * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] dst_stride_y Stride of the destination image in Y dimension (in bytes) + * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] dst_stride_z Stride of the destination image in Y dimension (in bytes) + * @param[in] dst_step_z dst_stride_z * number of elements along Y processed per workitem(in bytes) + * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination image + * @param[in] scale Float scaling factor. 
Supported data types: F32 + */ +void main() +{ + // Get pixels pointer + Tensor3D src1 = CONVERT_TO_TENSOR3D_STRUCT(src1); + Tensor3D src2 = CONVERT_TO_TENSOR3D_STRUCT(src2); + Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT(dst); + + dst_ptr[dst.current_offset] = (src1_ptr[src1.current_offset] * src2_ptr[src2.current_offset] * float(SCALE)); +} diff --git a/src/core/GLES_COMPUTE/cs_shaders/pooling_layer.cs b/src/core/GLES_COMPUTE/cs_shaders/pooling_layer.cs new file mode 100644 index 0000000000..1e0fee4688 --- /dev/null +++ b/src/core/GLES_COMPUTE/cs_shaders/pooling_layer.cs @@ -0,0 +1,1444 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +layout(local_size_x = LOCAL_SIZE_X, local_size_y = LOCAL_SIZE_Y, local_size_z = LOCAL_SIZE_Z) in; +#include "helpers.h" + +#if defined(DATA_TYPE_FP32) + +float calculate_max(const int, Tensor3D, const int, const int, const int, const int, const int, const int); +float calculate_avg(const int, Tensor3D, const int, const int, const int, const int, const int, const int); + +BUFFER_DECLARATION(src, 1, float, readonly); +BUFFER_DECLARATION(dst, 2, float, writeonly); + +layout(std140) uniform shader_params +{ + TENSOR3D_PARAM_DECLARATION(src); + TENSOR3D_PARAM_DECLARATION(dst); +}; + +#define LOAD8(r, name, offset) \ + r.x = LOAD4(name, offset); \ + r.y = LOAD4(name, offset + uint(1)) + +#define LOAD16(r, name, offset) \ + r.x = LOAD4(name, offset); \ + r.y = LOAD4(name, offset + uint(1)); \ + r.z = LOAD4(name, offset + uint(2)); \ + r.w = LOAD4(name, offset + uint(3)) + +#define STORE16(name, offset, r) \ + STORE4(name, offset, r.x); \ + STORE4(name, offset + uint(1), r.y); \ + STORE4(name, offset + uint(2), r.z); \ + STORE4(name, offset + uint(3), r.w) + +#if defined(POOL_AVG) || defined(POOL_L2) +#define POOL_OP(res, a, b) ((res) = (a) + (b)) +#define POOL_OP_float(res, a, b) (res = a + b) +#define POOL_OP_vec2(res, a, b) ((res) = (a) + (b)) +#else /* defined(POOL_AVG) || defined(POOL_L2) */ +#define POOL_OP(res, a, b) \ + (res) = (a); \ + if(isnan(a.x) || (a.x < b.x)) \ + { \ + res.x = b.x; \ + } \ + if(isnan(a.y) || (a.y < b.y)) \ + { \ + res.y = b.y; \ + } \ + if(isnan(a.z) || (a.z < b.z)) \ + { \ + res.z = b.z; \ + } \ + if(isnan(a.w) || (a.w < b.w)) \ + { \ + res.w = b.w; \ + } +#define POOL_OP_float(res, a, b) \ + (res) = (a); \ + if(isnan(a) || (a < b)) \ + { \ + res = b; \ + } +#define POOL_OP_vec2(res, a, b) \ + (res) = (a); \ + if(isnan(a.x) || (a.x < b.x)) \ + { \ + res.x = b.x; \ + } \ + if(isnan(a.y) || (a.y < b.y)) \ + { \ + res.y = b.y; \ + } +#endif /* defined(POOL_AVG) || defined(POOL_L2) */ + +#if defined(POOL_L2) +#define POW2_OP(x, 
vec_size) ((x) * (x)) +#else /* defined(POOL_L2) */ +#define POW2_OP(x, vec_size) (x) +#endif /* defined(POOL_L2) */ + +#define DIV_OP(x, y) (x * (1.f / y)) +#define SQRT_OP(x) sqrt((x)) + +#if defined(POOL_SIZE) +// Set the initial value for the pooling operation accordingly with the data type +#if defined(POOL_AVG) || defined(POOL_L2) +#define INITIAL_VALUE 0.0f +#else /* defined(POOL_AVG) || defined(POOL_L2) */ +#define INITIAL_VALUE -3.402823466385289e+38 +#endif // POOL_AVG +#endif //POOL_SIZE + +#define POOLING3x3_STRIDE1(res, input, output) \ + vec4 data00; \ + vec2 data01; \ + vec4 data10; \ + vec2 data11; \ + vec4 data20; \ + vec2 data21; \ + LOAD16(data00, input, tensor3D_offset(input, 0, 0, 0)); \ + LOAD8(data01, input, tensor3D_offset(input, 0, 0, 0) + uint(4)); \ + LOAD16(data10, input, tensor3D_offset(input, 0, 1, 0)); \ + LOAD8(data11, input, tensor3D_offset(input, 0, 1, 0) + uint(4)); \ + LOAD16(data20, input, tensor3D_offset(input, 0, 2, 0)); \ + LOAD8(data21, input, tensor3D_offset(input, 0, 2, 0) + uint(4)); \ + data00 = POW2_OP(data00, 4); \ + data01 = POW2_OP(data01, 2); \ + data10 = POW2_OP(data10, 4); \ + data11 = POW2_OP(data11, 2); \ + data20 = POW2_OP(data20, 4); \ + data21 = POW2_OP(data21, 2); \ + \ + vec4 values000; \ + vec4 values001; \ + vec4 values010; \ + vec4 values100; \ + vec4 values101; \ + vec4 values11; \ + vec4 values200; \ + vec4 values201; \ + vec4 values21; \ + values000.xyzw = data00.xyzy; \ + values001.xyzw = data00.zwzw; \ + values010.x = data01.x; \ + values010.y = data00.w; \ + values010.zw = data01.xy; \ + values100.xyzw = data10.xyzy; \ + values101.xyzw = data10.zwzw; \ + values11.x = data11.x; \ + values11.y = data10.w; \ + values11.zw = data11.xy; \ + values200.xyzw = data20.xyzy; \ + values201.xyzw = data20.zwzw; \ + values21.x = data21.x; \ + values21.y = data20.w; \ + values21.zw = data21.xy; \ + POOL_OP(values000.xyzw, values000.xyzw, values100.xyzw); \ + POOL_OP(values001.xyzw, values001.xyzw, 
values101.xyzw); \ + POOL_OP(values010.xyzw, values010.xyzw, values11.xyzw); \ + POOL_OP(values000.xyzw, values000.xyzw, values200.xyzw); \ + POOL_OP(values001.xyzw, values001.xyzw, values201.xyzw); \ + POOL_OP(values010.xyzw, values010.xyzw, values21.xyzw); \ + POOL_OP(res.xyzw, vec4(values000.xw, values001.z, values010.y), vec4(values000.y, values001.xw, values010.z)); \ + POOL_OP(res.xyzw, res.xyzw, vec4(values000.z, values001.y, values010.xw)) + +#define POOLING3x3_STRIDE2(res, input, output) \ + vec4 data000; \ + vec4 data001; \ + float data010; \ + vec4 data100; \ + vec4 data101; \ + float data11; \ + vec4 data200; \ + vec4 data201; \ + float data21; \ + LOAD16(data000, input, tensor3D_offset(input, 0, 0, 0)); \ + LOAD16(data001, input, tensor3D_offset(input, 0, 0, 0) + uint(4)); \ + data010 = LOAD4(input, tensor3D_offset(input, 0, 0, 0) + uint(8)); \ + LOAD16(data100, input, tensor3D_offset(input, 0, 1, 0)); \ + LOAD16(data101, input, tensor3D_offset(input, 0, 1, 0) + uint(4)); \ + data11 = LOAD4(input, tensor3D_offset(input, 0, 1, 0) + uint(8)); \ + LOAD16(data200, input, tensor3D_offset(input, 0, 2, 0)); \ + LOAD16(data201, input, tensor3D_offset(input, 0, 2, 0) + uint(4)); \ + data21 = LOAD4(input, tensor3D_offset(input, 0, 2, 0) + uint(8)); \ + data000 = POW2_OP(data000, 4); \ + data001 = POW2_OP(data001, 4); \ + data010 = POW2_OP(data010, 1); \ + data100 = POW2_OP(data100, 4); \ + data101 = POW2_OP(data101, 4); \ + data11 = POW2_OP(data11, 1); \ + data200 = POW2_OP(data200, 4); \ + data201 = POW2_OP(data201, 4); \ + data21 = POW2_OP(data21, 1); \ + \ + vec4 values000; \ + vec4 values001; \ + vec4 values010; \ + vec4 values100; \ + vec4 values101; \ + vec4 values11; \ + vec4 values200; \ + vec4 values201; \ + vec4 values21; \ + values000.xyzw = data000.xyzz; \ + values001.xyzw = vec4(data000.w, data001.xxy); \ + values010.xyzw = vec4(data001.zzw, data010); \ + values100.xyzw = data100.xyzz; \ + values101.xyzw = vec4(data100.w, data101.xxy); \ + 
values11.xyzw = vec4(data101.zzw, data11); \ + values200.xyzw = data200.xyzz; \ + values201.xyzw = vec4(data200.w, data201.xxy); \ + values21.xyzw = vec4(data201.zzw, data21); \ + POOL_OP(values000.xyzw, values000.xyzw, values100.xyzw); \ + POOL_OP(values001.xyzw, values001.xyzw, values101.xyzw); \ + POOL_OP(values010.xyzw, values010.xyzw, values11.xyzw); \ + POOL_OP(values000.xyzw, values000.xyzw, values200.xyzw); \ + POOL_OP(values001.xyzw, values001.xyzw, values201.xyzw); \ + POOL_OP(values010.xyzw, values010.xyzw, values21.xyzw); \ + POOL_OP(res.xyzw, vec4(values000.xw, values001.z, values010.y), vec4(values000.y, values001.xw, values010.z)); \ + POOL_OP(res.xyzw, res.xyzw, vec4(values000.z, values001.y, values010.xw)) + +#define POOLING3x3_STRIDE3(res, input, output) \ + vec4 data000; \ + vec4 data001; \ + vec4 data010; \ + vec4 data100; \ + vec4 data101; \ + vec4 data11; \ + vec4 data200; \ + vec4 data201; \ + vec4 data21; \ + LOAD16(data000, input, tensor3D_offset(input, 0, 0, 0)); \ + LOAD16(data001, input, tensor3D_offset(input, 0, 0, 0) + uint(4)); \ + LOAD16(data010, input, tensor3D_offset(input, 0, 0, 0) + uint(8)); \ + LOAD16(data100, input, tensor3D_offset(input, 0, 1, 0)); \ + LOAD16(data101, input, tensor3D_offset(input, 0, 1, 0) + uint(4)); \ + LOAD16(data11, input, tensor3D_offset(input, 0, 1, 0) + uint(8)); \ + LOAD16(data200, input, tensor3D_offset(input, 0, 2, 0)); \ + LOAD16(data201, input, tensor3D_offset(input, 0, 2, 0) + uint(4)); \ + LOAD16(data21, input, tensor3D_offset(input, 0, 2, 0) + uint(8)); \ + data000 = POW2_OP(data000, 4); \ + data001 = POW2_OP(data001, 4); \ + data010 = POW2_OP(data010, 4); \ + data100 = POW2_OP(data100, 4); \ + data101 = POW2_OP(data101, 4); \ + data11 = POW2_OP(data11, 4); \ + data200 = POW2_OP(data200, 4); \ + data201 = POW2_OP(data201, 4); \ + data21 = POW2_OP(data21, 4); \ + \ + POOL_OP(data000.xyzw, data000.xyzw, data100.xyzw); \ + POOL_OP(data001.xyzw, data001.xyzw, data101.xyzw); \ + 
POOL_OP(data010.xyzw, data010.xyzw, data11.xyzw); \ + POOL_OP(data000.xyzw, data000.xyzw, data200.xyzw); \ + POOL_OP(data001.xyzw, data001.xyzw, data201.xyzw); \ + POOL_OP(data010.xyzw, data010.xyzw, data21.xyzw); \ + POOL_OP(res.xyzw, vec4(data000.xw, data001.z, data010.y), vec4(data000.y, data001.xw, data010.z)); \ + POOL_OP(res.xyzw, res.xyzw, vec4(data000.z, data001.y, data010.xw)) + +float calculate_max(const int pool_size, Tensor3D src, const int upper_bound_w, const int upper_bound_h, const int pad_x, const int pad_y, const int stride_x, const int stride_y) +{ + int start_x = int(gl_GlobalInvocationID.x) * stride_x - pad_x; + int start_y = int(gl_GlobalInvocationID.y) * stride_y - pad_y; + int end_x = int(min(start_x + pool_size, upper_bound_w)); + int end_y = int(min(start_y + pool_size, upper_bound_h)); + + float data_max; + data_max = LOAD4(src, tensor3D_offset(src, 0, 0, 0)); + + for(int i = 0; (start_x + i) < end_x; ++i) + { + for(int j = 0; (start_y + j) < end_y; ++j) + { + float data = LOAD4(src, tensor3D_offset(src, i, j, 0)); + POOL_OP_float(data_max, data_max, data); + } + } + + return data_max; +} + +float calculate_avg(const int pool_size, Tensor3D src, const int upper_bound_w, const int upper_bound_h, const int pad_x, const int pad_y, const int stride_x, const int stride_y) +{ + int start_x = int(gl_GlobalInvocationID.x) * stride_x - pad_x; + int start_y = int(gl_GlobalInvocationID.y) * stride_y - pad_y; + int end_x = int(min(start_x + pool_size, upper_bound_w)); + int end_y = int(min(start_y + pool_size, upper_bound_h)); + + float data_total = 0.0f; + for(int i = 0; (start_x + i) < end_x; i++) + { + for(int j = 0; (start_y + j) < end_y; ++j) + { + float data = LOAD4(src, tensor3D_offset(src, i, j, 0)); + if(isnan(data)) + { + data = 0.0f; + } +#if defined(POOL_L2) + // Raise to power of 2 for L2 Pooling + data = POW2_OP(data, 1); +#endif /* defined(POOL_L2) */ + data_total = data_total + data; + } + } + + return data_total / float((end_y - 
start_y) * (end_x - start_x)); +} + +#ifdef POOLING_LAYER_2 +/** Performs a pooling function of pool size equal to 2. + * + * @note Supported data types are F32; + * @note In case of average pooling the following information must be passed at compile time: + * POOL_AVG must be provided otherwise max pooling will be performed. + * MAX_WIDTH and MAX_HEIGHT which are the maximum accessible indeces in x and y dimensions (width + pad) + * STRIDE_X and STRIDE_Y which are the steps of the window along the x and y directions + * PAD_X and PAD_Y which are the pooling paddings in x and y dimension + * + * @param[in] src_ptr Pointer to the source image. Supported data types: F32 + * @param[in] src_stride_x Stride of the source image in X dimension (in bytes) + * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src_stride_y Stride of the source image in Y dimension (in bytes) + * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] src_step_z src_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source image + * @param[out] dst_ptr Pointer to the destination image. 
Supported data types: same as @p src_ptr + * @param[in] dst_stride_x Stride of the destination image in X dimension (in bytes) + * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] dst_stride_y Stride of the destination image in Y dimension (in bytes) + * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] dst_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination image + */ +void main(void) +{ + // Get pixels pointer + Tensor3D src = CONVERT_TO_TENSOR3D_STRUCT(src); + Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT(dst); + + //Load and calculate data + float res; +#if defined(POOL_AVG) || defined(POOL_L2) + res = calculate_avg(2, src, MAX_WIDTH, MAX_HEIGHT, PAD_X, PAD_Y, STRIDE_X, STRIDE_Y); +#else /*POOL_AVG*/ + res = calculate_max(2, src, MAX_WIDTH, MAX_HEIGHT, PAD_X, PAD_Y, STRIDE_X, STRIDE_Y); +#endif /*POOL_AVG*/ + +#if defined(POOL_L2) + // Take square root of the result in L2 pooling + res = SQRT_OP(res); +#endif /* defined(POOL_L2) */ + + // Store result + STORE4(dst, CURRENT_OFFSET(dst), res); +} + +#elif defined(POOLING_LAYER_3) +/** Performs a pooling function of pool size equal to 3. + * + * @note Supported data types are F32; + * @note In case of average pooling the following information must be passed at compile time: + * POOL_AVG must be provided otherwise max pooling will be performed. + * MAX_WIDTH and MAX_HEIGHT which are the maximum accessible indeces in x and y dimensions (width + pad) + * STRIDE_X and STRIDE_Y which are the steps of the window along the x and y directions + * PAD_X and PAD_Y which are the pooling paddings in x and y dimension + * + * @param[in] src_ptr Pointer to the source image. 
Supported data types: F32 + * @param[in] src_stride_x Stride of the source image in X dimension (in bytes) + * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src_stride_y Stride of the source image in Y dimension (in bytes) + * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] src_step_z src_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source image + * @param[out] dst_ptr Pointer to the destination image. Supported data types: same as @p src_ptr + * @param[in] dst_stride_x Stride of the destination image in X dimension (in bytes) + * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] dst_stride_y Stride of the destination image in Y dimension (in bytes) + * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] dst_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination image + */ +void main(void) +{ + // Get pixels pointer + Tensor3D src = CONVERT_TO_TENSOR3D_STRUCT(src); + Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT(dst); + + //Load and calculate data + float res; +#if defined(POOL_AVG) || defined(POOL_L2) + res = calculate_avg(3, src, MAX_WIDTH, MAX_HEIGHT, PAD_X, PAD_Y, STRIDE_X, STRIDE_Y); +#else /*POOL_AVG*/ + res = calculate_max(3, src, MAX_WIDTH, MAX_HEIGHT, PAD_X, PAD_Y, STRIDE_X, STRIDE_Y); +#endif /*POOL_AVG*/ + +#if defined(POOL_L2) + // Take square root of the result in L2 pooling + res = SQRT_OP(res); +#endif /* 
defined(POOL_L2) */ + + // Store result + STORE4(dst, CURRENT_OFFSET(dst), res); +} + +#elif defined(POOLING_LAYER_3_OPTIMIZED) +/** Performs an optimized pooling function of pool size equal to 3 when the stride_x is less equal than 3 + * + * @note Supported data types are F32; + * @note In case of average pooling the following information must be passed at compile time: + * POOL_AVG must be provided otherwise max pooling will be performed. + * MAX_WIDTH and MAX_HEIGHT which are the maximum accessible indeces in x and y dimensions (width + pad) + * STRIDE_X and STRIDE_Y which are the steps of the window along the x and y directions + * PAD_X and PAD_Y which are the pooling paddings in x and y dimension + * + * @param[in] src_ptr Pointer to the source image. Supported data types: F32 + * @param[in] src_stride_x Stride of the source image in X dimension (in bytes) + * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src_stride_y Stride of the source image in Y dimension (in bytes) + * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] src_step_z src_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source image + * @param[out] dst_ptr Pointer to the destination image. 
Supported data types: same as @p src_ptr + * @param[in] dst_stride_x Stride of the destination image in X dimension (in bytes) + * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] dst_stride_y Stride of the destination image in Y dimension (in bytes) + * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] dst_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination image + */ +void main(void) +{ + // Get pixels pointer + Tensor3D src = CONVERT_TO_TENSOR3D_STRUCT(src); + Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT(dst); + + vec4 res; + // Perform pooling 3x3 for 4 output elements +#if STRIDE_X == 1 + POOLING3x3_STRIDE1(res, src, dst); +#elif STRIDE_X == 2 + POOLING3x3_STRIDE2(res, src, dst); +#elif STRIDE_X == 3 + POOLING3x3_STRIDE3(res, src, dst); +#endif /*STRIDE_X == 1*/ + + // Divide by pool region in case of average pooling +#if defined(POOL_AVG) || defined(POOL_L2) + ivec4 start_x = ((ivec4(int(gl_GlobalInvocationID.x) * 4) + ivec4(0, 1, 2, 3)) * (ivec4(STRIDE_X))) - (ivec4(PAD_X)); + int start_y = int(gl_GlobalInvocationID.y) * STRIDE_Y - PAD_Y; + ivec4 end_x = min((start_x + (ivec4(3))), (ivec4(MAX_WIDTH))); + int end_y = min((start_y + 3), MAX_HEIGHT); + res *= (vec4((1.f)) / vec4((ivec4(end_y - start_y)) * (end_x - start_x))); +#endif /*POOL_AVG*/ + +#if defined(POOL_L2) + // Take square root of the result in L2 pooling + res = SQRT_OP(res); +#endif /* defined(POOL_L2) */ + + STORE16(dst, CURRENT_OFFSET(dst), res); +} + +#elif defined(POOLING_LAYER_7) +/** Performs a pooling function of pool size equal to 7. 
+ * + * @note Supported data types are F32; + * @note In case of average pooling the following information must be passed at compile time: + * POOL_AVG must be provided otherwise max pooling will be performed. + * MAX_WIDTH and MAX_HEIGHT which are the maximum accessible indeces in x and y dimensions (width + pad) + * STRIDE_X and STRIDE_Y which are the steps of the window along the x and y directions + * PAD_X and PAD_Y which are the pooling paddings in x and y dimension + * + * @param[in] src_ptr Pointer to the source image. Supported data types: F32 + * @param[in] src_stride_x Stride of the source image in X dimension (in bytes) + * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src_stride_y Stride of the source image in Y dimension (in bytes) + * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] src_step_z src_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source image + * @param[out] dst_ptr Pointer to the destination image. 
Supported data types: same as @p src_ptr + * @param[in] dst_stride_x Stride of the destination image in X dimension (in bytes) + * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] dst_stride_y Stride of the destination image in Y dimension (in bytes) + * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] dst_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination image + */ +void main(void) +{ + // Get pixels pointer + Tensor3D src = CONVERT_TO_TENSOR3D_STRUCT(src); + Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT(dst); + + //Load and calculate data + float res; +#if defined(POOL_AVG) || defined(POOL_L2) + res = calculate_avg(7, src, MAX_WIDTH, MAX_HEIGHT, PAD_X, PAD_Y, STRIDE_X, STRIDE_Y); +#else /*POOL_AVG*/ + res = calculate_max(7, src, MAX_WIDTH, MAX_HEIGHT, PAD_X, PAD_Y, STRIDE_X, STRIDE_Y); +#endif /*POOL_AVG*/ + +#if defined(POOL_L2) + // Take square root of the result in L2 pooling + res = SQRT_OP(res); +#endif /* defined(POOL_L2) */ + + // Store result + STORE4(dst, CURRENT_OFFSET(dst), res); +} + +#elif defined(POOLING_LAYER_N) +/** Performs a pooling function of pool size equal to N + * + * @note Supported data types are F32; + * @note Pool size must be passed using POOL_SIZE e.g. POOL_SIZE=13; + * @note In case of average pooling the following information must be passed at compile time: + * POOL_AVG must be provided otherwise max pooling will be performed. 
+ * MAX_WIDTH and MAX_HEIGHT which are the maximum accessible indeces in x and y dimensions (width + pad) + * STRIDE_X and STRIDE_Y which are the steps of the window along the x and y directions + * PAD_X and PAD_Y which are the pooling paddings in x and y dimension + * + * @param[in] src_ptr Pointer to the source image. Supported data types: F32 + * @param[in] src_stride_x Stride of the source image in X dimension (in bytes) + * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src_stride_y Stride of the source image in Y dimension (in bytes) + * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] src_step_z src_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source image + * @param[out] dst_ptr Pointer to the destination image. 
Supported data types: same as @p src_ptr + * @param[in] dst_stride_x Stride of the destination image in X dimension (in bytes) + * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] dst_stride_y Stride of the destination image in Y dimension (in bytes) + * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] dst_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination image + */ +void main(void) +{ + // Get pixels pointer + Tensor3D src = CONVERT_TO_TENSOR3D_STRUCT(src); + Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT(dst); + + vec4 vdata0; + vdata0 = vec4(INITIAL_VALUE); + vec4 vdata1; + vdata1 = vec4(INITIAL_VALUE); + float sdata; + sdata = float(INITIAL_VALUE); + + for(int y = 0; y < int(POOL_SIZE); y++) + { + int x = 0; + for(; x <= (int(POOL_SIZE) - 8); x += 8) + { + vec4 data2; + vec4 data3; + LOAD16(data2, src, tensor3D_offset(src, x, y, 0)); + LOAD16(data3, src, tensor3D_offset(src, x, y, 0) + uint(4)); + +#if defined(POOL_L2) + // Raise to power of 2 for L2 Pooling + data2 *= data2; + data3 *= data3; +#endif /* defined(POOL_L2) */ + + POOL_OP(vdata0, vdata0, data2); + POOL_OP(vdata1, vdata1, data3); + } + + // Leftover + for(; x < int(POOL_SIZE); ++x) + { + float data4 = LOAD4(src, tensor3D_offset(src, x, y, 0)); +#if defined(POOL_L2) + // Raise to power of 2 for L2 Pooling + data4 *= data4; +#endif /* defined(POOL_L2) */ + POOL_OP_float(sdata, sdata, data4); + } + } + + //Reduce result + vec4 reduce4; + POOL_OP(reduce4, vdata0.xyzw, vdata1.xyzw); + vec2 reduce2; + POOL_OP_vec2(reduce2, reduce4.xy, reduce4.zw); + float res; + POOL_OP_float(res, reduce2.x, reduce2.y); + POOL_OP_float(res, res, sdata); + +#if defined(POOL_AVG) || 
defined(POOL_L2) + { + // Divide by pool region in case of average pooling + int start_x = int(gl_GlobalInvocationID.x) * STRIDE_X - PAD_X; + int start_y = int(gl_GlobalInvocationID.y) * STRIDE_Y - PAD_Y; + int end_x = int(min(STRIDE_X + POOL_SIZE, MAX_WIDTH)); + int end_y = int(min(STRIDE_Y + POOL_SIZE, MAX_HEIGHT)); + float res1 = float((end_y - start_y) * (end_x - start_x)); + res = DIV_OP(res, res1); + } +#endif /* defined(POOL_AVG) || defined(POOL_L2) */ + +#if defined(POOL_L2) + // Take square root of the result in L2 pooling + res = SQRT_OP(res); +#endif /* defined(POOL_L2) */ + + // Store result + STORE4(dst, CURRENT_OFFSET(dst), res); +} +#endif /* POOLING_LAYER_2 */ + +#elif defined(DATA_TYPE_FP16) + +precision mediump float; + +vec2 load_and_unpack(Tensor3D, uint); +vec2 calculate_max(const int, Tensor3D, const int, const int, const int, const int, const int, const int); +vec2 calculate_avg(const int, Tensor3D, const int, const int, const int, const int, const int, const int); + +BUFFER_DECLARATION(src, 1, uint, readonly); +BUFFER_DECLARATION(dst, 2, uint, writeonly); + +layout(std140) uniform shader_params +{ + TENSOR3D_PARAM_DECLARATION(src); + TENSOR3D_PARAM_DECLARATION(dst); +}; + +#define LOAD2_fp16(r, name, offset) \ + r.xy = load_and_unpack(name, offset) + +#define LOAD4_fp16(r, name, offset) \ + r.xy = load_and_unpack(name, offset); \ + r.zw = load_and_unpack(name, offset + uint(1)) + +#define STORE4_fp16(name, offset, r) \ + uint datastore1; \ + uint datastore2; \ + datastore1 = uint(packHalf2x16(r.xy)); \ + datastore2 = uint(packHalf2x16(r.zw)); \ + STORE1(name, offset << uint(1), datastore1); \ + STORE1(name, (offset << uint(1)) + uint(1), datastore2) + +#if defined(POOL_AVG) || defined(POOL_L2) +#define POOL_OP(res, a, b) ((res) = (a) + (b)) +#define POOL_OP_float(res, a, b) (res = a + b) +#define POOL_OP_vec2(res, a, b) ((res) = (a) + (b)) +#else /* defined(POOL_AVG) || defined(POOL_L2) */ +#define POOL_OP(res, a, b) \ + (res) = (a); \ + 
if(isnan(a.x) || (a.x < b.x)) \ + { \ + res.x = b.x; \ + } \ + if(isnan(a.y) || (a.y < b.y)) \ + { \ + res.y = b.y; \ + } \ + if(isnan(a.z) || (a.z < b.z)) \ + { \ + res.z = b.z; \ + } \ + if(isnan(a.w) || (a.w < b.w)) \ + { \ + res.w = b.w; \ + } +#define POOL_OP_float(res, a, b) \ + (res) = (a); \ + if(isnan(a) || (a < b)) \ + { \ + res = b; \ + } +#define POOL_OP_vec2(res, a, b) \ + (res) = (a); \ + if(isnan(a.x) || (a.x < b.x)) \ + { \ + res.x = b.x; \ + } \ + if(isnan(a.y) || (a.y < b.y)) \ + { \ + res.y = b.y; \ + } +#endif /* defined(POOL_AVG) || defined(POOL_L2) */ + +#if defined(POOL_L2) +#define POW2_OP(x, vec_size) ((x) * (x)) +#else /* defined(POOL_L2) */ +#define POW2_OP(x, vec_size) (x) +#endif /* defined(POOL_L2) */ + +#define DIV_OP(x, y) (x * (1.f / y)) +#define SQRT_OP(x) sqrt((x)) + +#if defined(POOL_SIZE) +// Set the initial value for the pooling operation accordingly with the data type +#if defined(POOL_AVG) || defined(POOL_L2) +#define INITIAL_VALUE 0.0f +#else /* defined(POOL_AVG) || defined(POOL_L2) */ +#define INITIAL_VALUE -65504.0f +#endif //POOL_AVG +#endif //POOL_SIZE + +#define POOLING3x3_STRIDE1_fp16(res, input, output) \ + vec4 data00; \ + vec2 data01; \ + vec4 data10; \ + vec2 data11; \ + vec4 data20; \ + vec2 data21; \ + LOAD4_fp16(data00, input, (tensor3D_offset_fp16(input, 0, 0, 0) >> uint(2))); \ + LOAD2_fp16(data01, input, (tensor3D_offset_fp16(input, 0, 0, 0) >> uint(2)) + uint(2)); \ + LOAD4_fp16(data10, input, (tensor3D_offset_fp16(input, 0, 1, 0) >> uint(2))); \ + LOAD2_fp16(data11, input, (tensor3D_offset_fp16(input, 0, 1, 0) >> uint(2)) + uint(2)); \ + LOAD4_fp16(data20, input, (tensor3D_offset_fp16(input, 0, 2, 0) >> uint(2))); \ + LOAD2_fp16(data21, input, (tensor3D_offset_fp16(input, 0, 2, 0) >> uint(2)) + uint(2)); \ + data00 = POW2_OP(data00, 4); \ + data01 = POW2_OP(data01, 2); \ + data10 = POW2_OP(data10, 4); \ + data11 = POW2_OP(data11, 2); \ + data20 = POW2_OP(data20, 4); \ + data21 = POW2_OP(data21, 2); \ + \ + 
vec4 values000; \ + vec4 values001; \ + vec4 values010; \ + vec4 values100; \ + vec4 values101; \ + vec4 values11; \ + vec4 values200; \ + vec4 values201; \ + vec4 values21; \ + values000.xyzw = data00.xyzy; \ + values001.xyzw = data00.zwzw; \ + values010.x = data01.x; \ + values010.y = data00.w; \ + values010.zw = data01.xy; \ + values100.xyzw = data10.xyzy; \ + values101.xyzw = data10.zwzw; \ + values11.x = data11.x; \ + values11.y = data10.w; \ + values11.zw = data11.xy; \ + values200.xyzw = data20.xyzy; \ + values201.xyzw = data20.zwzw; \ + values21.x = data21.x; \ + values21.y = data20.w; \ + values21.zw = data21.xy; \ + POOL_OP(values000.xyzw, values000.xyzw, values100.xyzw); \ + POOL_OP(values001.xyzw, values001.xyzw, values101.xyzw); \ + POOL_OP(values010.xyzw, values010.xyzw, values11.xyzw); \ + POOL_OP(values000.xyzw, values000.xyzw, values200.xyzw); \ + POOL_OP(values001.xyzw, values001.xyzw, values201.xyzw); \ + POOL_OP(values010.xyzw, values010.xyzw, values21.xyzw); \ + POOL_OP(res.xyzw, vec4(values000.xw, values001.z, values010.y), vec4(values000.y, values001.xw, values010.z)); \ + POOL_OP(res.xyzw, res.xyzw, vec4(values000.z, values001.y, values010.xw)) + +#define POOLING3x3_STRIDE2_fp16(res, input, output) \ + vec4 data000; \ + vec4 data001; \ + float data010; \ + vec4 data100; \ + vec4 data101; \ + float data11; \ + vec4 data200; \ + vec4 data201; \ + float data21; \ + vec2 datamiddle0; \ + vec2 datamiddle1; \ + vec2 datamiddle2; \ + LOAD4_fp16(data000, input, (tensor3D_offset_fp16(input, 0, 0, 0) >> uint(2))); \ + LOAD4_fp16(data001, input, (tensor3D_offset_fp16(input, 0, 0, 0) >> uint(2)) + uint(2)); \ + datamiddle0 = load_and_unpack(input, (tensor3D_offset_fp16(input, 0, 0, 0) >> uint(2)) + uint(4)); \ + data010 = datamiddle0.x; \ + LOAD4_fp16(data100, input, (tensor3D_offset_fp16(input, 0, 1, 0) >> uint(2))); \ + LOAD4_fp16(data101, input, (tensor3D_offset_fp16(input, 0, 1, 0) >> uint(2)) + uint(2)); \ + datamiddle1 = load_and_unpack(input, 
(tensor3D_offset_fp16(input, 0, 1, 0) >> uint(2)) + uint(4)); \ + data11 = datamiddle1.x; \ + LOAD4_fp16(data200, input, (tensor3D_offset_fp16(input, 0, 2, 0) >> uint(2))); \ + LOAD4_fp16(data201, input, (tensor3D_offset_fp16(input, 0, 2, 0) >> uint(2)) + uint(2)); \ + datamiddle2 = load_and_unpack(input, (tensor3D_offset_fp16(input, 0, 2, 0) >> uint(2)) + uint(4)); \ + data21 = datamiddle2.x; \ + data000 = POW2_OP(data000, 4); \ + data001 = POW2_OP(data001, 4); \ + data010 = POW2_OP(data010, 1); \ + data100 = POW2_OP(data100, 4); \ + data101 = POW2_OP(data101, 4); \ + data11 = POW2_OP(data11, 1); \ + data200 = POW2_OP(data200, 4); \ + data201 = POW2_OP(data201, 4); \ + data21 = POW2_OP(data21, 1); \ + \ + vec4 values000; \ + vec4 values001; \ + vec4 values010; \ + vec4 values100; \ + vec4 values101; \ + vec4 values11; \ + vec4 values200; \ + vec4 values201; \ + vec4 values21; \ + values000.xyzw = data000.xyzz; \ + values001.xyzw = vec4(data000.w, data001.xxy); \ + values010.xyzw = vec4(data001.zzw, data010); \ + values100.xyzw = data100.xyzz; \ + values101.xyzw = vec4(data100.w, data101.xxy); \ + values11.xyzw = vec4(data101.zzw, data11); \ + values200.xyzw = data200.xyzz; \ + values201.xyzw = vec4(data200.w, data201.xxy); \ + values21.xyzw = vec4(data201.zzw, data21); \ + POOL_OP(values000.xyzw, values000.xyzw, values100.xyzw); \ + POOL_OP(values001.xyzw, values001.xyzw, values101.xyzw); \ + POOL_OP(values010.xyzw, values010.xyzw, values11.xyzw); \ + POOL_OP(values000.xyzw, values000.xyzw, values200.xyzw); \ + POOL_OP(values001.xyzw, values001.xyzw, values201.xyzw); \ + POOL_OP(values010.xyzw, values010.xyzw, values21.xyzw); \ + POOL_OP(res.xyzw, vec4(values000.xw, values001.z, values010.y), vec4(values000.y, values001.xw, values010.z)); \ + POOL_OP(res.xyzw, res.xyzw, vec4(values000.z, values001.y, values010.xw)) + +#define POOLING3x3_STRIDE3_fp16(res, input, output) \ + vec4 data000; \ + vec4 data001; \ + vec4 data010; \ + vec4 data100; \ + vec4 data101; \ + 
vec4 data11; \
+    vec4 data200; \
+    vec4 data201; \
+    vec4 data21; \
+    LOAD4_fp16(data000, input, (tensor3D_offset_fp16(input, 0, 0, 0) >> uint(2))); \
+    LOAD4_fp16(data001, input, (tensor3D_offset_fp16(input, 0, 0, 0) >> uint(2)) + uint(2)); \
+    LOAD4_fp16(data010, input, (tensor3D_offset_fp16(input, 0, 0, 0) >> uint(2)) + uint(4)); \
+    LOAD4_fp16(data100, input, (tensor3D_offset_fp16(input, 0, 1, 0) >> uint(2))); \
+    LOAD4_fp16(data101, input, (tensor3D_offset_fp16(input, 0, 1, 0) >> uint(2)) + uint(2)); \
+    LOAD4_fp16(data11, input, (tensor3D_offset_fp16(input, 0, 1, 0) >> uint(2)) + uint(4)); \
+    LOAD4_fp16(data200, input, (tensor3D_offset_fp16(input, 0, 2, 0) >> uint(2))); \
+    LOAD4_fp16(data201, input, (tensor3D_offset_fp16(input, 0, 2, 0) >> uint(2)) + uint(2)); \
+    LOAD4_fp16(data21, input, (tensor3D_offset_fp16(input, 0, 2, 0) >> uint(2)) + uint(4)); \
+    data000 = POW2_OP(data000, 4); \
+    data001 = POW2_OP(data001, 4); \
+    data010 = POW2_OP(data010, 4); \
+    data100 = POW2_OP(data100, 4); \
+    data101 = POW2_OP(data101, 4); \
+    data11 = POW2_OP(data11, 4); \
+    data200 = POW2_OP(data200, 4); \
+    data201 = POW2_OP(data201, 4); \
+    data21 = POW2_OP(data21, 4); \
+    \
+    POOL_OP(data000.xyzw, data000.xyzw, data100.xyzw); \
+    POOL_OP(data001.xyzw, data001.xyzw, data101.xyzw); \
+    POOL_OP(data010.xyzw, data010.xyzw, data11.xyzw); \
+    POOL_OP(data000.xyzw, data000.xyzw, data200.xyzw); \
+    POOL_OP(data001.xyzw, data001.xyzw, data201.xyzw); \
+    POOL_OP(data010.xyzw, data010.xyzw, data21.xyzw); \
+    POOL_OP(res.xyzw, vec4(data000.xw, data001.z, data010.y), vec4(data000.y, data001.xw, data010.z)); \
+    POOL_OP(res.xyzw, res.xyzw, vec4(data000.z, data001.y, data010.xw))
+
+/** Load one packed word from @p src at @p offset and unpack it into two fp16 values.
+ *
+ * @param[in] src    Tensor holding half floats packed two-per-uint
+ * @param[in] offset Offset (in uints) of the packed word to load
+ *
+ * @return The two unpacked values as a vec2
+ */
+vec2 load_and_unpack(Tensor3D src, uint offset)
+{
+    uint packed_s;
+    vec2 s;
+    LOAD1(packed_s, src, offset);
+
+    s = vec2(unpackHalf2x16(packed_s));
+    return s;
+}
+
+vec2 calculate_max(const int pool_size, Tensor3D src, const int upper_bound_w, const int upper_bound_h, const int pad_x, const int pad_y,
const int stride_x, const int stride_y) +{ + int start_x1 = int(gl_GlobalInvocationID.x) * stride_x - pad_x; + int start_y1 = int(gl_GlobalInvocationID.y) * stride_y - pad_y; + int end_x1 = int(min(start_x1 + pool_size, upper_bound_w)); + int end_y1 = int(min(start_y1 + pool_size, upper_bound_h)); + + int start_x2 = start_x1 + stride_x; + int start_y2 = start_y1; + int end_x2 = int(min(start_x2 + pool_size, upper_bound_w)); + int end_y2 = int(min(start_y2 + pool_size, upper_bound_h)); + + //Initialize maximum + vec2 data_max = vec2(0); + + //Load and Set initial maximum1 + vec2 data_init1 = load_and_unpack(src, tensor3D_offset_fp16(src, 0, 0, 0) >> uint(2)); + data_max.x = data_init1.x; + + //Load and Set initial maximum2 + if(end_x1 < upper_bound_w) + { + if((stride_x % 2) == 0) + { + vec2 data_init2 = load_and_unpack(src, tensor3D_offset_fp16(src, stride_x, 0, 0) >> uint(2)); + data_max.y = data_init2.x; + } + else + { + vec2 data_init2 = load_and_unpack(src, tensor3D_offset_fp16(src, stride_x - 1, 0, 0) >> uint(2)); + data_max.y = data_init2.y; + } + } + + for(int i = 0; (start_y1 + i) < end_y1; i++) + for(int j = 0; (start_x1 + j) < end_x1; j = j + 2) + { + //Calculate maximum1 + if((start_x1 + j + 1) < end_x1) + { + vec2 data1 = load_and_unpack(src, tensor3D_offset_fp16(src, j, i, 0) >> uint(2)); + float data_mr1; + POOL_OP_float(data_mr1, data1.x, data1.y); + POOL_OP_float(data_max.x, data_max.x, data_mr1); + } + else + { + vec2 data1 = load_and_unpack(src, tensor3D_offset_fp16(src, j, i, 0) >> uint(2)); + POOL_OP_float(data_max.x, data_max.x, data1.x); + } + + //Calculate maximum2 + if((start_x2 + j) < end_x2 && end_x1 < upper_bound_w) + { + if((stride_x % 2) == 0) + { + vec2 data2 = load_and_unpack(src, (tensor3D_offset_fp16(src, (j + stride_x), i, 0) >> uint(2))); + + if((start_x2 + j + 1) < end_x2) + { + float data_mr2; + POOL_OP_float(data_mr2, data2.x, data2.y); + POOL_OP_float(data_max.y, data_max.y, data_mr2); + } + else + { + 
POOL_OP_float(data_max.y, data_max.y, data2.x); + } + } + else + { + vec2 data2 = load_and_unpack(src, (tensor3D_offset_fp16(src, (j + stride_x - 1), i, 0) >> uint(2))); + vec2 data3 = load_and_unpack(src, (tensor3D_offset_fp16(src, (j + stride_x + 1), i, 0) >> uint(2))); + if((start_x2 + j + 1) < end_x2) + { + float data_mr2; + POOL_OP_float(data_mr2, data3.x, data2.y); + POOL_OP_float(data_max.y, data_max.y, data_mr2); + } + else + { + POOL_OP_float(data_max.y, data_max.y, data2.y); + } + } + } + } + return data_max; +} + +vec2 calculate_avg(const int pool_size, Tensor3D src, const int upper_bound_w, const int upper_bound_h, const int pad_x, const int pad_y, const int stride_x, const int stride_y) +{ + int start_x1 = int(gl_GlobalInvocationID.x) * stride_x - pad_x; + int start_y1 = int(gl_GlobalInvocationID.y) * stride_y - pad_y; + int end_x1 = int(min(start_x1 + pool_size, upper_bound_w)); + int end_y1 = int(min(start_y1 + pool_size, upper_bound_h)); + + int start_x2 = start_x1 + stride_x; + int start_y2 = start_y1; + int end_x2 = int(min(start_x2 + pool_size, upper_bound_w)); + int end_y2 = int(min(start_y2 + pool_size, upper_bound_h)); + + //Initialize sum + float data_total1 = float(0); + float data_total2 = float(0); + for(int i = 0; (start_y1 + i) < end_y1; i++) + for(int j = 0; (start_x1 + j) < end_x1; j = j + 2) + { + vec2 data1 = load_and_unpack(src, tensor3D_offset_fp16(src, j, i, 0) >> uint(2)); +#if defined(POOL_L2) + // Raise to power of 2 for L2 Pooling + data1 = POW2_OP(data1, 2); +#endif /* defined(POOL_L2) */ + //Calculate sum1 + if((start_x1 + j + 1) < end_x1) + { + data_total1 = data_total1 + data1.x + data1.y; + } + else + { + data_total1 = data_total1 + data1.x; + } + + //Calculate sum2 + if((start_x2 + j) < end_x2 && end_x1 < upper_bound_w) + { + if((stride_x % 2) == 0) + { + vec2 data2 = load_and_unpack(src, (tensor3D_offset_fp16(src, (j + stride_x + 1), i, 0) >> uint(2))); +#if defined(POOL_L2) + // Raise to power of 2 for L2 Pooling + 
data2 = POW2_OP(data2, 2); +#endif /* defined(POOL_L2) */ + if((start_x2 + j + 1) < end_x2) + { + data_total2 = data_total2 + data2.x + data2.y; + } + else + { + data_total2 = data_total2 + data2.x; + } + } + else + { + vec2 data2 = load_and_unpack(src, (tensor3D_offset_fp16(src, (j + stride_x - 1), i, 0) >> uint(2))); + vec2 data3 = load_and_unpack(src, (tensor3D_offset_fp16(src, (j + stride_x + 1), i, 0) >> uint(2))); +#if defined(POOL_L2) + // Raise to power of 2 for L2 Pooling + data2 = POW2_OP(data2, 2); + data3 = POW2_OP(data3, 2); +#endif /* defined(POOL_L2) */ + if((start_x2 + j + 1) < end_x2) + { + data_total2 = data_total2 + data3.x + data2.y; + } + else + { + data_total2 = data_total2 + data2.y; + } + } + } + } + //Calculate average + vec2 data_avg; + data_avg.x = data_total1 / float((end_y1 - start_y1) * (end_x1 - start_x1)); + data_avg.y = data_total2 / float((end_y2 - start_y2) * (end_x2 - start_x2)); + + return data_avg; +} + +#ifdef POOLING_LAYER_2 +/** Performs a pooling function of pool size equal to 2. + * + * @note Supported data types are F16; + * @note In case of average pooling the following information must be passed at compile time: + * POOL_AVG must be provided otherwise max pooling will be performed. + * MAX_WIDTH and MAX_HEIGHT which are the maximum accessible indeces in x and y dimensions (width + pad) + * STRIDE_X and STRIDE_Y which are the steps of the window along the x and y directions + * PAD_X and PAD_Y which are the pooling paddings in x and y dimension + * + * @param[in] src_ptr Pointer to the source image. 
Supported data types: F16 + * @param[in] src_stride_x Stride of the source image in X dimension (in bytes) + * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src_stride_y Stride of the source image in Y dimension (in bytes) + * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] src_step_z src_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source image + * @param[out] dst_ptr Pointer to the destination image. Supported data types: same as @p src_ptr + * @param[in] dst_stride_x Stride of the destination image in X dimension (in bytes) + * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] dst_stride_y Stride of the destination image in Y dimension (in bytes) + * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] dst_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination image + */ +void main(void) +{ + // Get pixels pointer + Tensor3D src = CONVERT_TO_TENSOR3D_STRUCT_FP16(src); + Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT_FP16(dst); + + //Load and calculate data + vec2 data; + uint res; +#if defined(POOL_AVG) || defined(POOL_L2) + data = calculate_avg(2, src, MAX_WIDTH, MAX_HEIGHT, PAD_X, PAD_Y, STRIDE_X, STRIDE_Y); +#else /*POOL_AVG*/ + data = calculate_max(2, src, MAX_WIDTH, MAX_HEIGHT, PAD_X, PAD_Y, STRIDE_X, STRIDE_Y); +#endif /*POOL_AVG*/ + +#if defined(POOL_L2) + // Take square root of the result in L2 pooling + data = 
SQRT_OP(data); +#endif /* defined(POOL_L2) */ + + res = uint(packHalf2x16(data)); + + // Store result + STORE1(dst, CURRENT_OFFSET(dst) >> uint(2), res); +} + +#elif defined(POOLING_LAYER_3) +/** Performs a pooling function of pool size equal to 3. + * + * @note Supported data types are F16; + * @note In case of average pooling the following information must be passed at compile time: + * POOL_AVG must be provided otherwise max pooling will be performed. + * MAX_WIDTH and MAX_HEIGHT which are the maximum accessible indeces in x and y dimensions (width + pad) + * STRIDE_X and STRIDE_Y which are the steps of the window along the x and y directions + * PAD_X and PAD_Y which are the pooling paddings in x and y dimension + * + * @param[in] src_ptr Pointer to the source image. Supported data types: F16 + * @param[in] src_stride_x Stride of the source image in X dimension (in bytes) + * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src_stride_y Stride of the source image in Y dimension (in bytes) + * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] src_step_z src_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source image + * @param[out] dst_ptr Pointer to the destination image. 
Supported data types: same as @p src_ptr + * @param[in] dst_stride_x Stride of the destination image in X dimension (in bytes) + * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] dst_stride_y Stride of the destination image in Y dimension (in bytes) + * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] dst_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination image + */ +void main(void) +{ + // Get pixels pointer + Tensor3D src = CONVERT_TO_TENSOR3D_STRUCT_FP16(src); + Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT_FP16(dst); + + //Load and calculate data + vec2 data; + uint res; +#if defined(POOL_AVG) || defined(POOL_L2) + data = calculate_avg(3, src, MAX_WIDTH, MAX_HEIGHT, PAD_X, PAD_Y, STRIDE_X, STRIDE_Y); +#else /*POOL_AVG*/ + data = calculate_max(3, src, MAX_WIDTH, MAX_HEIGHT, PAD_X, PAD_Y, STRIDE_X, STRIDE_Y); +#endif /*POOL_AVG*/ + +#if defined(POOL_L2) + // Take square root of the result in L2 pooling + data = SQRT_OP(data); +#endif /* defined(POOL_L2) */ + + res = uint(packHalf2x16(data)); + + // Store result + STORE1(dst, CURRENT_OFFSET(dst) >> uint(2), res); +} + +#elif defined(POOLING_LAYER_3_OPTIMIZED) +/** Performs an optimized pooling function of pool size equal to 3 when the stride_x is less equal than 3 + * + * @note Supported data types are F16; + * @note In case of average pooling the following information must be passed at compile time: + * POOL_AVG must be provided otherwise max pooling will be performed. 
+ * MAX_WIDTH and MAX_HEIGHT which are the maximum accessible indeces in x and y dimensions (width + pad) + * STRIDE_X and STRIDE_Y which are the steps of the window along the x and y directions + * PAD_X and PAD_Y which are the pooling paddings in x and y dimension + * + * @param[in] src_ptr Pointer to the source image. Supported data types: F16 + * @param[in] src_stride_x Stride of the source image in X dimension (in bytes) + * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src_stride_y Stride of the source image in Y dimension (in bytes) + * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] src_step_z src_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source image + * @param[out] dst_ptr Pointer to the destination image. 
Supported data types: same as @p src_ptr + * @param[in] dst_stride_x Stride of the destination image in X dimension (in bytes) + * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] dst_stride_y Stride of the destination image in Y dimension (in bytes) + * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] dst_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination image + */ +void main(void) +{ + // Get pixels pointer + Tensor3D src = CONVERT_TO_TENSOR3D_STRUCT_FP16(src); + Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT_FP16(dst); + + vec4 res; + // Perform pooling 3x3 for 4 output elements +#if STRIDE_X == 1 + POOLING3x3_STRIDE1_fp16(res, src, dst); +#elif STRIDE_X == 2 + POOLING3x3_STRIDE2_fp16(res, src, dst); +#elif STRIDE_X == 3 + POOLING3x3_STRIDE3_fp16(res, src, dst); +#endif /*STRIDE_X == 1*/ + + // Divide by pool region in case of average pooling +#if defined(POOL_AVG) || defined(POOL_L2) + ivec4 start_x = ((ivec4(int(gl_GlobalInvocationID.x) * 4) + ivec4(0, 1, 2, 3)) * (ivec4(STRIDE_X))) - (ivec4(PAD_X)); + int start_y = int(gl_GlobalInvocationID.y) * STRIDE_Y - PAD_Y; + ivec4 end_x = min((start_x + (ivec4(3))), (ivec4(MAX_WIDTH))); + int end_y = min((start_y + 3), MAX_HEIGHT); + res *= (vec4((1.f)) / vec4((ivec4(end_y - start_y)) * (end_x - start_x))); +#endif /*POOL_AVG*/ + +#if defined(POOL_L2) + // Take square root of the result in L2 pooling + res = SQRT_OP(res); +#endif /* defined(POOL_L2) */ + + STORE4_fp16(dst, CURRENT_OFFSET(dst) >> uint(3), res); +} + +#elif defined(POOLING_LAYER_7) +/** Performs a pooling function of pool size equal to 7. 
+ * + * @note Supported data types are F16; + * @note In case of average pooling the following information must be passed at compile time: + * POOL_AVG must be provided otherwise max pooling will be performed. + * MAX_WIDTH and MAX_HEIGHT which are the maximum accessible indices in x and y dimensions (width + pad) + * STRIDE_X and STRIDE_Y which are the steps of the window along the x and y directions + * PAD_X and PAD_Y which are the pooling paddings in x and y dimension + * + * @param[in] src_ptr Pointer to the source image. Supported data types: F16 + * @param[in] src_stride_x Stride of the source image in X dimension (in bytes) + * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src_stride_y Stride of the source image in Y dimension (in bytes) + * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] src_step_z src_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source image + * @param[out] dst_ptr Pointer to the destination image. 
Supported data types: same as @p src_ptr + * @param[in] dst_stride_x Stride of the destination image in X dimension (in bytes) + * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] dst_stride_y Stride of the destination image in Y dimension (in bytes) + * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] dst_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination image + */ +void main(void) +{ + // Get pixels pointer + Tensor3D src = CONVERT_TO_TENSOR3D_STRUCT_FP16(src); + Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT_FP16(dst); + + //Load and calculate data + vec2 data; + uint res; +#if defined(POOL_AVG) || defined(POOL_L2) + data = calculate_avg(7, src, MAX_WIDTH, MAX_HEIGHT, PAD_X, PAD_Y, STRIDE_X, STRIDE_Y); +#else /*POOL_AVG*/ + data = calculate_max(7, src, MAX_WIDTH, MAX_HEIGHT, PAD_X, PAD_Y, STRIDE_X, STRIDE_Y); +#endif /*POOL_AVG*/ + +#if defined(POOL_L2) + // Take square root of the result in L2 pooling + data = SQRT_OP(data); +#endif /* defined(POOL_L2) */ + + res = uint(packHalf2x16(data)); + + // Store result + STORE1(dst, CURRENT_OFFSET(dst) >> uint(2), res); +} + +#elif defined(POOLING_LAYER_N) +/** Performs a pooling function of pool size equal to N + * + * @note Supported data types are F16; + * @note Pool size must be passed using POOL_SIZE e.g. POOL_SIZE=13; + * @note In case of average pooling the following information must be passed at compile time: + * POOL_AVG must be provided otherwise max pooling will be performed. 
+ * MAX_WIDTH and MAX_HEIGHT which are the maximum accessible indeces in x and y dimensions (width + pad) + * STRIDE_X and STRIDE_Y which are the steps of the window along the x and y directions + * PAD_X and PAD_Y which are the pooling paddings in x and y dimension + * + * @param[in] src_ptr Pointer to the source image. Supported data types: F16 + * @param[in] src_stride_x Stride of the source image in X dimension (in bytes) + * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src_stride_y Stride of the source image in Y dimension (in bytes) + * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] src_step_z src_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source image + * @param[out] dst_ptr Pointer to the destination image. 
Supported data types: same as @p src_ptr + * @param[in] dst_stride_x Stride of the destination image in X dimension (in bytes) + * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] dst_stride_y Stride of the destination image in Y dimension (in bytes) + * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] dst_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination image + */ +void main(void) +{ + // Get pixels pointer + Tensor3D src = CONVERT_TO_TENSOR3D_STRUCT_FP16(src); + Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT_FP16(dst); + + vec4 vdata00; + vdata00 = vec4(INITIAL_VALUE); + vec4 vdata01; + vdata01 = vec4(INITIAL_VALUE); + vec4 vdata10; + vdata10 = vec4(INITIAL_VALUE); + vec4 vdata11; + vdata11 = vec4(INITIAL_VALUE); + vec2 sdata; + sdata = vec2(INITIAL_VALUE); + + for(int y = 0; y < int(POOL_SIZE); y++) + { + int x = 0; + for(; x <= (int(POOL_SIZE) - 8); x += 8) + { + vec4 data2; + vec4 data3; + LOAD4_fp16(data2, src, (tensor3D_offset_fp16(src, x, y, 0) >> uint(2))); + LOAD4_fp16(data3, src, (tensor3D_offset_fp16(src, x, y, 0) >> uint(2)) + uint(2)); + +#if defined(POOL_L2) + // Raise to power of 2 for L2 Pooling + data2 *= data2; + data3 *= data3; +#endif /* defined(POOL_L2) */ + + POOL_OP(vdata00, vdata00, data2); + POOL_OP(vdata10, vdata10, data3); + } + + // Leftover + for(; x < int(POOL_SIZE); x = x + 2) + { + vec2 data4middle; + data4middle = load_and_unpack(src, (tensor3D_offset_fp16(src, x, y, 0) >> uint(2))); +#if defined(POOL_L2) + // Raise to power of 2 for L2 Pooling + data4middle *= data4middle; +#endif /* defined(POOL_L2) */ + if((x + 1) >= int(POOL_SIZE)) + { + POOL_OP_float(sdata.x, sdata.x, data4middle.x); + } + else + 
{ + float data4; + POOL_OP_float(data4, data4middle.x, data4middle.y); + POOL_OP_float(sdata.x, sdata.x, data4); + } + } + } + + for(int y = STRIDE_X; y < int(POOL_SIZE + STRIDE_X); y++) + { + int x1 = STRIDE_X; + for(; x1 <= (int(POOL_SIZE + STRIDE_X) - 8); x1 += 8) + { + vec4 data2; + vec4 data3; + LOAD4_fp16(data2, src, (tensor3D_offset_fp16(src, x1, y, 0) >> uint(2))); + LOAD4_fp16(data3, src, (tensor3D_offset_fp16(src, x1, y, 0) >> uint(2)) + uint(2)); + +#if defined(POOL_L2) + // Raise to power of 2 for L2 Pooling + data2 *= data2; + data3 *= data3; +#endif /* defined(POOL_L2) */ + + POOL_OP(vdata01, vdata01, data2); + POOL_OP(vdata11, vdata11, data3); + } + + // Leftover + for(; x1 < int(POOL_SIZE + STRIDE_X); x1 = x1 + 2) + { + vec2 data4middle; + data4middle = load_and_unpack(src, (tensor3D_offset_fp16(src, x1, y, 0) >> uint(2))); +#if defined(POOL_L2) + // Raise to power of 2 for L2 Pooling + data4middle *= data4middle; +#endif /* defined(POOL_L2) */ + if((x1 + 1) >= int(POOL_SIZE + STRIDE_X)) + { + POOL_OP_float(sdata.y, sdata.y, data4middle.x); + } + else + { + float data4; + POOL_OP_float(data4, data4middle.x, data4middle.y); + POOL_OP_float(sdata.y, sdata.y, data4); + } + } + } + + //Reduce result + vec4 reduce40; + POOL_OP(reduce40, vdata00.xyzw, vdata10.xyzw); + vec2 reduce20; + POOL_OP_vec2(reduce20, reduce40.xy, reduce40.zw); + vec4 reduce41; + POOL_OP(reduce41, vdata01.xyzw, vdata11.xyzw); + vec2 reduce21; + POOL_OP_vec2(reduce21, reduce41.xy, reduce41.zw); + vec2 data; + POOL_OP_float(data.x, reduce20.x, reduce20.y); + POOL_OP_float(data.x, data.x, sdata.x); + POOL_OP_float(data.y, reduce21.x, reduce21.y); + POOL_OP_float(data.y, data.y, sdata.y); + +#if defined(POOL_AVG) || defined(POOL_L2) + { + // Divide by pool region in case of average pooling + int start_x1 = int(gl_GlobalInvocationID.x) * STRIDE_X - PAD_X; + int start_y1 = int(gl_GlobalInvocationID.y) * STRIDE_Y - PAD_Y; + int end_x1 = int(min(start_x1 + POOL_SIZE, MAX_WIDTH)); + int 
end_y1 = int(min(start_y1 + POOL_SIZE, MAX_HEIGHT)); + int start_x2 = start_x1 + STRIDE_X; + int start_y2 = start_y1; + int end_x2 = int(min(start_x2 + POOL_SIZE, MAX_WIDTH)); + int end_y2 = int(min(start_y2 + POOL_SIZE, MAX_HEIGHT)); + vec2 res1; + res1.x = float((end_y1 - start_y1) * (end_x1 - start_x1)); + res1.y = float((end_y2 - start_y2) * (end_x2 - start_x2)); + data.x = DIV_OP(data.x, res1.x); + data.y = DIV_OP(data.y, res1.y); + } +#endif /* defined(POOL_AVG) || defined(POOL_L2) */ + +#if defined(POOL_L2) + // Take square root of the result in L2 pooling + data = SQRT_OP(data); +#endif /* defined(POOL_L2) */ + uint res; + res = uint(packHalf2x16(data)); + + // Store result + STORE1(dst, CURRENT_OFFSET(dst) >> uint(2), res); +} +#endif /*POOLING_LAYER_2*/ +#endif /*DATA_TYPE_FP32 */ diff --git a/src/core/GLES_COMPUTE/cs_shaders/softmax_layer.cs b/src/core/GLES_COMPUTE/cs_shaders/softmax_layer.cs new file mode 100644 index 0000000000..0bbabeaafc --- /dev/null +++ b/src/core/GLES_COMPUTE/cs_shaders/softmax_layer.cs @@ -0,0 +1,541 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +layout(local_size_x = LOCAL_SIZE_X, local_size_y = LOCAL_SIZE_Y, local_size_z = LOCAL_SIZE_Z) in; + +#include "helpers.h" + +#define MAX_OP(x, y) max((x), (y)) +#define ADD_OP(x, y) ((x) + (y)) +#define SUB_OP(x, y) ((x) - (y)) +#define DIV_OP(x, y) ((x) / (y)) +#define EXP_OP(x) exp((x)) + +#if defined(DATA_TYPE_FP32) +const float MINVAL = -1.0 / 0.0; +vec4 type_min = CONVERT(MINVAL, vec4); + +#define LOAD16(name, offset) \ + vec4(LOAD4(name, offset), \ + LOAD4(name, offset + uint(1)), \ + LOAD4(name, offset + uint(2)), \ + LOAD4(name, offset + uint(3))) + +#define STORE16(name, offset, value) \ + STORE4(name, offset, value.x); \ + STORE4(name, offset + uint(1), value.y); \ + STORE4(name, offset + uint(2), value.z); \ + STORE4(name, offset + uint(3), value.w) + +#ifdef SOFTMAX_LAYER_MAX +BUFFER_DECLARATION(src, 1, float, readonly); +BUFFER_DECLARATION(dst, 2, float, writeonly); +#elif defined(SOFTMAX_LAYER_SHIFT_EXP_SUM) +BUFFER_DECLARATION(src, 1, float, readonly); +BUFFER_DECLARATION(max, 2, float, readonly); +BUFFER_DECLARATION(dst, 3, float, writeonly); +BUFFER_DECLARATION(sum, 4, float, writeonly); +#elif defined(SOFTMAX_LAYER_NORM) +BUFFER_DECLARATION(src, 1, float, readonly); +BUFFER_DECLARATION(sum, 2, float, readonly); +BUFFER_DECLARATION(dst, 3, float, writeonly); +#endif // SOFTMAX_LAYER_MAX + +layout(std140) uniform shader_params +{ +#ifdef SOFTMAX_LAYER_MAX + TENSOR3D_PARAM_DECLARATION(src); + TENSOR3D_PARAM_DECLARATION(dst); + uint width; +#elif defined(SOFTMAX_LAYER_SHIFT_EXP_SUM) + TENSOR3D_PARAM_DECLARATION(src); + TENSOR3D_PARAM_DECLARATION(max); + TENSOR3D_PARAM_DECLARATION(dst); + TENSOR3D_PARAM_DECLARATION(sum); + uint width; +#elif defined(SOFTMAX_LAYER_NORM) + 
TENSOR3D_PARAM_DECLARATION(src); + TENSOR3D_PARAM_DECLARATION(sum); + TENSOR3D_PARAM_DECLARATION(dst); +#endif // SOFTMAX_LAYER_MAX +}; + +#ifdef SOFTMAX_LAYER_MAX +/** Identifies the maximum value across the 1st dimension. + * + * @note Datatype must be given as a preprocessor argument using "#define DATA_TYPE_FP32" + * + * @param[in] src_ptr Pointer to the source tensor slice. Supported data types: F32 + * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes) + * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes) + * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] src_step_z src_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor + * @param[out] dst_ptr Pointer to the destination tensor slice. 
Supported data types: same as @p src_ptr + * @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes) + * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes) + * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in bytes) + * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor + * @param[in] width Input image width + */ +void main(void) +{ + Image src = CONVERT_TENSOR3D_TO_IMAGE_STRUCT(src); + Image dst = CONVERT_TENSOR3D_TO_IMAGE_STRUCT(dst); + + // Initialize local maximum + vec4 max_val = CONVERT(type_min, vec4); + + // Calculate max of row + uint width2 = width >> 2; + for(int i = 0; i < int(width2); i++) + { + vec4 data = LOAD16(src, offset(src, i << 2, 0)); + max_val = MAX_OP(data, max_val); + } + +#ifdef NON_MULTIPLE_OF_4 + // Handle non multiple of 4 + for(int i = int(width2 << 2); i < int(width); i++) + { + float data = LOAD4(src, offset(src, i, 0)); + max_val.x = MAX_OP(data, max_val.x); + } +#endif /* NON_MULTIPLE_OF_4 */ + + // Perform max reduction + max_val.xy = MAX_OP(max_val.xy, max_val.zw); + max_val.x = MAX_OP(max_val.x, max_val.y); + + // Store result + STORE4(dst, CURRENT_OFFSET(dst), max_val.x); +} +#elif defined(SOFTMAX_LAYER_SHIFT_EXP_SUM) // SOFTMAX_LAYER_MAX +/** Shifts the values of the input tensor by the max calculated in softmax_layer_max kernel, + * then gets the exponent of each element as sums all elements across each row. + * + * @note Datatype must be given as a preprocessor argument using "#define DATA_TYPE_FP32" + * + * @note In case the input is not multiple of 4 NON_MULTIPLE_OF_4 must be passed. 
+ * + * @param[in] src_ptr Pointer to the source tensor slice. Supported data types: F32 + * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes) + * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes) + * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] src_step_z src_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor + * @param[in] max_ptr Pointer to the max values tensor slice. Supported data types: same as @p src_ptr + * @param[in] max_stride_x Stride of the max values tensor in X dimension (in bytes) + * @param[in] max_step_x max_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] max_stride_y Stride of the max values tensor in Y dimension (in bytes) + * @param[in] max_step_y max_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] max_stride_z Stride of the max values tensor in Z dimension (in bytes) + * @param[in] max_step_z max_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] max_offset_first_element_in_bytes The offset of the first element in the max values tensor + * @param[out] dst_ptr Pointer to the destination tensor slice. 
Supported data types: same as @p src_ptr + * @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes) + * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes) + * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in bytes) + * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor + * @param[out] sum_ptr Pointer to the sum values tensor slice. Supported data types: same as @p src_ptr + * @param[in] sum_stride_x Stride of the sum values tensor in X dimension (in bytes) + * @param[in] sum_step_x sum_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] sum_stride_y Stride of the sum values tensor in Y dimension (in bytes) + * @param[in] sum_step_y sum_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] sum_stride_z Stride of the sum values tensor in Z dimension (in bytes) + * @param[in] sum_step_z sum_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] sum_offset_first_element_in_bytes The offset of the first element in the sum values tensor + * @param[in] width Input image width + */ +void main(void) +{ + Image src = CONVERT_TENSOR3D_TO_IMAGE_STRUCT(src); + Image dst = CONVERT_TENSOR3D_TO_IMAGE_STRUCT(dst); + Image max = CONVERT_TENSOR3D_TO_IMAGE_STRUCT(max); + Image sum = CONVERT_TENSOR3D_TO_IMAGE_STRUCT(sum); + + // Load max value of 1D logits vector (row) + vec4 max_val = CONVERT(LOAD4(max, CURRENT_OFFSET(max)), vec4); + + // Set sum vector + vec4 sum1D = CONVERT(0, vec4); + + // Shift values, exp and sum + uint width2 = width >> 2; + for(int 
i = 0; i < int(width2); i++) + { + vec4 data = LOAD16(src, offset(src, i << 2, 0)); + data = SUB_OP(data, max_val); + data = EXP_OP(data); + STORE16(dst, offset(dst, i << 2, 0), data); + sum1D = ADD_OP(sum1D, data); + } + +#ifdef NON_MULTIPLE_OF_4 + // Handle non multiple of 4 + for(int i = int(width2 << 2); i < int(width); i++) + { + float data; + data = LOAD4(src, offset(src, i, 0)); + data = SUB_OP(data, max_val.x); + data = EXP_OP(data); + STORE4(dst, offset(dst, i, 0), data); + sum1D.x = ADD_OP(sum1D.x, data); + } +#endif /* NON_MULTIPLE_OF_4 */ + + // Perform min/max reduction + sum1D.xy = ADD_OP(sum1D.xy, sum1D.zw); + sum1D.x = ADD_OP(sum1D.x, sum1D.y); + + // Calculate and store result + STORE4(sum, CURRENT_OFFSET(sum), sum1D.x); +} +#elif defined(SOFTMAX_LAYER_NORM) // SOFTMAX_LAYER_MAX +/** Divides all the values of the input tensor by the sum calculated from softmax_layer_shift_exp_sum kernel. + * + * @note Datatype must be given as a preprocessor argument using "#define DATA_TYPE_FP32" + * + * @param[in] src_ptr Pointer to the source tensor slice. Supported data types: F32 + * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes) + * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes) + * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] src_step_z src_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor + * @param[in] sum_ptr Pointer to the sum values tensor slice. 
Supported data types: same as @p src_ptr + * @param[in] sum_stride_x Stride of the sum values tensor in X dimension (in bytes) + * @param[in] sum_step_x sum_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] sum_stride_y Stride of the sum values tensor in Y dimension (in bytes) + * @param[in] sum_step_y sum_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] sum_stride_z Stride of the sum values tensor in Z dimension (in bytes) + * @param[in] sum_step_z sum_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] sum_offset_first_element_in_bytes The offset of the first element in the sum values tensor + * @param[out] dst_ptr Pointer to the destination tensor slice. Supported data types: same as @p src_ptr + * @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes) + * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes) + * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in bytes) + * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor + */ +void main(void) +{ + Image src = CONVERT_TENSOR3D_TO_IMAGE_STRUCT(src); + Image dst = CONVERT_TENSOR3D_TO_IMAGE_STRUCT(dst); + Image sum = CONVERT_TENSOR3D_TO_IMAGE_STRUCT_NO_STEP(sum); + + // Load max value of 1D logits vector (row) + vec4 sum_val = CONVERT(LOAD4(sum, offset(sum, 0, int(gl_GlobalInvocationID.y))), vec4); + vec4 data = LOAD16(src, CURRENT_OFFSET(src)); + STORE16(dst, CURRENT_OFFSET(dst), DIV_OP(data, sum_val)); +} +#endif // SOFTMAX_LAYER_MAX + +#elif defined(DATA_TYPE_FP16) +precision 
mediump float; + +const float MINVAL1 = -1.0 / 0.0; +vec4 type_min1 = CONVERT(MINVAL1, vec4); + +#define GC_LOAD4_IMAGE(r, name, x, y) \ + load_and_unpack(r.xy, name, x, y); \ + load_and_unpack(r.zw, name, (x + 2), y) + +#define GC_STORE4_IMAGE(r, name, x, y) \ + GC_STORE1_2D_OFFSET(uint(packHalf2x16(r.xy)), name, x, y); \ + GC_STORE1_2D_OFFSET(uint(packHalf2x16(r.zw)), name, (x + 2), y) + +#ifdef SOFTMAX_LAYER_MAX +BUFFER_DECLARATION(src, 1, uint, readonly); +BUFFER_DECLARATION(dst, 2, uint, writeonly); +#elif defined(SOFTMAX_LAYER_SHIFT_EXP_SUM) +BUFFER_DECLARATION(src, 1, uint, readonly); +BUFFER_DECLARATION(max, 2, uint, readonly); +BUFFER_DECLARATION(dst, 3, uint, writeonly); +BUFFER_DECLARATION(sum, 4, uint, writeonly); +#elif defined(SOFTMAX_LAYER_NORM) +BUFFER_DECLARATION(src, 1, uint, readonly); +BUFFER_DECLARATION(sum, 2, uint, readonly); +BUFFER_DECLARATION(dst, 3, uint, writeonly); +#endif // SOFTMAX_LAYER_MAX + +layout(std140) uniform shader_params +{ +#ifdef SOFTMAX_LAYER_MAX + TENSOR3D_PARAM_DECLARATION(src); + TENSOR3D_PARAM_DECLARATION(dst); + uint width; +#elif defined(SOFTMAX_LAYER_SHIFT_EXP_SUM) + TENSOR3D_PARAM_DECLARATION(src); + TENSOR3D_PARAM_DECLARATION(max); + TENSOR3D_PARAM_DECLARATION(dst); + TENSOR3D_PARAM_DECLARATION(sum); + uint width; +#elif defined(SOFTMAX_LAYER_NORM) + TENSOR3D_PARAM_DECLARATION(src); + TENSOR3D_PARAM_DECLARATION(sum); + TENSOR3D_PARAM_DECLARATION(dst); +#endif // SOFTMAX_LAYER_MAX +}; + +#define load_and_unpack(rs, names, xs, ys) \ + do \ + { \ + uint packed_s; \ + GC_LOAD1_2D_OFFSET(packed_s, names, xs, ys); \ + rs = vec2(unpackHalf2x16(packed_s)); \ + } while(false) + +#ifdef SOFTMAX_LAYER_MAX +/** Identifies the maximum value across the 1st dimension. + * + * @note Datatype must be given as a preprocessor argument using "#define DATA_TYPE_FP16" + * + * @param[in] src_ptr Pointer to the source tensor slice. 
Supported data types: F16 + * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes) + * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes) + * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] src_step_z src_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor + * @param[out] dst_ptr Pointer to the destination tensor slice. Supported data types: same as @p src_ptr + * @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes) + * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes) + * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in bytes) + * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor + * @param[in] width Input image width + */ +void main(void) +{ + Image src = GC_CONVERT_TENSOR3D_TO_IMAGE_STRUCT(src); + Image dst = GC_CONVERT_TENSOR3D_TO_IMAGE_STRUCT(dst); + + // Initialize local maximum + vec4 max_val1 = CONVERT(type_min1, vec4); + + // Calculate max of row + uint width2 = width >> 2; + for(int i = 0; i < int(width2); i++) + { + vec4 data1; + GC_LOAD4_IMAGE(data1, src, (i << 2), 0); + max_val1 = MAX_OP(data1, max_val1); + } + +#ifdef NON_MULTIPLE_OF_4 + // Handle non multiple of 4 + for(int i = int(width2 << 2); i < 
int(width); i = i + 2) + { + vec2 data; + load_and_unpack(data, src, i, 0); + max_val1.x = MAX_OP(data.x, max_val1.x); + if((i + 1) < int(width)) + { + max_val1.x = MAX_OP(data.y, max_val1.x); + } + } +#endif /* NON_MULTIPLE_OF_4 */ + + // Perform max reduction + max_val1.xy = MAX_OP(max_val1.xy, max_val1.zw); + max_val1.x = MAX_OP(max_val1.x, max_val1.y); + vec2 res1 = vec2(max_val1.x, 0.f); + uint res; + res = uint(packHalf2x16(res1)); + + // Store result + GC_STORE1_2D_OFFSET(res, dst, 0, 0); +} +#elif defined(SOFTMAX_LAYER_SHIFT_EXP_SUM) // SOFTMAX_LAYER_MAX +/** Shifts the values of the input tensor by the max calculated in softmax_layer_max kernel, + * then gets the exponent of each element as sums all elements across each row. + * + * @note Datatype must be given as a preprocessor argument using "#define DATA_TYPE_FP16" + * + * @note In case the input is not multiple of 4 NON_MULTIPLE_OF_4 must be passed. + * + * @param[in] src_ptr Pointer to the source tensor slice. Supported data types: F16 + * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes) + * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes) + * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] src_step_z src_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor + * @param[in] max_ptr Pointer to the max values tensor slice. 
Supported data types: same as @p src_ptr + * @param[in] max_stride_x Stride of the max values tensor in X dimension (in bytes) + * @param[in] max_step_x max_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] max_stride_y Stride of the max values tensor in Y dimension (in bytes) + * @param[in] max_step_y max_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] max_stride_z Stride of the max values tensor in Z dimension (in bytes) + * @param[in] max_step_z max_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] max_offset_first_element_in_bytes The offset of the first element in the max values tensor + * @param[out] dst_ptr Pointer to the destination tensor slice. Supported data types: same as @p src_ptr + * @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes) + * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes) + * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in bytes) + * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor + * @param[out] sum_ptr Pointer to the sum values tensor slice. 
Supported data types: same as @p src_ptr + * @param[in] sum_stride_x Stride of the sum values tensor in X dimension (in bytes) + * @param[in] sum_step_x sum_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] sum_stride_y Stride of the sum values tensor in Y dimension (in bytes) + * @param[in] sum_step_y sum_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] sum_stride_z Stride of the sum values tensor in Z dimension (in bytes) + * @param[in] sum_step_z sum_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] sum_offset_first_element_in_bytes The offset of the first element in the sum values tensor + * @param[in] width Input image width + */ +void main(void) +{ + Image src = GC_CONVERT_TENSOR3D_TO_IMAGE_STRUCT(src); + Image dst = GC_CONVERT_TENSOR3D_TO_IMAGE_STRUCT(dst); + Image max = GC_CONVERT_TENSOR3D_TO_IMAGE_STRUCT(max); + Image sum = GC_CONVERT_TENSOR3D_TO_IMAGE_STRUCT(sum); + + // Load max value of 1D logits vector (row) + vec2 datamaxinit; + load_and_unpack(datamaxinit, max, 0, 0); + vec4 max_val = CONVERT(datamaxinit.x, vec4); + + // Set sum vector + vec4 sum1D1 = CONVERT(0.f, vec4); + + // Shift values, exp and sum + uint width2 = width >> 2; + for(int i = 0; i < int(width2); i++) + { + vec4 data; + GC_LOAD4_IMAGE(data, src, (i << 2), 0); + data = SUB_OP(data, max_val); + data = EXP_OP(data); + GC_STORE4_IMAGE(data, dst, (i << 2), 0); + sum1D1 = ADD_OP(sum1D1, data); + } + +#ifdef NON_MULTIPLE_OF_4 + // Handle non multiple of 4 + for(int i = int(width2 << 2); i < int(width); i = i + 2) + { + vec2 datamiddle; + float data1; + load_and_unpack(datamiddle, src, i, 0); + data1 = SUB_OP(datamiddle.x, max_val.x); + data1 = EXP_OP(data1); + vec2 datares1; + if((i + 1) < int(width)) + { + float data2; + data2 = SUB_OP(datamiddle.y, max_val.x); + data2 = EXP_OP(data2); + datares1 = vec2(data1, data2); + data1 = ADD_OP(data2, data1); + } + else + { + datares1 = 
vec2(data1, 0.f); + } + uint datares; + datares = uint(packHalf2x16(datares1)); + GC_STORE1_2D_OFFSET(datares, dst, i, 0); + sum1D1.x = ADD_OP(sum1D1.x, data1); + } +#endif /* NON_MULTIPLE_OF_4 */ + + // Perform min/max reduction + sum1D1.xy = ADD_OP(sum1D1.xy, sum1D1.zw); + sum1D1.x = ADD_OP(sum1D1.x, sum1D1.y); + vec2 res1 = vec2(sum1D1.x, 0.f); + uint res; + res = uint(packHalf2x16(res1)); + // Calculate and store result + GC_STORE1_2D_OFFSET(res, sum, 0, 0); +} +#elif defined(SOFTMAX_LAYER_NORM) // SOFTMAX_LAYER_MAX +/** Divides all the values of the input tensor by the sum calculated from softmax_layer_shift_exp_sum kernel. + * + * @note Datatype must be given as a preprocessor argument using "#define DATA_TYPE_FP16" + * + * @param[in] src_ptr Pointer to the source tensor slice. Supported data types: F16 + * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes) + * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes) + * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] src_step_z src_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor + * @param[in] sum_ptr Pointer to the sum values tensor slice. 
Supported data types: same as @p src_ptr + * @param[in] sum_stride_x Stride of the sum values tensor in X dimension (in bytes) + * @param[in] sum_step_x sum_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] sum_stride_y Stride of the sum values tensor in Y dimension (in bytes) + * @param[in] sum_step_y sum_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] sum_stride_z Stride of the sum values tensor in Z dimension (in bytes) + * @param[in] sum_step_z sum_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] sum_offset_first_element_in_bytes The offset of the first element in the sum values tensor + * @param[out] dst_ptr Pointer to the destination tensor slice. Supported data types: same as @p src_ptr + * @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes) + * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes) + * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in bytes) + * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor + */ +void main(void) +{ + Image src = GC_CONVERT_TENSOR3D_TO_IMAGE_STRUCT(src); + Image dst = GC_CONVERT_TENSOR3D_TO_IMAGE_STRUCT(dst); + Image sum = GC_CONVERT_TENSOR3D_TO_IMAGE_STRUCT_NO_STEP(sum); + + // Load max value of 1D logits vector (row) + vec2 sum1; + load_and_unpack(sum1, sum, 0, int(gl_GlobalInvocationID.y)); + vec4 sum_val1 = CONVERT(sum1.x, vec4); + + vec4 data1; + GC_LOAD4_IMAGE(data1, src, 0, 0); + vec4 res = DIV_OP(data1, sum_val1); + GC_STORE4_IMAGE(res, dst, 0, 0); +} +#endif // 
SOFTMAX_LAYER_MAX +#endif // DATA_TYPE_FP32 \ No newline at end of file diff --git a/src/core/GLES_COMPUTE/cs_shaders/transpose.cs b/src/core/GLES_COMPUTE/cs_shaders/transpose.cs new file mode 100755 index 0000000000..6d020fe70d --- /dev/null +++ b/src/core/GLES_COMPUTE/cs_shaders/transpose.cs @@ -0,0 +1,187 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +layout(local_size_x = LOCAL_SIZE_X, local_size_y = LOCAL_SIZE_Y, local_size_z = LOCAL_SIZE_Z) in; +#include "helpers.h" + +#ifdef DATA_TYPE_FP32 +precision highp float; + +BUFFER_DECLARATION(src, 1, float, readonly); +BUFFER_DECLARATION(dst, 2, float, writeonly); + +layout(std140) uniform shader_params +{ + IMAGE_PARAM_DECLARATION(src); + IMAGE_PARAM_DECLARATION(dst); +}; + +#define LOAD16(r, name, offset) \ + r.x = LOAD4(name, offset); \ + r.y = LOAD4(name, offset + uint(1)); \ + r.z = LOAD4(name, offset + uint(2)); \ + r.w = LOAD4(name, offset + uint(3)) + +#define STORE16(name, offset, r) \ + STORE4(name, offset, r.x); \ + STORE4(name, offset + uint(1), r.y); \ + STORE4(name, offset + uint(2), r.z); \ + STORE4(name, offset + uint(3), r.w) + +/** This OpenGL ES kernel computes the matrix transposition of input matrix + * + * @param[in] src_ptr Pointer to the source matrix. Supported data types: F32 + * @param[in] src_stride_x Stride of the source matrix in X dimension (in bytes) + * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src_stride_y Stride of the source matrix in Y dimension (in bytes) + * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source matrix + * @param[out] dst_ptr Pointer to the destination matrix Supported data type: same as src_ptr + * @param[in] dst_stride_x Stride of the destination matrix in X dimension (in bytes) + * @param[in] dst_step_x dst_gx_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] dst_stride_y Stride of the destination matrix in Y dimension (in bytes) + * @param[in] dst_step_y dst_gx_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination matrix + */ +void main(void) +{ + 
// Compute source address + Image src = CONVERT_TO_IMAGE_STRUCT(src); + Image dst = CONVERT_TO_IMAGE_STRUCT(dst); + + // Load the NxN block at (x, y) + vec4 u0; + vec4 u1; + vec4 u2; + vec4 u3; + LOAD16(u0, src, offset(src, 0, 0)); + LOAD16(u1, src, offset(src, 0, 1)); + LOAD16(u2, src, offset(src, 0, 2)); + LOAD16(u3, src, offset(src, 0, 3)); + + // Transpose the block + vec4 tmp; + tmp.xyz = u0.yzw; + u0.y = u1.x; + u0.z = u2.x; + u0.w = u3.x; + u1.x = tmp.x; + u2.x = tmp.y; + u3.x = tmp.z; + tmp.xy = u1.zw; + u1.z = u2.y; + u1.w = u3.y; + u2.y = tmp.x; + u3.y = tmp.y; + tmp.x = u2.w; + u2.w = u3.z; + u3.z = tmp.x; + + // Store the block at (y, x) + uint dst_offset_in_bytes = uint(16) * uint(gl_GlobalInvocationID.y) + uint(4) * uint(gl_GlobalInvocationID.x) * (dst.stride_y) + (dst.offset_first_element_in_bytes); + + STORE16(dst, uint((dst_offset_in_bytes + uint(0) * dst.stride_y) >> 2), u0); + STORE16(dst, uint((dst_offset_in_bytes + uint(1) * dst.stride_y) >> 2), u1); + STORE16(dst, uint((dst_offset_in_bytes + uint(2) * dst.stride_y) >> 2), u2); + STORE16(dst, uint((dst_offset_in_bytes + uint(3) * dst.stride_y) >> 2), u3); +} + +#elif defined(DATA_TYPE_FP16) +precision mediump float; + +BUFFER_DECLARATION(src, 1, uvec2, readonly); +BUFFER_DECLARATION(dst, 2, uvec2, writeonly); + +layout(std140) uniform shader_params +{ + IMAGE_PARAM_DECLARATION(src); + IMAGE_PARAM_DECLARATION(dst); +}; + +/** This OpenGL ES kernel computes the matrix transposition of input matrix + * + * @param[in] src_ptr Pointer to the source matrix. 
Supported data types: F16 + * @param[in] src_stride_x Stride of the source matrix in X dimension (in bytes) + * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src_stride_y Stride of the source matrix in Y dimension (in bytes) + * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source matrix + * @param[out] dst_ptr Pointer to the destination matrix Supported data type: same as src_ptr + * @param[in] dst_stride_x Stride of the destination matrix in X dimension (in bytes) + * @param[in] dst_step_x dst_gx_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] dst_stride_y Stride of the destination matrix in Y dimension (in bytes) + * @param[in] dst_step_y dst_gx_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination matrix + */ +void main(void) +{ + // Compute source address + Image src = GC_CONVERT_TO_IMAGE_STRUCT(src); + Image dst = GC_CONVERT_TO_IMAGE_STRUCT(dst); + + // Load the NxN block at (x, y) + vec4 u0; + vec4 u1; + vec4 u2; + vec4 u3; + uvec2 packed_s[4]; + GC_LOAD1_2D_OFFSET(packed_s[0], src, 0, 0); + GC_LOAD1_2D_OFFSET(packed_s[1], src, 0, 1); + GC_LOAD1_2D_OFFSET(packed_s[2], src, 0, 2); + GC_LOAD1_2D_OFFSET(packed_s[3], src, 0, 3); + u0 = vec4(unpackHalf2x16(packed_s[0].x), unpackHalf2x16(packed_s[0].y)); + u1 = vec4(unpackHalf2x16(packed_s[1].x), unpackHalf2x16(packed_s[1].y)); + u2 = vec4(unpackHalf2x16(packed_s[2].x), unpackHalf2x16(packed_s[2].y)); + u3 = vec4(unpackHalf2x16(packed_s[3].x), unpackHalf2x16(packed_s[3].y)); + + // Transpose the block + vec4 tmp; + tmp.xyz = u0.yzw; + u0.y = u1.x; + u0.z = u2.x; + u0.w = u3.x; + u1.x = tmp.x; + u2.x = tmp.y; + u3.x = tmp.z; + tmp.xy = u1.zw; + u1.z = u2.y; + 
u1.w = u3.y; + u2.y = tmp.x; + u3.y = tmp.y; + tmp.x = u2.w; + u2.w = u3.z; + u3.z = tmp.x; + + // Store the block at (y, x) + uint dst_offset_in_bytes = uint(8) * uint(gl_GlobalInvocationID.y) + uint(gl_GlobalInvocationID.x) * (dst_step_y) + (dst.offset_first_element_in_bytes); + + packed_s[0] = uvec2(packHalf2x16(u0.xy), packHalf2x16(u0.zw)); + packed_s[1] = uvec2(packHalf2x16(u1.xy), packHalf2x16(u1.zw)); + packed_s[2] = uvec2(packHalf2x16(u2.xy), packHalf2x16(u2.zw)); + packed_s[3] = uvec2(packHalf2x16(u3.xy), packHalf2x16(u3.zw)); + GC_STORE1(packed_s[0], dst, uint((dst_offset_in_bytes + uint(0) * dst_stride_y) >> 3)); + GC_STORE1(packed_s[1], dst, uint((dst_offset_in_bytes + uint(1) * dst_stride_y) >> 3)); + GC_STORE1(packed_s[2], dst, uint((dst_offset_in_bytes + uint(2) * dst_stride_y) >> 3)); + GC_STORE1(packed_s[3], dst, uint((dst_offset_in_bytes + uint(3) * dst_stride_y) >> 3)); +} +#endif /*ARM_COMPUTE_ENABLE_FP16*/ diff --git a/src/core/GLES_COMPUTE/egl_entries.in b/src/core/GLES_COMPUTE/egl_entries.in new file mode 100644 index 0000000000..64ccda63c9 --- /dev/null +++ b/src/core/GLES_COMPUTE/egl_entries.in @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +EGL_ENTRY(eglGetProcAddress) +EGL_ENTRY(eglBindAPI) +EGL_ENTRY(eglChooseConfig) +EGL_ENTRY(eglCreateContext) +EGL_ENTRY(eglDestroyContext) +EGL_ENTRY(eglGetDisplay) +EGL_ENTRY(eglInitialize) +EGL_ENTRY(eglMakeCurrent) +EGL_ENTRY(eglTerminate) +EGL_ENTRY(eglGetError) +EGL_ENTRY(eglQueryString) diff --git a/src/core/GLES_COMPUTE/gl_entries.in b/src/core/GLES_COMPUTE/gl_entries.in new file mode 100644 index 0000000000..15ce8ee819 --- /dev/null +++ b/src/core/GLES_COMPUTE/gl_entries.in @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +GL_ENTRY(glAttachShader) +GL_ENTRY(glCompileShader) +GL_ENTRY(glCreateProgram) +GL_ENTRY(glCreateShader) +GL_ENTRY(glDeleteProgram) +GL_ENTRY(glDeleteShader) +GL_ENTRY(glDetachShader) +GL_ENTRY(glGetProgramInfoLog) +GL_ENTRY(glGetProgramiv) +GL_ENTRY(glGetShaderInfoLog) +GL_ENTRY(glGetShaderiv) +GL_ENTRY(glLinkProgram) +GL_ENTRY(glShaderSource) +GL_ENTRY(glUseProgram) +GL_ENTRY(glBindBuffer) +GL_ENTRY(glBindBufferBase) +GL_ENTRY(glBufferData) +GL_ENTRY(glDeleteBuffers) +GL_ENTRY(glDispatchCompute) +GL_ENTRY(glFlush) +GL_ENTRY(glGenBuffers) +GL_ENTRY(glGetProgramResourceIndex) +GL_ENTRY(glGetUniformLocation) +GL_ENTRY(glMapBufferRange) +GL_ENTRY(glMemoryBarrier) +GL_ENTRY(glUniform1ui) +GL_ENTRY(glUnmapBuffer) +GL_ENTRY(glGetError) +GL_ENTRY(glGetActiveUniformBlockiv) +GL_ENTRY(glUniformBlockBinding) +GL_ENTRY(glGetUniformBlockIndex) +GL_ENTRY(glGenTextures) +GL_ENTRY(glDeleteTextures) +GL_ENTRY(glBindTexture) +GL_ENTRY(glTexImage2D) +GL_ENTRY(glGenFramebuffers) +GL_ENTRY(glDeleteFramebuffers) +GL_ENTRY(glBindFramebuffer) +GL_ENTRY(glFramebufferTexture2D) diff --git a/src/core/GLES_COMPUTE/kernels/GCAbsoluteDifferenceKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCAbsoluteDifferenceKernel.cpp new file mode 100644 index 0000000000..d76ae8ff1c --- /dev/null +++ b/src/core/GLES_COMPUTE/kernels/GCAbsoluteDifferenceKernel.cpp @@ -0,0 +1,112 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/core/GLES_COMPUTE/kernels/GCAbsoluteDifferenceKernel.h" + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h" +#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h" +#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h" +#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/core/Window.h" +#include "support/ToolchainSupport.h" + +#include +#include + +using namespace arm_compute; + +GCAbsoluteDifferenceKernel::GCAbsoluteDifferenceKernel() + : _input1(nullptr), _input2(nullptr), _output(nullptr) +{ +} + +void GCAbsoluteDifferenceKernel::configure(const IGCTensor *input1, const IGCTensor *input2, IGCTensor *output) +{ + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::U8); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input2, 1, DataType::U8); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8); + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input1, input2, output); + + _input1 = input1; + _input2 = input2; + _output = output; + + constexpr unsigned int num_elems_processed_per_iteration = 4; + + // Set kernel build options + std::set build_opts; + build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(1)); + build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1)); + build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1)); + + // Create kernel + _kernel = static_cast(GCKernelLibrary::get().create_kernel("absdiff", build_opts)); + + // Configure kernel window + Window win = calculate_max_window(*input1->info(), Steps(num_elems_processed_per_iteration)); + + AccessWindowRectangle input1_access(input1->info(), 0, 0, 4, 1); + AccessWindowRectangle input2_access(input2->info(), 0, 0, 4, 1); + AccessWindowRectangle output_access(output->info(), 0, 0, 4, 1); + + 
update_window_and_padding(win, input1_access, input2_access, output_access); + + ValidRegion valid_region = intersect_valid_regions(input1->info()->valid_region(), + input2->info()->valid_region()); + + output_access.set_valid_region(win, valid_region); + + _kernel.clear_params(); + + // set shader params binding point + _kernel.set_shader_params_binding_point(0); + + IGCKernel::configure(win); +} + +void GCAbsoluteDifferenceKernel::run(const Window &window) +{ + ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); + ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IGCKernel::window(), window); + + _kernel.use(); + + Window slice = window.first_slice_window_2D(); + do + { + unsigned int idx = 0; + unsigned int binding = 1; // SSBO binding starts from 1. + add_2D_tensor_argument(idx, _input1, binding++, slice); + add_2D_tensor_argument(idx, _input2, binding++, slice); + add_2D_tensor_argument(idx, _output, binding++, slice); + + _kernel.update_shader_params(); + + enqueue(*this, slice); + } + while(window.slide_window_slice_2D(slice)); +} diff --git a/src/core/GLES_COMPUTE/kernels/GCActivationLayerKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCActivationLayerKernel.cpp new file mode 100644 index 0000000000..42433cf076 --- /dev/null +++ b/src/core/GLES_COMPUTE/kernels/GCActivationLayerKernel.cpp @@ -0,0 +1,128 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/core/GLES_COMPUTE/kernels/GCActivationLayerKernel.h" + +#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h" +#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h" +#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/IAccessWindow.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Utils.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/core/Window.h" +#include "support/ToolchainSupport.h" + +#include +#include + +using namespace arm_compute; + +GCActivationLayerKernel::GCActivationLayerKernel() + : _input(nullptr), _output(nullptr) +{ +} + +void GCActivationLayerKernel::configure(IGCTensor *input, IGCTensor *output, ActivationLayerInfo act_info) +{ + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); + + // Make sure _kernel is initialized before calling the parent's configure + _input = input; + _output = input; + + if(output != nullptr) + { + // Output auto inizialitation if not yet initialized + auto_init_if_empty(*output->info(), input->info()->tensor_shape(), 1, input->info()->data_type(), input->info()->fixed_point_position()); + + ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, output); + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); + ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(input, output); + + _output = output; + } + + unsigned int num_elems_processed_per_iteration = 4 / 
input->info()->element_size(); + + // Set build options + std::set build_opts; + std::string dt_name = (input->info()->data_type() == DataType::F32) ? "DATA_TYPE_FP32" : "DATA_TYPE_FP16"; + build_opts.emplace(("#define " + string_from_activation_func(act_info.activation()))); + build_opts.emplace(("#define " + dt_name)); + build_opts.emplace(("#define A_VAL " + float_to_string_with_full_precision(act_info.a()))); + build_opts.emplace(("#define B_VAL " + float_to_string_with_full_precision(act_info.b()))); + build_opts.emplace(("#define LOCAL_SIZE_X " + support::cpp11::to_string(1))); + build_opts.emplace(("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1))); + build_opts.emplace(("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1))); + + // Create kernel + _kernel = static_cast(GCKernelLibrary::get().create_kernel("activation_layer", build_opts)); + + // Configure kernel window + Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration)); + + if(output != nullptr) + { + AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration); + + update_window_and_padding(win, + AccessWindowHorizontal(input->info(), 0, num_elems_processed_per_iteration), + output_access); + + output_access.set_valid_region(win, input->info()->valid_region()); + } + else + { + update_window_and_padding(win, + AccessWindowHorizontal(input->info(), 0, num_elems_processed_per_iteration)); + } + + _kernel.clear_params(); + + _kernel.set_shader_params_binding_point(0); + + IGCKernel::configure(win); +} + +void GCActivationLayerKernel::run(const Window &window) +{ + ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); + ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IGCKernel::window(), window); + + _kernel.use(); + + Window slice = window.first_slice_window_3D(); + + do + { + unsigned int idx = 0; + unsigned int binding = 1; + add_3D_tensor_argument(idx, _input, binding++, slice); + add_3D_tensor_argument(idx, _output, binding++, slice); + 
_kernel.update_shader_params(); + enqueue(*this, slice); + } + while(window.slide_window_slice_3D(slice)); +} diff --git a/src/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.cpp new file mode 100644 index 0000000000..9c24d2ef42 --- /dev/null +++ b/src/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.cpp @@ -0,0 +1,129 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.h" + +#include "arm_compute/core/AccessWindowStatic.h" +#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h" +#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h" +#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Utils.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/core/Window.h" + +#include "support/ToolchainSupport.h" + +using namespace arm_compute; + +GCBatchNormalizationLayerKernel::GCBatchNormalizationLayerKernel() + : _input(nullptr), _output(nullptr), _mean(nullptr), _var(nullptr), _beta(nullptr), _gamma(nullptr), _epsilon(0.0f) +{ +} + +void GCBatchNormalizationLayerKernel::configure(const IGCTensor *input, IGCTensor *output, const IGCTensor *mean, const IGCTensor *var, const IGCTensor *beta, const IGCTensor *gamma, + float epsilon) +{ + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); + ARM_COMPUTE_ERROR_ON_NULLPTR(output); + + // Output tensor auto initialization if not yet initialized + auto_init_if_empty(*output->info(), input->info()->tensor_shape(), 1, input->info()->data_type(), input->info()->fixed_point_position()); + + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output, mean, var, beta, gamma); + ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(input, output, mean, var, beta, gamma); + ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, output); + ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(mean, var, beta, gamma); + ARM_COMPUTE_ERROR_ON(input->info()->dimension(2) != mean->info()->dimension(0)); + + _input = input; + _output = output; + _mean = mean; + _var = var; + _beta = beta; + _gamma = gamma; + _epsilon = epsilon; + + const unsigned int num_elems_processed_per_iteration = 4 / input->info()->element_size(); + + // Set build options + std::set build_opts; + std::string dt_name = (input->info()->data_type() == DataType::F32) ? 
"DATA_TYPE_FP32" : "DATA_TYPE_FP16"; + build_opts.emplace(("#define " + dt_name)); + build_opts.emplace(("#define ESPILON " + float_to_string_with_full_precision(_epsilon))); + build_opts.emplace(("#define LOCAL_SIZE_X " + support::cpp11::to_string(1))); + build_opts.emplace(("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1))); + build_opts.emplace(("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1))); + + // Create kernel + _kernel = static_cast(GCKernelLibrary::get().create_kernel("batchnormalization_layer", build_opts)); + + // Configure kernel window + Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration)); + + AccessWindowHorizontal input_access(input->info(), 0, num_elems_processed_per_iteration); + AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration); + AccessWindowStatic mean_access(mean->info(), 0, 0, mean->info()->dimension(0) + 1, mean->info()->dimension(1)); + AccessWindowStatic var_access(var->info(), 0, 0, var->info()->dimension(0) + 1, var->info()->dimension(1)); + AccessWindowStatic beta_access(beta->info(), 0, 0, beta->info()->dimension(0) + 1, beta->info()->dimension(1)); + AccessWindowStatic gamma_access(gamma->info(), 0, 0, gamma->info()->dimension(0) + 1, gamma->info()->dimension(1)); + + update_window_and_padding(win, input_access, output_access, mean_access, var_access, beta_access, gamma_access); + output_access.set_valid_region(win, input->info()->valid_region()); + + _kernel.clear_params(); + + _kernel.set_shader_params_binding_point(0); + + IGCKernel::configure(win); +} + +void GCBatchNormalizationLayerKernel::run(const Window &window) +{ + ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); + ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); + + _kernel.use(); + + Window slice = window.first_slice_window_3D(); + + Window vector_slice = window.first_slice_window_1D(); + vector_slice.set(Window::DimX, Window::Dimension(0, 0, 0)); + + unsigned 
int idx = 2 * num_arguments_per_3D_tensor(); + add_1D_tensor_argument(idx, _mean, 3, vector_slice); + add_1D_tensor_argument(idx, _var, 4, vector_slice); + add_1D_tensor_argument(idx, _beta, 5, vector_slice); + add_1D_tensor_argument(idx, _gamma, 6, vector_slice); + + do + { + idx = 0; + add_3D_tensor_argument(idx, _input, 1, slice); + add_3D_tensor_argument(idx, _output, 2, slice); + + _kernel.update_shader_params(); + enqueue(*this, slice); + } + while(window.slide_window_slice_3D(slice)); +} diff --git a/src/core/GLES_COMPUTE/kernels/GCCol2ImKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCCol2ImKernel.cpp new file mode 100644 index 0000000000..10716232c9 --- /dev/null +++ b/src/core/GLES_COMPUTE/kernels/GCCol2ImKernel.cpp @@ -0,0 +1,101 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "arm_compute/core/GLES_COMPUTE/kernels/GCCol2ImKernel.h" + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h" +#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h" +#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h" +#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/core/Validate.h" + +using namespace arm_compute; + +GCCol2ImKernel::GCCol2ImKernel() + : _input(nullptr), _output(nullptr), _convolved_dims() +{ +} + +void GCCol2ImKernel::configure(const IGCTensor *input, IGCTensor *output, + std::pair convolved_dims) +{ + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32); + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); + + _kernel.clear_params(); + + _input = input; + _output = output; + _convolved_dims = convolved_dims; + + // Create kernel + std::set build_opts; + constexpr unsigned int num_elems_processed_per_iteration = 8; + build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(num_elems_processed_per_iteration)); + build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(num_elems_processed_per_iteration)); + build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1)); + build_opts.insert("#define COL2IM"); + _kernel = static_cast(GCKernelLibrary::get().create_kernel("col2im", build_opts)); + + // Set static kernel arguments + unsigned int idx = num_arguments_per_2D_tensor() + num_arguments_per_3D_tensor(); + _kernel.set_params(idx++, _convolved_dims.first); + + // Configure window + Window win = calculate_max_window(*input->info(), Steps()); + + // The GCCol2ImKernel doesn't need padding so update_window_and_padding() can be skipped + output->info()->set_valid_region(ValidRegion(Coordinates(), output->info()->tensor_shape())); + + // set shader params binding point + _kernel.set_shader_params_binding_point(0); + + 
IGCKernel::configure(win); +} + +void GCCol2ImKernel::run(const Window &window) +{ + ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); + ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(IGCKernel::window(), window); + + Window slice_in = window.first_slice_window_2D(); + Window slice_out = window.first_slice_window_3D(); + + _kernel.use(); + + do + { + // Set inputs + unsigned int idx = 0; + unsigned int binding = 1; + add_2D_tensor_argument(idx, _input, binding++, slice_in); + add_3D_tensor_argument(idx, _output, binding++, slice_out); + _kernel.update_shader_params(); + enqueue(*this, slice_in); + } + while(window.slide_window_slice_2D(slice_in) && window.slide_window_slice_3D(slice_out)); +} diff --git a/src/core/GLES_COMPUTE/kernels/GCDepthConcatenateKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCDepthConcatenateKernel.cpp new file mode 100644 index 0000000000..7f9f438a46 --- /dev/null +++ b/src/core/GLES_COMPUTE/kernels/GCDepthConcatenateKernel.cpp @@ -0,0 +1,145 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/core/GLES_COMPUTE/kernels/GCDepthConcatenateKernel.h" + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h" +#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h" +#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h" +#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Utils.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/core/Window.h" + +#include "support/ToolchainSupport.h" + +using namespace arm_compute; + +GCDepthConcatenateKernel::GCDepthConcatenateKernel() + : _input(nullptr), _output(nullptr), _top_bottom(0), _left_right(0) +{ +} + +BorderSize GCDepthConcatenateKernel::border_size() const +{ + return BorderSize(_top_bottom, _left_right); +} + +void GCDepthConcatenateKernel::configure(const IGCTensor *input, unsigned int depth_offset, IGCTensor *output) +{ + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); + ARM_COMPUTE_ERROR_ON(input->info()->dimension(2) + depth_offset > output->info()->dimension(2)); + ARM_COMPUTE_ERROR_ON(input->info()->dimension(0) > output->info()->dimension(0)); + ARM_COMPUTE_ERROR_ON(input->info()->dimension(1) > output->info()->dimension(1)); + ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(3, input, output); + + // The gaps between the two lowest dimensions of input and output need to be divisible by 2 + // Otherwise it is not clear how the padding should be added onto the input tensor + ARM_COMPUTE_ERROR_ON((output->info()->dimension(0) - input->info()->dimension(0)) % 2); + 
ARM_COMPUTE_ERROR_ON((output->info()->dimension(1) - input->info()->dimension(1)) % 2); + + _input = input; + _output = output; + + // Add build options + std::set build_opts; + std::string dt_name = (input->info()->data_type() == DataType::F32) ? "DATA_TYPE_FP32" : "DATA_TYPE_FP16"; + build_opts.emplace(("#define " + dt_name)); + build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(1)); + build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1)); + build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1)); + + // Configure kernel window + _left_right = (output->info()->dimension(0) - input->info()->dimension(0)) / 2; + _top_bottom = (output->info()->dimension(1) - input->info()->dimension(1)) / 2; + + const int offset_to_first_elements_in_bytes = depth_offset * output->info()->strides_in_bytes()[2]; + + build_opts.emplace("#define OFFSETS_X " + support::cpp11::to_string(_left_right)); + build_opts.emplace("#define OFFSETS_Y " + support::cpp11::to_string(_top_bottom)); + build_opts.emplace("#define OFFSETS_Z " + support::cpp11::to_string(offset_to_first_elements_in_bytes)); + + // Create kernel + _kernel = static_cast(GCKernelLibrary::get().create_kernel("concatenate_depth", build_opts)); + + unsigned int num_elems_processed_per_iteration = 1; + unsigned int num_elems_read_per_iteration = 1; + if(input->info()->data_type() == DataType::F32) + { + num_elems_processed_per_iteration = 1; + num_elems_read_per_iteration = 1; + } + else if(input->info()->data_type() == DataType::F16) + { + num_elems_processed_per_iteration = 4; + num_elems_read_per_iteration = 4; + } + const unsigned int num_rows_read_per_iteration = 1; + + // The window needs to be based on input as we copy all the depths of input + Window win = calculate_max_window(*output->info(), Steps(num_elems_processed_per_iteration)); + win.set(Window::DimZ, Window::Dimension(0, input->info()->tensor_shape().z(), 1)); + + AccessWindowRectangle 
input_access(input->info(), -_left_right, -_top_bottom, num_elems_read_per_iteration, num_rows_read_per_iteration); + AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration); + update_window_and_padding(win, input_access, output_access); + output_access.set_valid_region(win, ValidRegion(Coordinates(0, 0), output->info()->tensor_shape())); + + _kernel.clear_params(); + _kernel.set_shader_params_binding_point(0); + IGCKernel::configure(win); +} + +void GCDepthConcatenateKernel::run(const Window &window) +{ + ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); + ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IGCKernel::window(), window); + + _kernel.use(); + + Window slice = window.first_slice_window_3D(); + + do + { + if(_input->info()->data_type() == DataType::F32) + { + unsigned int idx = 0; + add_3D_tensor_argument(idx, _input, 1, slice); + add_3D_tensor_argument(idx, _output, 2, slice); + } + else if(_input->info()->data_type() == DataType::F16) + { + unsigned int idx = 0; + add_3D_tensor_argument(idx, _input, BufferParam(1, 3), slice); + add_3D_tensor_argument(idx, _output, BufferParam(2, 3), slice); + } + + _kernel.update_shader_params(); + + enqueue(*this, slice); + } + while(window.slide_window_slice_3D(slice)); +} diff --git a/src/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.cpp new file mode 100644 index 0000000000..1fa2a71fff --- /dev/null +++ b/src/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.cpp @@ -0,0 +1,394 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.h" + +#include "arm_compute/core/AccessWindowStatic.h" +#include "arm_compute/core/Error.h" +#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h" +#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h" +#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/IAccessWindow.h" +#include "arm_compute/core/ITensor.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/core/Validate.h" +#include "support/ToolchainSupport.h" + +using namespace arm_compute; + +template +GCDirectConvolutionLayerKernel::GCDirectConvolutionLayerKernel() + : _input(nullptr), _bias(nullptr), _weights(nullptr), _output(nullptr), _border_size(0), _conv_stride_x(0), _conv_stride_y(0), _conv_pad_x(0), _conv_pad_y(0), _lws(gles::NDRange(1U, 1U, 1U)) +{ +} + +template +BorderSize GCDirectConvolutionLayerKernel::border_size() const +{ + return _border_size; +} + +template +void GCDirectConvolutionLayerKernel::configure(const IGCTensor *input, const IGCTensor *weights, const IGCTensor *bias, IGCTensor *output, const PadStrideInfo &conv_info) +{ + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights, output); + ARM_COMPUTE_ERROR_ON(weights->info()->dimension(2) != input->info()->dimension(2)); + ARM_COMPUTE_ERROR_ON(weights->info()->dimension(0) != weights->info()->dimension(1)); + ARM_COMPUTE_ERROR_ON(weights->info()->num_dimensions() > 4); + ARM_COMPUTE_ERROR_ON_MSG((kernel_size == 3 && std::get<0>(conv_info.stride()) > 2), "Strides larger than 2 not supported in 3x3 direct convolution!"); + ARM_COMPUTE_ERROR_ON(kernel_size != weights->info()->dimension(0)); + + if(bias != nullptr) + { + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(weights, bias); + // FIXME: Bug in framework, workaround it in tests currently. 
+ //ARM_COMPUTE_ERROR_ON(bias->info()->dimension(0) != weights->info()->dimension(3)); + ARM_COMPUTE_ERROR_ON(bias->info()->num_dimensions() > 1); + } + + _conv_stride_x = std::get<0>(conv_info.stride()); + _conv_stride_y = std::get<1>(conv_info.stride()); + _conv_pad_x = std::get<0>(conv_info.pad()); + _conv_pad_y = std::get<1>(conv_info.pad()); + + _input = input; + _weights = weights; + _output = output; + _bias = bias; + _border_size = BorderSize(_conv_pad_y, _conv_pad_x); + + std::set options; + + options.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(_lws[0])); + options.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(_lws[1])); + options.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(_lws[2])); + options.emplace("#define STRIDE_X " + support::cpp11::to_string(_conv_stride_x)); + + std::string dt_name = (input->info()->data_type() == DataType::F32) ? "DATA_TYPE_FP32" : "DATA_TYPE_FP16"; + options.emplace(("#define " + dt_name)); + + unsigned int num_elems_read_per_iteration_x = kernel_size * _conv_stride_x; + unsigned int num_elems_read_per_iteration_y = 1; + unsigned int num_elems_written_per_iteration_x = 1; + unsigned int num_elems_written_per_iteration_y = 1; + unsigned int num_elems_written_per_iteration_z = 1; + + if(kernel_size == 3) + { + if((_conv_stride_x == 1) && (_conv_stride_y == 1)) + { + switch(input->info()->data_type()) + { + // TODO(APPBROWSER-299): Choose the most optimal path and remove others. 
+#define PROCESS_X_4ELEMENTS_Y_3ELEMENTS_FP16 + + case DataType::F16: +#if defined(PROCESS_X_8ELEMENTS_Y_3ELEMENTS_FP16) + options.emplace("#define PROCESS_X_8ELEMENTS_Y_3ELEMENTS_FP16"); + num_elems_read_per_iteration_x = 16; + num_elems_read_per_iteration_y = 5; + num_elems_written_per_iteration_x = 8; + num_elems_written_per_iteration_y = 3; +#elif defined(PROCESS_X_4ELEMENTS_Y_3ELEMENTS_FP16) + options.emplace("#define PROCESS_X_4ELEMENTS_Y_3ELEMENTS_FP16"); + num_elems_read_per_iteration_x = 8; + num_elems_read_per_iteration_y = 5; + num_elems_written_per_iteration_x = 4; + num_elems_written_per_iteration_y = 3; +#elif defined(PROCESS_X_4ELEMENTS_Y_4ELEMENTS_FP16) + options.emplace("#define PROCESS_X_4ELEMENTS_Y_4ELEMENTS_FP16"); + num_elems_read_per_iteration_x = 8; + num_elems_read_per_iteration_y = 6; + num_elems_written_per_iteration_x = 4; + num_elems_written_per_iteration_y = 4; +#elif defined(PROCESS_X_4ELEMENTS_Y_3ELEMENTS_Z_2ELEMENTS_FP16) + options.emplace("#define PROCESS_X_4ELEMENTS_Y_3ELEMENTS_Z_2ELEMENTS_FP16"); + num_elems_read_per_iteration_x = 8; + num_elems_read_per_iteration_y = 5; + num_elems_written_per_iteration_x = 4; + num_elems_written_per_iteration_y = 3; + num_elems_written_per_iteration_z = 2; +#endif /* PROCESS_X_8ELEMENTS_Y_3ELEMENTS_FP16 */ + break; + + case DataType::F32: + options.emplace("#define PROCESS_X_4ELEMENTS_Y_3ELEMENTS"); + num_elems_read_per_iteration_x = 8; + num_elems_read_per_iteration_y = 5; + num_elems_written_per_iteration_x = 4; + num_elems_written_per_iteration_y = 3; + break; + + default: + ARM_COMPUTE_ERROR("Current data type is not supported"); + break; + } + } + // FIXME: Just keep one in release + else + { + switch(input->info()->data_type()) + { + case DataType::F16: + options.emplace("#define PROCESS_X_4ELEMENTS_FP16"); + num_elems_read_per_iteration_x = 8; + num_elems_written_per_iteration_x = 4; + break; + + case DataType::F32: + // TODO(APPBROWSER-299): Choose the most optimal path and remove 
others. +#define PROCESS_4_ELEMENT + +#if defined(PROCESS_1_ELEMENT) + options.emplace("#define PROCESS_1_ELEMENT"); + num_elems_read_per_iteration_x = 3; + num_elems_written_per_iteration_x = 1; +#elif defined(PROCESS_4_ELEMENT) + options.emplace("#define PROCESS_4_ELEMENT"); + num_elems_read_per_iteration_x = 8; + num_elems_written_per_iteration_x = 4; +#elif defined(PROCESS_8_ELEMENT) + options.emplace("#define PROCESS_8_ELEMENT"); + num_elems_read_per_iteration_x = 12; + num_elems_written_per_iteration_x = 8; +#else /* PROCESS_1_ELEMENT */ +#error Have to declare how many elements to process in one thread. +#endif /* PROCESS_1_ELEMENT */ + break; + + default: + ARM_COMPUTE_ERROR("Current data type is not supported"); + break; + } + } + } + else if(kernel_size == 1) + { + switch(input->info()->data_type()) + { + case DataType::F16: + num_elems_read_per_iteration_x = 8; + num_elems_written_per_iteration_x = 8; + break; + + case DataType::F32: + num_elems_read_per_iteration_x = 1; + num_elems_written_per_iteration_x = 1; + break; + + default: + break; + } + } + else if(kernel_size == 5) + { + switch(input->info()->data_type()) + { + case DataType::F16: + num_elems_read_per_iteration_x = 8; + num_elems_written_per_iteration_x = 4; + + default: + break; + } + } + else + { + } + + if(_bias != nullptr) + { + options.emplace("#define BIAS"); + } + + std::stringstream kernel_name; + kernel_name << "direct_convolution" << kernel_size << "x" << kernel_size; + + _kernel = static_cast(GCKernelLibrary::get().create_kernel(kernel_name.str(), options)); + + _kernel.clear_params(); + + unsigned int idx = (_bias == nullptr) ? 
3 * num_arguments_per_3D_tensor() : (num_arguments_per_1D_tensor() + 3 * num_arguments_per_3D_tensor()); + + // Calculate output right and bottom border + const int output_width = output->info()->dimension(0); + const int output_height = output->info()->dimension(1); + const int output_padding_right = ceil_to_multiple(output_width, num_elems_written_per_iteration_x * _lws[0]) - output_width; + const int output_padding_bottom = ceil_to_multiple(output_height, num_elems_written_per_iteration_y * _lws[1]) - output_height; + + // Calculate input right and bottom border + const int input_width = input->info()->dimension(0); + const int input_height = input->info()->dimension(1); + const int upper_bound_w = ceil_to_multiple(((output_width + output_padding_right) * _conv_stride_x + (kernel_size - 1)), num_elems_read_per_iteration_x * _lws[0]) - _conv_pad_x - input_width; + const int upper_bound_h = ceil_to_multiple(((output_height + output_padding_bottom) * _conv_stride_y + (kernel_size - 1)), num_elems_read_per_iteration_y * _lws[1]) - _conv_pad_y - input_height; + const int padding_right = std::max(upper_bound_w, _conv_pad_x); + const int padding_bottom = std::max(upper_bound_h, _conv_pad_y); + + BorderSize border = BorderSize(0, output_padding_right, output_padding_bottom, 0); + + Window win = calculate_max_enlarged_window(*output->info(), Steps(num_elems_written_per_iteration_x, num_elems_written_per_iteration_y, num_elems_written_per_iteration_z), border); + + AccessWindowStatic input_access(input->info(), -_conv_pad_x, -_conv_pad_y, input_width + padding_right, input_height + padding_bottom); + AccessWindowStatic weights_access = AccessWindowStatic(nullptr, 0, 0, 0, 0); + AccessWindowStatic bias_access = AccessWindowStatic(nullptr, 0, 0, 0, 1); + + switch(weights->info()->data_type()) + { + case DataType::F16: + weights_access = AccessWindowStatic(weights->info(), 0, 0, kernel_size + 1, kernel_size); + if(_bias != nullptr) + { + bias_access = 
AccessWindowStatic(_bias->info(), 0, 0, _bias->info()->dimension(0) + 1, 1); + } + break; + + case DataType::F32: + weights_access = AccessWindowStatic(weights->info(), 0, 0, kernel_size, kernel_size); + if(_bias != nullptr) + { + bias_access = AccessWindowStatic(_bias->info(), 0, 0, _bias->info()->dimension(0), 1); + } + break; + + default: + ARM_COMPUTE_ERROR("Current data type is not supported"); + break; + } + + AccessWindowStatic output_access(output->info(), 0, 0, output_width + output_padding_right, output_height + output_padding_bottom); + + if(_bias != nullptr) + { + update_window_and_padding(win, input_access, weights_access, bias_access, output_access); + } + else + { + update_window_and_padding(win, input_access, weights_access, output_access); + } + + output_access.set_valid_region(win, ValidRegion(Coordinates(), output->info()->tensor_shape())); + + _kernel.set_params(idx++, _weights->info()->strides_in_bytes()[3]); // weights_stride_w + _kernel.set_params(idx++, _weights->info()->dimension(2)); // weights_depth + + // set shader params binding point + _kernel.set_shader_params_binding_point(0); + + IGCKernel::configure(win); +} + +template +void GCDirectConvolutionLayerKernel::run(const Window &window) +{ + ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); + ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); + + _kernel.use(); + + // Get initial windows + Window slice = window.first_slice_window_3D(); + Window win_in = window; + + win_in.adjust(Window::DimX, -_conv_pad_x, true); + win_in.adjust(Window::DimY, -_conv_pad_y, true); + win_in.set_dimension_step(Window::DimX, window.x().step() * _conv_stride_x); + win_in.set_dimension_step(Window::DimY, window.y().step() * _conv_stride_y); + + Window slice_in = win_in.first_slice_window_3D(); + + unsigned int idx1 = 2 * num_arguments_per_3D_tensor(); + add_3D_tensor_argument(idx1, _weights, BufferParam(3, 2), slice); + + if(_bias != nullptr) + { + Window slice_bias; + 
slice_bias.use_tensor_dimensions(_bias->info()->tensor_shape()); + add_1D_tensor_argument(idx1, _bias, BufferParam(4, 2), slice_bias); + } + + do + { + unsigned int idx = 0; + + switch(_input->info()->data_type()) + { + case DataType::F16: + switch(kernel_size) + { + case 1: + add_3D_tensor_argument(idx, _input, BufferParam(1, 4), slice_in); + add_3D_tensor_argument(idx, _output, BufferParam(2, 4), slice); + break; + + case 3: + add_3D_tensor_argument(idx, _input, BufferParam(1, 3), slice_in); + add_3D_tensor_argument(idx, _output, BufferParam(2, 3), slice); + break; + + case 5: + add_3D_tensor_argument(idx, _input, BufferParam(1, 3), slice_in); + add_3D_tensor_argument(idx, _output, BufferParam(2, 3), slice); + break; + + default: + ARM_COMPUTE_ERROR("Current kernel size %d is not supported", kernel_size); + break; + } + break; + + case DataType::F32: + switch(kernel_size) + { + case 1: + case 5: + add_3D_tensor_argument(idx, _input, BufferParam(1, 2), slice_in); + add_3D_tensor_argument(idx, _output, BufferParam(2, 2), slice); + break; + + case 3: + add_3D_tensor_argument(idx, _input, BufferParam(1, 4), slice_in); + add_3D_tensor_argument(idx, _output, BufferParam(2, 4), slice); + break; + + default: + ARM_COMPUTE_ERROR("Current kernel size %d is not supported", kernel_size); + break; + } + break; + + default: + ARM_COMPUTE_ERROR("Current data type is not supported"); + break; + } + + _kernel.update_shader_params(); + enqueue(*this, slice, _lws); + } + while(window.slide_window_slice_3D(slice) && win_in.slide_window_slice_3D(slice_in)); +} + +template class arm_compute::GCDirectConvolutionLayerKernel<1>; +template class arm_compute::GCDirectConvolutionLayerKernel<3>; +template class arm_compute::GCDirectConvolutionLayerKernel<5>; diff --git a/src/core/GLES_COMPUTE/kernels/GCDropoutKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCDropoutKernel.cpp new file mode 100644 index 0000000000..6244fbef80 --- /dev/null +++ b/src/core/GLES_COMPUTE/kernels/GCDropoutKernel.cpp @@ 
-0,0 +1,110 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "arm_compute/core/GLES_COMPUTE/kernels/GCDropoutKernel.h" + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h" +#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h" +#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h" +#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/core/Validate.h" +#include "support/ToolchainSupport.h" + +#include +#include +#include + +using namespace arm_compute; + +GCDropoutKernel::GCDropoutKernel() + : _input(nullptr), _mask(nullptr), _output(nullptr), _num_elems_processed_per_iteration(0) +{ +} + +void GCDropoutKernel::configure(const IGCTensor *input, IGCTensor *mask, IGCTensor *output, float ratio, bool forward) +{ + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, mask, output); + + _input = input; + _mask = mask; + _output = output; + _kernel.clear_params(); + + std::set build_opts; + std::string dt_name = (input->info()->data_type() == DataType::F32) ? "DATA_TYPE_FP32" : "DATA_TYPE_FP16"; + std::string fporbp = forward ? "FORWARD" : "BACKWARD"; + std::random_device rd; + std::mt19937 mt(rd()); + std::uniform_real_distribution dist(0.f, 1.f); + + build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(1)); + build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1)); + build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1)); + build_opts.emplace("#define RATIO " + support::cpp11::to_string(ratio)); + build_opts.emplace("#define SCALE " + support::cpp11::to_string(1. / (1. 
- ratio))); + build_opts.emplace("#define SEED " + support::cpp11::to_string(dist(mt))); + build_opts.emplace("#define " + dt_name); + build_opts.emplace("#define " + fporbp); + + _num_elems_processed_per_iteration = 4 / input->info()->element_size(); + + // Create kernel + _kernel = static_cast(GCKernelLibrary::get().create_kernel("dropout", build_opts)); + + // Configure kernel window + Window win = calculate_max_window(*input->info(), Steps(_num_elems_processed_per_iteration)); + + output->info()->set_valid_region(ValidRegion(Coordinates(), output->info()->tensor_shape())); + + // set shader params binding point + _kernel.set_shader_params_binding_point(0); + IGCKernel::configure(win); +} + +void GCDropoutKernel::run(const Window &window) +{ + ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); + ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(IGCKernel::window(), window); + + _kernel.use(); + + Window slice = window.first_slice_window_3D(); + + do + { + unsigned int idx = 0; + + add_3D_tensor_argument(idx, _input, BufferParam(1, 2), slice); + add_3D_tensor_argument(idx, _mask, BufferParam(2, 2), slice); + add_3D_tensor_argument(idx, _output, BufferParam(3, 2), slice); + + _kernel.update_shader_params(); + enqueue(*this, slice); + } + while(window.slide_window_slice_3D(slice)); +} diff --git a/src/core/GLES_COMPUTE/kernels/GCFillBorderKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCFillBorderKernel.cpp new file mode 100644 index 0000000000..36742ef81e --- /dev/null +++ b/src/core/GLES_COMPUTE/kernels/GCFillBorderKernel.cpp @@ -0,0 +1,169 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/core/GLES_COMPUTE/kernels/GCFillBorderKernel.h" + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h" +#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h" +#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h" +#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Utils.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/core/Window.h" + +#include +#include +#include + +using namespace arm_compute; + +GCFillBorderKernel::GCFillBorderKernel() + : IGCKernel(), _tensor(nullptr) +{ +} + +bool GCFillBorderKernel::is_parallelisable() const +{ + return false; +} + +template +void GCFillBorderKernel::set_constant_border(unsigned int idx, const PixelValue &constant_border_value) +{ + T value; + constant_border_value.get(value); + _kernel.set_params(idx, static_cast(value)); +} + +void GCFillBorderKernel::configure(const IGCTensor *tensor, BorderSize border_size, BorderMode border_mode, const PixelValue &constant_border_value) +{ + ARM_COMPUTE_ERROR_ON(tensor == nullptr); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(tensor, 1, DataType::F32, DataType::F16); + ARM_COMPUTE_ERROR_ON(tensor->info()->num_channels() != 1); + + border_size.limit(tensor->info()->padding()); + + // If there is no border: early exit + if(border_size.empty() || border_mode == BorderMode::UNDEFINED) + { + return; + } + + // Select appropriate kernel + std::string kernel_name = "fill_image_borders_" + lower_string(string_from_border_mode(border_mode)); + + // Define build options + std::set build_opts; + build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(1)); + build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1)); + build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1)); + build_opts.emplace("#define BORDER_SIZE_TOP " + support::cpp11::to_string(border_size.top)); + 
build_opts.emplace("#define BORDER_SIZE_BOTTOM " + support::cpp11::to_string(border_size.bottom)); + build_opts.emplace("#define BORDER_SIZE_LEFT " + support::cpp11::to_string(border_size.left)); + build_opts.emplace("#define BORDER_SIZE_RIGHT " + support::cpp11::to_string(border_size.right)); + + if(border_mode == BorderMode::REPLICATE) + { + build_opts.emplace("#define FILL_IMAGE_BORDERS_REPLICATE\n"); + } + else + { + build_opts.emplace("#define FILL_IMAGE_BORDERS_CONSTANT\n"); + } + + switch(tensor->info()->data_type()) + { + case DataType::F16: + build_opts.emplace("#define DATA_TYPE_FP16"); + break; + + case DataType::F32: + build_opts.emplace("#define DATA_TYPE_FP32"); + break; + + default: + ARM_COMPUTE_ERROR("Current data type is not supported"); + break; + } + + // Create kernel + _kernel = static_cast(GCKernelLibrary::get().create_kernel(kernel_name, build_opts)); + _tensor = tensor; + + _kernel.clear_params(); + + // Create static kernel arguments + const unsigned int valid_width = tensor->info()->valid_region().shape[0]; + const unsigned int valid_height = tensor->info()->valid_region().shape[1]; + const unsigned int total_valid_width = border_size.left + valid_width + border_size.right; + + // Set static kernel arguments + unsigned int idx = num_arguments_per_3D_tensor(); //Skip the tensor parameters + _kernel.set_params(idx++, valid_width); + _kernel.set_params(idx++, valid_height); + _kernel.set_params(idx++, tensor->info()->valid_region().anchor[0]); + _kernel.set_params(idx++, tensor->info()->valid_region().anchor[1]); + + if(BorderMode::CONSTANT == border_mode) + { + set_constant_border(idx++, constant_border_value); + } + + // Configure kernel window + Window win; + win.set(Window::DimX, Window::Dimension(0, total_valid_width + valid_height)); + win.set(Window::DimY, Window::Dimension(0, 1, 1)); + win.use_tensor_dimensions(tensor->info()->tensor_shape(), Window::DimZ); + + _kernel.set_shader_params_binding_point(0); + + 
IGCKernel::configure(win); +} + +void GCFillBorderKernel::run(const Window &window) +{ + // Border mode undefined or border width == 0 + if(_kernel.get_program() == 0) + { + return; + } + + ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); + ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(IGCKernel::window(), window); + + _kernel.use(); + Window slice = window.first_slice_window_3D(); + + do + { + unsigned int idx = 0; + add_3D_tensor_argument(idx, _tensor, 1, slice); + + _kernel.update_shader_params(); + + enqueue(*this, slice); + } + while(window.slide_window_slice_3D(slice)); +} diff --git a/src/core/GLES_COMPUTE/kernels/GCGEMMInterleave4x4Kernel.cpp b/src/core/GLES_COMPUTE/kernels/GCGEMMInterleave4x4Kernel.cpp new file mode 100644 index 0000000000..5e3788af99 --- /dev/null +++ b/src/core/GLES_COMPUTE/kernels/GCGEMMInterleave4x4Kernel.cpp @@ -0,0 +1,129 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/core/GLES_COMPUTE/kernels/GCGEMMInterleave4x4Kernel.h" + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h" +#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h" +#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h" +#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/core/Utils.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/core/Window.h" + +using namespace arm_compute; + +GCGEMMInterleave4x4Kernel::GCGEMMInterleave4x4Kernel() + : _input(nullptr), _output(nullptr) +{ +} + +void GCGEMMInterleave4x4Kernel::configure(const IGCTensor *input, IGCTensor *output) +{ + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32); + ARM_COMPUTE_ERROR_ON_NULLPTR(output); + + TensorShape output_shape = input->info()->tensor_shape(); + output_shape.set(0, input->info()->dimension(0) * 4); + output_shape.set(1, std::ceil(input->info()->dimension(1) / 4.0f)); + + // Output auto initialization if not yet initialized + auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type(), input->info()->fixed_point_position()); + + ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(output->info()->tensor_shape(), output_shape); + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); + + _input = input; + _output = output; + + std::set build_opts; + std::string dt_name = (input->info()->data_type() == DataType::F32) ?
"DATA_TYPE_FP32" : "DATA_TYPE_FP16"; + build_opts.emplace(("#define " + dt_name)); + build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(1)); + build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1)); + build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1)); + + // Create kernel + build_opts.emplace("#define GEMM_INTERLEAVE4x4"); + _kernel = static_cast(GCKernelLibrary::get().create_kernel("gemm_interleave4x4", build_opts)); + + // Configure kernel window + const unsigned int num_elems_processed_per_iteration_x = max_gc_vector_width / data_size_from_type(input->info()->data_type()); + constexpr unsigned int num_elems_processed_per_iteration_y = 4; + const unsigned int num_elems_written_per_iteration = num_elems_processed_per_iteration_x * num_elems_processed_per_iteration_y; + + Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y)); + + AccessWindowRectangle input_access(input->info(), 0, 0, num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y); + AccessWindowRectangle output_access(output->info(), 0, 0, num_elems_written_per_iteration, 1, 4.f, 0.25f); + + update_window_and_padding(win, input_access, output_access); + + output_access.set_valid_region(win, input->info()->valid_region()); + + _kernel.clear_params(); + + // set shader params binding point + _kernel.set_shader_params_binding_point(0); + + IGCKernel::configure(win); +} + +void GCGEMMInterleave4x4Kernel::run(const Window &window) +{ + ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); + ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IGCKernel::window(), window); + + _kernel.use(); + + /* + * This kernel puts the values in a 4x4 block of Matrix A on the same row (Interleaved values) + * |a00 a01 a02 a03| + * |a10 a11 a12 a13| + * |a20 a21 a22 a23| = | a00 a10 a20 a30 || a01 a11 a21 a31 || a02 a12 a22 a32 || a03 a13 a23 a33 | + * |a30 a31 a32 a33| + * + * After 
this operation, the output matrix will have the following shape: [ height * 4, width / 4 ] + */ + Window in_slice = window.first_slice_window_2D(); + Window out_slice = window.first_slice_window_2D(); + + // Change x and y steps for the slide of output tensor + out_slice.scale(Window::DimX, 4.f); + out_slice.scale(Window::DimY, 0.25f); + + do + { + unsigned int idx = 0; + add_2D_tensor_argument(idx, _input, 1, in_slice); + add_2D_tensor_argument(idx, _output, 2, out_slice); + + _kernel.update_shader_params(); + + enqueue(*this, in_slice); + } + while(window.slide_window_slice_2D(in_slice) && window.slide_window_slice_2D(out_slice)); +} diff --git a/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixAccumulateBiasesKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixAccumulateBiasesKernel.cpp new file mode 100644 index 0000000000..434070a46c --- /dev/null +++ b/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixAccumulateBiasesKernel.cpp @@ -0,0 +1,123 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixAccumulateBiasesKernel.h" + +#include "arm_compute/core/AccessWindowStatic.h" +#include "arm_compute/core/Error.h" +#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h" +#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h" +#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h" +#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/core/Utils.h" +#include "arm_compute/core/Validate.h" + +using namespace arm_compute; + +GCGEMMMatrixAccumulateBiasesKernel::GCGEMMMatrixAccumulateBiasesKernel() + : _accum(nullptr), _biases(nullptr) +{ +} + +void GCGEMMMatrixAccumulateBiasesKernel::configure(IGCTensor *accum, const IGCTensor *biases) +{ + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(accum, 1, DataType::F16, DataType::F32); + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(biases, accum); + ARM_COMPUTE_ERROR_ON(biases->info()->num_dimensions() != 1); + + _biases = biases; + _accum = accum; + + std::set build_opts; + build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(1)); + build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1)); + build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1)); + + // Create kernel + build_opts.emplace("#define GEMM_ACCUMULATE_BIASES"); + std::string dt_name = (accum->info()->data_type() == DataType::F32) ? 
"DATA_TYPE_FP32" : "DATA_TYPE_FP16"; + build_opts.emplace(("#define " + dt_name)); + _kernel = GCKernelLibrary::get().create_kernel("gemm_accumulate_biases", build_opts); + + // Configure kernel window + unsigned int num_elems_processed_per_iteration = 1; + + if(_accum->info()->data_type() == DataType::F32) + { + num_elems_processed_per_iteration = 16; + } + else if(_accum->info()->data_type() == DataType::F16) + { + num_elems_processed_per_iteration = 4; + } + + Window win = calculate_max_window(*_accum->info(), Steps(num_elems_processed_per_iteration)); + + AccessWindowStatic biases_access(biases->info(), 0, 0, ceil_to_multiple(biases->info()->dimension(0), num_elems_processed_per_iteration), biases->info()->dimension(1)); + AccessWindowHorizontal accum_access(_accum->info(), 0, num_elems_processed_per_iteration); + + update_window_and_padding(win, biases_access, accum_access); + + _kernel.clear_params(); + // set shader params binding point + _kernel.set_shader_params_binding_point(0); + + IGCKernel::configure(win); +} + +void GCGEMMMatrixAccumulateBiasesKernel::run(const Window &window) +{ + ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); + ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(IGCKernel::window(), window); + + _kernel.use(); + + Window accum_slice = window.first_slice_window_2D(); + + Window biases_slice(accum_slice); + biases_slice.set(Window::DimY, Window::Dimension(0, 1, 1)); + + // Run kernel + do + { + // Set arguments + unsigned int idx = 0; + if(_accum->info()->data_type() == DataType::F32) + { + add_2D_tensor_argument(idx, _accum, 1, accum_slice); + add_1D_tensor_argument(idx, _biases, 2, biases_slice); + } + else if(_accum->info()->data_type() == DataType::F16) + { + add_2D_tensor_argument(idx, _accum, BufferParam(1, 3), accum_slice); + add_1D_tensor_argument(idx, _biases, BufferParam(2, 3), biases_slice); + } + + _kernel.update_shader_params(); + + enqueue(*this, accum_slice); + } + while(window.slide_window_slice_2D(accum_slice)); +} diff --git 
a/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixAdditionKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixAdditionKernel.cpp new file mode 100644 index 0000000000..fa0415249a --- /dev/null +++ b/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixAdditionKernel.cpp @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixAdditionKernel.h" + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h" +#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h" +#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h" +#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/core/Window.h" + +using namespace arm_compute; + +GCGEMMMatrixAdditionKernel::GCGEMMMatrixAdditionKernel() + : _input(nullptr), _output(nullptr) +{ +} + +void GCGEMMMatrixAdditionKernel::configure(const IGCTensor *input, IGCTensor *output, float beta) +{ + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32); + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); + ARM_COMPUTE_ERROR_ON(input->info()->dimension(0) != output->info()->dimension(0)); + ARM_COMPUTE_ERROR_ON(input->info()->dimension(1) != output->info()->dimension(1)); + + _input = input; + _output = output; + const unsigned int num_elems_processed_per_iteration = max_gc_vector_width / data_size_from_type(input->info()->data_type()); + + std::set build_opts; + std::string dt_name = (input->info()->data_type() == DataType::F32) ? 
"DATA_TYPE_FP32" : "DATA_TYPE_FP16"; + build_opts.emplace(("#define " + dt_name)); + build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(1)); + build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1)); + build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1)); + build_opts.emplace("#define BETA " + float_to_string_with_full_precision(beta)); + + // Create kernel + build_opts.emplace("#define GEMM_MATRIXADDITION"); + std::string data_type_name = lower_string(string_from_data_type(input->info()->data_type())); + _kernel = GCKernelLibrary::get().create_kernel(("gemm_ma"), build_opts); + + // Configure kernel window + Window win = calculate_max_window(*_input->info(), Steps(num_elems_processed_per_iteration)); + + AccessWindowHorizontal input_access(input->info(), 0, num_elems_processed_per_iteration); + AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration); + + update_window_and_padding(win, input_access, output_access); + + output_access.set_valid_region(win, input->info()->valid_region()); + + _kernel.clear_params(); + // set shader params binding point + _kernel.set_shader_params_binding_point(0); + + IGCKernel::configure(win); +} + +void GCGEMMMatrixAdditionKernel::run(const Window &window) +{ + ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); + ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IGCKernel::window(), window); + + _kernel.use(); + + Window slice = window.first_slice_window_2D(); + + do + { + unsigned int idx = 0; + add_2D_tensor_argument(idx, _input, 1, slice); + add_2D_tensor_argument(idx, _output, 2, slice); + + _kernel.update_shader_params(); + + enqueue(*this, slice); + } + while(window.slide_window_slice_2D(slice)); +} diff --git a/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixMultiplyKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixMultiplyKernel.cpp new file mode 100644 index 0000000000..ea9b3874b2 --- /dev/null +++ 
b/src/core/GLES_COMPUTE/kernels/GCGEMMMatrixMultiplyKernel.cpp @@ -0,0 +1,210 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixMultiplyKernel.h" + +#include "arm_compute/core/AccessWindowStatic.h" +#include "arm_compute/core/AccessWindowTranspose.h" +#include "arm_compute/core/Error.h" +#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h" +#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h" +#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h" +#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/core/Utils.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/core/Window.h" + +#include +#include + +using namespace arm_compute; + +GCGEMMMatrixMultiplyKernel::GCGEMMMatrixMultiplyKernel() + : _input0(nullptr), _input1(nullptr), _output(nullptr) +{ +} + +void GCGEMMMatrixMultiplyKernel::configure(const IGCTensor *input0, const IGCTensor *input1, IGCTensor *output, float alpha, bool is_interleaved_transposed) +{ + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input0, 1, DataType::F32, DataType::F16); + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input0, input1, output); + + if(!is_interleaved_transposed) + { + ARM_COMPUTE_ERROR_ON(input0->info()->dimension(0) != input1->info()->dimension(1)); + } + + _input0 = input0; + _input1 = input1; + _output = output; + + std::set build_opts; + Window win; + + build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(1)); + build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1)); + build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1)); + build_opts.emplace("#define COLS_A " + support::cpp11::to_string(input0->info()->dimension(0))); + build_opts.emplace("#define COLS_B " + support::cpp11::to_string(input1->info()->dimension(0))); + build_opts.emplace("#define ALPHA " + float_to_string_with_full_precision(alpha)); + + // Check if the output tensor is a vector. 
If so,the kernel runs the vector-matrix multiplication + if(is_interleaved_transposed) + { + switch(input0->info()->data_type()) + { + case DataType::F16: + build_opts.emplace("#define DATA_TYPE_FP16"); + break; + + case DataType::F32: + build_opts.emplace("#define DATA_TYPE_FP32"); + break; + + default: + ARM_COMPUTE_ERROR("Current data type is not supported"); + break; + } + + build_opts.emplace("#define GEMM_MM_INTERLEAVED_TRANSPOSED"); + + // Create kernel + _kernel = GCKernelLibrary::get().create_kernel(("gemm_mm_interleaved_transposed"), build_opts); + + // Configure window kernel + const unsigned int num_elems_processed_per_iteration_x = max_gc_vector_width / data_size_from_type(input0->info()->data_type()); + constexpr unsigned int num_elems_processed_per_iteration_y = 4; + + win = calculate_max_window(*output->info(), Steps(num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y)); + + AccessWindowRectangle input0_access(input0->info(), 0, 0, num_elems_processed_per_iteration_y, 1, 1.f, 0.25f); + AccessWindowTranspose input1_access(input1->info(), 0, 0, num_elems_processed_per_iteration_x, 1, 0.f, 0.25f); + AccessWindowRectangle output_access(output->info(), 0, 0, num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y); + + update_window_and_padding(win, input0_access, input1_access, output_access); + + output_access.set_valid_region(win, ValidRegion(Coordinates(0, 0), output->info()->tensor_shape())); + } + else + { + ARM_COMPUTE_ERROR_ON(input0->info()->dimension(0) != input1->info()->dimension(1)); + + // Special case for 1xN, 2xN, 3xN and 4xN input0 tensor + unsigned int num_elems_processed_per_iteration_x; + unsigned int num_elems_processed_per_iteration_y; + + switch(input0->info()->data_type()) + { + case DataType::F16: + num_elems_processed_per_iteration_x = 4; + num_elems_processed_per_iteration_y = 1; + build_opts.emplace("#define DATA_TYPE_FP16"); + break; + + case DataType::F32: + 
num_elems_processed_per_iteration_x = max_gc_vector_width / data_size_from_type(input0->info()->data_type()); + num_elems_processed_per_iteration_y = std::min(static_cast(output->info()->dimension(1)), 4); + build_opts.emplace("#define DATA_TYPE_FP32"); + break; + + default: + ARM_COMPUTE_ERROR("Current data type is not supported"); + break; + } + + build_opts.emplace("#define GEMM_MM_FLOATING_POINT"); + build_opts.emplace("#define NUM_ELEMS_PROCESSED_PER_THREAD_X " + support::cpp11::to_string(num_elems_processed_per_iteration_x)); + build_opts.emplace("#define NUM_ELEMS_PROCESSED_PER_THREAD_Y " + support::cpp11::to_string(num_elems_processed_per_iteration_y)); + + // Create kernel + _kernel = GCKernelLibrary::get().create_kernel("gemm_mm_floating_point", build_opts); + + win = calculate_max_window(*output->info(), Steps(num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y)); + + AccessWindowStatic input0_access(input0->info(), 0, 0, ceil_to_multiple(input0->info()->dimension(0), num_elems_processed_per_iteration_x), ceil_to_multiple(input0->info()->dimension(1), + num_elems_processed_per_iteration_y)); + AccessWindowStatic input1_access(input1->info(), 0, 0, ceil_to_multiple(input1->info()->dimension(0), num_elems_processed_per_iteration_x), input1->info()->dimension(1)); + AccessWindowRectangle output_access(output->info(), 0, 0, num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y); + + update_window_and_padding(win, input0_access, input1_access, output_access); + + Coordinates coord; + coord.set_num_dimensions(output->info()->num_dimensions()); + output_access.set_valid_region(win, ValidRegion(coord, output->info()->tensor_shape())); + } + + _kernel.clear_params(); + _kernel.set_shader_params_binding_point(0); + IGCKernel::configure(win); +} + +void GCGEMMMatrixMultiplyKernel::run(const Window &window) +{ + ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); + ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IGCKernel::window(), 
window); + + _kernel.use(); + + Window slice = window.first_slice_window_2D(); + Window slice_matrix_b = slice; + + slice_matrix_b.set(Window::DimX, Window::Dimension(0, 1, 1)); + slice_matrix_b.set(Window::DimY, Window::Dimension(0, 1, 1)); + + do + { + Window slice_b = slice; + // Don't slice matrix B along the z dimension if matrix B has just 2 dimensions and matrix A more than 2 + // This scenario can happen when the matrix multiplication is used to perform a convolution operation + if(_input1->info()->num_dimensions() < 3) + { + slice_b = slice_matrix_b; + } + + unsigned int idx = 0; + switch(_input0->info()->data_type()) + { + case DataType::F16: + add_2D_tensor_argument(idx, _input0, BufferParam(1, 2), slice); + add_2D_tensor_argument(idx, _input1, BufferParam(2, 3), slice_b); + add_2D_tensor_argument(idx, _output, BufferParam(3, 3), slice); + break; + + case DataType::F32: + add_2D_tensor_argument(idx, _input0, BufferParam(1, 2), slice); + add_2D_tensor_argument(idx, _input1, BufferParam(2, 2), slice_b); + add_2D_tensor_argument(idx, _output, BufferParam(3, 2), slice); + break; + + default: + ARM_COMPUTE_ERROR("Current data type is not supported"); + break; + } + + _kernel.update_shader_params(); + enqueue(*this, slice); + } + while(window.slide_window_slice_2D(slice)); +} diff --git a/src/core/GLES_COMPUTE/kernels/GCGEMMTranspose1xWKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCGEMMTranspose1xWKernel.cpp new file mode 100644 index 0000000000..a1270b4c3d --- /dev/null +++ b/src/core/GLES_COMPUTE/kernels/GCGEMMTranspose1xWKernel.cpp @@ -0,0 +1,128 @@ +/* + * Copyright (c) 2017 ARM Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/core/GLES_COMPUTE/kernels/GCGEMMTranspose1xWKernel.h" + +#include "arm_compute/core/AccessWindowTranspose.h" +#include "arm_compute/core/Error.h" +#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h" +#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h" +#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h" +#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/core/Window.h" + +#include + +using namespace arm_compute; + +void GCGEMMTranspose1xWKernel::configure(const IGCTensor *input, IGCTensor *output) +{ + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32); + ARM_COMPUTE_ERROR_ON_NULLPTR(output); + + TensorShape output_shape{ input->info()->tensor_shape() }; + const size_t transpose_w = 16 / input->info()->element_size(); + output_shape.set(0, input->info()->dimension(1) * transpose_w); + output_shape.set(1, static_cast(std::ceil((input->info()->dimension(0) / static_cast(transpose_w))))); + + // Output tensor auto initialization if not yet initialized + auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type(), input->info()->fixed_point_position()); + + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); + ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(output->info()->tensor_shape(), output_shape); + + const unsigned int num_elems_processed_per_iteration = 16 / input->info()->element_size(); + const int scale_x = num_elems_processed_per_iteration; + + _input = input; + _output = output; + + std::set build_opts; + std::string dt_name = (input->info()->data_type() == DataType::F32) ?
"DATA_TYPE_FP32" : "DATA_TYPE_FP16"; + build_opts.emplace(("#define " + dt_name)); + build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(1)); + build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1)); + build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1)); + /* + * Following an example of how the transposition1xW works when the input data type is F32 + * + * |a00 a01 a02 a03| + * |a10 a11 a12 a13| + * |a20 a21 a22 a23| = | a00 a01 a02 a03 || a10 a11 a12 a13 || a20 a21 a22 a23 || a30 a31 a32 a33 | + * |a30 a31 a32 a33| + * + * The output matrix will have the following shape: [ height * W, ceil(width / W) ], where W = (16 / element size of the tensor) + */ + // Create kernel + build_opts.emplace("#define GEMM_TRANSPOSE1xW"); + _kernel = GCKernelLibrary::get().create_kernel("gemm_transpose1x4", build_opts); + + // Configure window + Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration)); + + ARM_COMPUTE_ERROR_ON_MSG((win.x().end() / scale_x) == 0, "Transposed shape would be 0 in the second dimension"); + + AccessWindowHorizontal input_access(input->info(), 0, num_elems_processed_per_iteration); + AccessWindowTranspose output_access(output->info(), 0, 0, num_elems_processed_per_iteration, 1, scale_x, 1.f / scale_x); + + update_window_and_padding(win, input_access, output_access); + + output_access.set_valid_region(win, ValidRegion(Coordinates(0, 0), input->info()->tensor_shape())); + + _kernel.clear_params(); + // set shader params binding point + _kernel.set_shader_params_binding_point(0); + + IGCKernel::configure(win); +} + +void GCGEMMTranspose1xWKernel::run(const Window &window) +{ + ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); + ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IGCKernel::window(), window); + + _kernel.use(); + + // Output is transposed + Window out_window(window); + out_window.set(Window::DimX, window.y()); + out_window.set(Window::DimY, window.x()); + + 
Window in_slice = window.first_slice_window_2D(); + Window out_slice = out_window.first_slice_window_2D(); + + do + { + unsigned int idx = 0; + add_2D_tensor_argument(idx, _input, 1, in_slice); + add_2D_tensor_argument(idx, _output, 2, out_slice); + + _kernel.update_shader_params(); + + enqueue(*this, in_slice); + } + while(window.slide_window_slice_2D(in_slice) && out_window.slide_window_slice_2D(out_slice)); +} diff --git a/src/core/GLES_COMPUTE/kernels/GCIm2ColKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCIm2ColKernel.cpp new file mode 100644 index 0000000000..935d8420ff --- /dev/null +++ b/src/core/GLES_COMPUTE/kernels/GCIm2ColKernel.cpp @@ -0,0 +1,230 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/core/GLES_COMPUTE/kernels/GCIm2ColKernel.h" + +#include "arm_compute/core/AccessWindowStatic.h" +#include "arm_compute/core/Error.h" +#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h" +#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h" +#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h" +#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/core/Validate.h" +#include "support/ToolchainSupport.h" + +#include +#include + +using namespace arm_compute; + +GCIm2ColKernel::GCIm2ColKernel() + : _input(nullptr), _output(nullptr), _convolved_dims(), _num_elems_processed_per_iteration(1), _run_func(nullptr) +{ +} + +void GCIm2ColKernel::configure(const IGCTensor *input, IGCTensor *output, std::pair kernel_dims, const PadStrideInfo &conv_info, bool has_bias) +{ + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); + ARM_COMPUTE_UNUSED(kernel_dims); + + _input = input; + _output = output; + _kernel.clear_params(); + + std::set build_opts; + std::string dt_name = (input->info()->data_type() == DataType::F32) ? 
"DATA_TYPE_FP32" : "DATA_TYPE_FP16"; + build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(1)); + build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1)); + build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1)); + build_opts.insert("#define " + dt_name); + + if(has_bias) + { + build_opts.emplace("#define HAS_BIAS"); + } + + int pad_x = 0; + int pad_y = 0; + int stride_x = 0; + int stride_y = 0; + std::tie(pad_x, pad_y) = conv_info.pad(); + std::tie(stride_x, stride_y) = conv_info.stride(); + + const bool run_img2col_reduced = (output->info()->dimension(0) == (input->info()->dimension(0) * input->info()->dimension(1) * input->info()->dimension(2))) && (TensorShape::num_max_dimensions >= 4) + && (std::equal(input->info()->tensor_shape().cbegin() + 3, + input->info()->tensor_shape().cend(), + output->info()->tensor_shape().cbegin() + 1)) + && ((stride_x == 1) && (stride_y == 1) && (pad_x == 0) && (pad_y == 0)); + + if(!run_img2col_reduced) + { + // this path is currently not used and not validated + build_opts.insert("#define IM2COL_GENERIC"); + _convolved_dims = scaled_dimensions(input->info()->dimension(0), input->info()->dimension(1), + kernel_dims.first, kernel_dims.second, + conv_info); + _num_elems_processed_per_iteration = output->info()->dimension(0); + + build_opts.emplace("#define KERNEL_WIDTH " + support::cpp11::to_string(kernel_dims.first)); + build_opts.emplace("#define KERNEL_HEIGHT " + support::cpp11::to_string(kernel_dims.second)); + build_opts.emplace("#define KERNEL_DEPTH " + support::cpp11::to_string(input->info()->dimension(2))); + build_opts.emplace("#define CONVOLVED_WIDTH " + support::cpp11::to_string(_convolved_dims.first)); + build_opts.emplace("#define CONVOLVED_HEIGHT " + support::cpp11::to_string(_convolved_dims.second)); + build_opts.emplace("#define STRIDE_X " + support::cpp11::to_string(conv_info.stride().first)); + build_opts.emplace("#define STRIDE_Y " + 
support::cpp11::to_string(conv_info.stride().second)); + build_opts.emplace("#define PAD_X " + support::cpp11::to_string(conv_info.pad().first)); + build_opts.emplace("#define PAD_Y " + support::cpp11::to_string(conv_info.pad().second)); + build_opts.emplace("#define SRC_WIDTH " + support::cpp11::to_string(input->info()->dimension(0))); + build_opts.emplace("#define SRC_HEIGHT " + support::cpp11::to_string(input->info()->dimension(1))); + + // Create kernel + _kernel = static_cast(GCKernelLibrary::get().create_kernel("im2col_generic", build_opts)); + + _run_func = &GCIm2ColKernel::run_generic; + } + else + { + build_opts.insert("#define IM2COL_REDUCED"); + _num_elems_processed_per_iteration = 4 / input->info()->element_size(); + + // Create kernel + _kernel = static_cast(GCKernelLibrary::get().create_kernel("im2col_reduced", build_opts)); + + _run_func = &GCIm2ColKernel::run_reduced; + } + + // Configure kernel window + Window win = calculate_max_window(*input->info(), Steps(_num_elems_processed_per_iteration)); + + if(input->info()->data_type() == DataType::F16) + { + // Calculate input right and bottom border + AccessWindowHorizontal input_access(input->info(), 0, _num_elems_processed_per_iteration); + + // Calculate output right and bottom border + const int output_width = output->info()->dimension(0); + const int output_height = output->info()->dimension(1); + const int output_padding_right = ceil_to_multiple(output_width, _num_elems_processed_per_iteration) - output_width; + AccessWindowStatic output_access(output->info(), 0, 0, output_width + output_padding_right, output_height); + + update_window_and_padding(win, input_access, output_access); + } + + output->info()->set_valid_region(ValidRegion(Coordinates(), output->info()->tensor_shape())); + + if(!run_img2col_reduced) + { + // set the Z dimension's step same size as the whole dimension so that one can't split across the Z dimension + win.set_dimension_step(Window::DimZ, win[Window::DimZ].end() - 
win[Window::DimZ].start()); + } + + // set shader params binding point + _kernel.set_shader_params_binding_point(0); + IGCKernel::configure(win); +} + +void GCIm2ColKernel::run(const Window &window) +{ + ARM_COMPUTE_ERROR_ON(_run_func == nullptr); + (this->*_run_func)(window); +} + +void GCIm2ColKernel::run_generic(const Window &window) +{ + ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); + ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(IGCKernel::window(), window); + + // Get initial windows + Window window_collapsed = window.collapse_if_possible(IGCKernel::window(), Window::DimZ); + // Change the Z dimension's step back to 1 + window_collapsed.set_dimension_step(Window::DimZ, 1); + + Window slice = window_collapsed.first_slice_window_3D(); + Window slice_in = window_collapsed.first_slice_window_3D(); + Window slice_out = window_collapsed.first_slice_window_3D(); + + // Setup slice + slice.set(Window::DimX, Window::Dimension(0, static_cast(_convolved_dims.first), 1)); + slice.set(Window::DimY, Window::Dimension(0, static_cast(_convolved_dims.second), 1)); + + // Setup input slice + // The first three dimensions of the input are increased by the inner loops + slice_in.set(Window::DimX, Window::Dimension(0, 0, 0)); + slice_in.set(Window::DimY, Window::Dimension(0, 0, 0)); + slice_in.set(Window::DimZ, Window::Dimension(0, 0, 0)); + + // Setup output slice + slice_out.set(Window::DimX, Window::Dimension(0, _output->info()->dimension(0), _num_elems_processed_per_iteration)); + slice_out.set(Window::DimY, Window::Dimension(0, _output->info()->dimension(1), 1)); + slice_out.set(Window::DimZ, Window::Dimension(0, 1, 1)); + + _kernel.use(); + + do + { + unsigned int idx = 0; + add_3D_tensor_argument(idx, _input, 1, slice_in); + add_2D_tensor_argument(idx, _output, 2, slice_out); + + _kernel.set_params(idx++, static_cast(_input->info()->dimension(2))); + _kernel.set_params(idx++, static_cast(_input->info()->strides_in_bytes()[3])); + _kernel.set_params(idx++, 
static_cast(_output->info()->strides_in_bytes()[3])); + _kernel.update_shader_params(); + + enqueue(*this, slice); + } + while(window_collapsed.slide_window_slice_3D(slice) && window_collapsed.slide_window_slice_3D(slice_out) && window_collapsed.slide_window_slice_3D(slice_in)); +} + +void GCIm2ColKernel::run_reduced(const Window &window) +{ + ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); + ARM_COMPUTE_ERROR_ON_MISMATCHING_WINDOWS(IGCKernel::window(), window); + + Window out_window; + out_window.use_tensor_dimensions(_output->info()->tensor_shape()); + + Window out_slice = out_window.first_slice_window_1D(); + Window in_slice = window.first_slice_window_3D(); + + _kernel.use(); + + // Run kernel + do + { + // Set arguments + unsigned int idx = 0; + + add_3D_tensor_argument(idx, _input, 1, in_slice); + add_1D_tensor_argument(idx, _output, 2, out_slice); + _kernel.set_params(idx++, _input->info()->dimension(0)); + _kernel.set_params(idx++, _input->info()->dimension(1)); + _kernel.update_shader_params(); + + enqueue(*this, in_slice); + } + while(window.slide_window_slice_3D(in_slice) && out_window.slide_window_slice_1D(out_slice)); +} diff --git a/src/core/GLES_COMPUTE/kernels/GCNormalizationLayerKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCNormalizationLayerKernel.cpp new file mode 100644 index 0000000000..65e54f538c --- /dev/null +++ b/src/core/GLES_COMPUTE/kernels/GCNormalizationLayerKernel.cpp @@ -0,0 +1,124 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/core/GLES_COMPUTE/kernels/GCNormalizationLayerKernel.h" + +#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h" +#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h" +#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Utils.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/core/Window.h" + +#include + +using namespace arm_compute; + +GCNormalizationLayerKernel::GCNormalizationLayerKernel() + : _input(nullptr), _squared_input(nullptr), _output(nullptr), _border_size(0) +{ +} + +BorderSize GCNormalizationLayerKernel::border_size() const +{ + return _border_size; +} + +void GCNormalizationLayerKernel::configure(const IGCTensor *input, const IGCTensor *squared_input, IGCTensor *output, NormalizationLayerInfo norm_info) +{ + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32); + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); + ARM_COMPUTE_ERROR_ON_MSG(!(norm_info.norm_size() % 2), "Normalization size should be odd"); + ARM_COMPUTE_ERROR_ON_MSG(norm_info.type() == NormType::IN_MAP_2D, "2D In-Map Normalization not implemented"); + + // Set build options + std::set build_opts; + + _input = input; + _squared_input = squared_input; + _output = output; + + const bool is_in_map = (norm_info.type() == NormType::IN_MAP_1D); + const unsigned int border_width = is_in_map ? std::min(norm_info.norm_size() / 2, 3U) : 0; + _border_size = BorderSize(0, border_width); + + // Set kernel static arguments + std::string func_name = ((norm_info.type() == NormType::IN_MAP_1D) ? 
"IN_MAP_1D" : "CROSS_MAP"); + build_opts.emplace(("#define " + func_name)); + build_opts.emplace(("#define COEFF " + float_to_string_with_full_precision(norm_info.scale_coeff()))); + build_opts.emplace(("#define BETA " + float_to_string_with_full_precision(norm_info.beta()))); + build_opts.emplace(("#define KAPPA " + float_to_string_with_full_precision(norm_info.kappa()))); + build_opts.emplace(("#define RADIUS " + support::cpp11::to_string(norm_info.norm_size() / 2))); + build_opts.emplace(("#define LOCAL_SIZE_X " + support::cpp11::to_string(1))); + build_opts.emplace(("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1))); + build_opts.emplace(("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1))); + + // Create kernel + _kernel = static_cast(GCKernelLibrary::get().create_kernel("normalization_layer", build_opts)); + + // Configure kernel window + const unsigned int num_elems_processed_per_iteration = 1; + const unsigned int num_elems_read_per_iteration = num_elems_processed_per_iteration + 2 * (norm_info.norm_size() / 2); + + Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration)); + + AccessWindowHorizontal input_access(input->info(), -_border_size.left, num_elems_read_per_iteration); + AccessWindowHorizontal squared_input_access(squared_input->info(), -_border_size.left, num_elems_read_per_iteration); + AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration); + + update_window_and_padding(win, input_access, squared_input_access, output_access); + + output_access.set_valid_region(win, input->info()->valid_region()); + + _kernel.clear_params(); + + _kernel.set_shader_params_binding_point(0); + + IGCKernel::configure(win); +} + +void GCNormalizationLayerKernel::run(const Window &window) +{ + ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); + ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); + + _kernel.use(); + + Window slice = window.first_slice_window_3D(); + + do + { + 
unsigned int idx = 0; + unsigned int binding = 1; + add_3D_tensor_argument(idx, _input, binding++, slice); + add_3D_tensor_argument(idx, _squared_input, binding++, slice); + add_3D_tensor_argument(idx, _output, binding++, slice); + + _kernel.update_shader_params(); + + enqueue(*this, slice); + } + while(window.slide_window_slice_3D(slice)); +} diff --git a/src/core/GLES_COMPUTE/kernels/GCPixelWiseMultiplicationKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCPixelWiseMultiplicationKernel.cpp new file mode 100644 index 0000000000..2b5cee455c --- /dev/null +++ b/src/core/GLES_COMPUTE/kernels/GCPixelWiseMultiplicationKernel.cpp @@ -0,0 +1,127 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/core/GLES_COMPUTE/kernels/GCPixelWiseMultiplicationKernel.h" + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h" +#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h" +#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h" +#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/core/Window.h" + +#include +#include +#include +#include +using namespace arm_compute; + +GCPixelWiseMultiplicationKernel::GCPixelWiseMultiplicationKernel() + : _input1(nullptr), _input2(nullptr), _output(nullptr) +{ +} + +void GCPixelWiseMultiplicationKernel::configure(const IGCTensor *input1, const IGCTensor *input2, IGCTensor *output, float scale) +{ + ARM_COMPUTE_ERROR_ON_NULLPTR(input1, input2, output); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::F32); + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input1, input2); + ARM_COMPUTE_ERROR_ON_NULLPTR(output); + ARM_COMPUTE_ERROR_ON_MSG(scale < 0, "Scale cannot be negative. "); + + // Auto initialize output if not initialized + { + set_shape_if_empty(*output->info(), input1->info()->tensor_shape()); + set_format_if_unknown(*output->info(), Format::F32); + } + + ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input1, input2, output); + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input1, input2, output); + ARM_COMPUTE_ERROR_ON_MSG(scale < 0, "Scale cannot be negative. 
"); + + _input1 = input1; + _input2 = input2; + _output = output; + + std::string data_type; + std::string compute_type; + + // Set kernel build options + std::set build_opts; + build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(1)); + build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1)); + build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1)); + + build_opts.emplace("#define SCALE " + support::cpp11::to_string(scale)); + + // Create kernel + _kernel = static_cast(GCKernelLibrary::get().create_kernel("pixelwise_mul_float", build_opts)); + + _kernel.clear_params(); + + // Configure kernel window + constexpr unsigned int num_elems_processed_per_iteration = 1; + + Window win = calculate_max_window(*input1->info(), Steps(num_elems_processed_per_iteration)); + + AccessWindowHorizontal input1_access(input1->info(), 0, num_elems_processed_per_iteration); + AccessWindowHorizontal input2_access(input2->info(), 0, num_elems_processed_per_iteration); + AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration); + + update_window_and_padding(win, input1_access, input2_access, output_access); + + ValidRegion valid_region = intersect_valid_regions(input1->info()->valid_region(), + input2->info()->valid_region()); + output_access.set_valid_region(win, valid_region); + + // set shader params binding point + _kernel.set_shader_params_binding_point(0); + + IGCKernel::configure(win); +} + +void GCPixelWiseMultiplicationKernel::run(const Window &window) +{ + ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); + ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IGCKernel::window(), window); + + _kernel.use(); + + Window slice = window.first_slice_window_3D(); + + do + { + unsigned int idx = 0; + unsigned int binding = 1; + add_3D_tensor_argument(idx, _input1, binding++, slice); + add_3D_tensor_argument(idx, _input2, binding++, slice); + add_3D_tensor_argument(idx, _output, binding++, slice); + + 
_kernel.update_shader_params(); + enqueue(*this, slice); + } + while(window.slide_window_slice_3D(slice)); +} diff --git a/src/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.cpp new file mode 100644 index 0000000000..c877da3783 --- /dev/null +++ b/src/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.cpp @@ -0,0 +1,254 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.h" + +#include "arm_compute/core/AccessWindowStatic.h" +#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h" +#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h" +#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h" +#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Utils.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/core/Window.h" + +#include +#include +#include + +using namespace arm_compute; + +GCPoolingLayerKernel::GCPoolingLayerKernel() + : _input(nullptr), _output(nullptr), _pool_info(), _border_size(0), _num_elems_processed_per_iteration(1) +{ +} + +BorderSize GCPoolingLayerKernel::border_size() const +{ + return _border_size; +} + +void GCPoolingLayerKernel::configure(const IGCTensor *input, IGCTensor *output, const PoolingLayerInfo &pool_info) +{ + int pool_pad_x = 0; + int pool_pad_y = 0; + int pool_stride_x = 0; + int pool_stride_y = 0; + unsigned int pooled_w = 0; + unsigned int pooled_h = 0; + const PoolingType pool_type = pool_info.pool_type(); + const int pool_size = pool_info.pool_size(); + const PadStrideInfo pad_stride_info = pool_info.pad_stride_info(); + std::tie(pool_pad_x, pool_pad_y) = pad_stride_info.pad(); + std::tie(pool_stride_x, pool_stride_y) = pad_stride_info.stride(); + + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); + ARM_COMPUTE_ERROR_ON_NULLPTR(output); + ARM_COMPUTE_ERROR_ON(pool_pad_x >= pool_size || pool_pad_y >= pool_size); + ARM_COMPUTE_ERROR_ON(pool_size > 7 && is_data_type_fixed_point(input->info()->data_type())); + + // Check output dimensions + std::tie(pooled_w, pooled_h) = scaled_dimensions(input->info()->dimension(0), + input->info()->dimension(1), + pool_size, + pool_size, + pool_info.pad_stride_info()); + + // Output auto initialization if not yet initialized + { + 
TensorShape output_shape{ input->info()->tensor_shape() }; + output_shape.set(0, pooled_w); + output_shape.set(1, pooled_h); + + auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type(), input->info()->fixed_point_position()); + } + + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); + ARM_COMPUTE_ERROR_ON((output->info()->dimension(0) != pooled_w) || (output->info()->dimension(1) != pooled_h)); + + const int input_width = input->info()->dimension(0); + const int input_height = input->info()->dimension(1); + + // Set instance variables + _input = input; + _output = output; + _pool_info = pool_info; + _border_size = BorderSize(pool_pad_y, pool_pad_x); + + // Set build options + std::set build_opts; + build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(1)); + build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1)); + build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1)); + if(input->info()->data_type() == DataType::F32) + { + build_opts.insert("#define DATA_TYPE_FP32"); + } + else + { + build_opts.insert("#define DATA_TYPE_FP16"); + } + build_opts.emplace(("#define POOL_" + string_from_pooling_type(pool_type))); + build_opts.emplace(("#define STRIDE_X " + support::cpp11::to_string(pool_stride_x))); + build_opts.emplace(("#define MAX_WIDTH " + support::cpp11::to_string(input->info()->dimension(0) + pool_pad_x))); + build_opts.emplace(("#define MAX_HEIGHT " + support::cpp11::to_string(input->info()->dimension(1) + pool_pad_y))); + build_opts.emplace(("#define STRIDE_Y " + support::cpp11::to_string(pool_stride_y))); + build_opts.emplace(("#define PAD_X " + support::cpp11::to_string(pool_pad_x))); + build_opts.emplace(("#define PAD_Y " + support::cpp11::to_string(pool_pad_y))); + + // Create kernel + if((pool_size == 2) || (pool_size == 3) || (pool_size == 7)) + { + // Check if we have pool3x3 with stride_x less equal than 3. 
In these cases, run an optimized OpenGLES kernel where + // each thread computes 4 output elements + const bool is_pool3x3_stride_le3 = (pool_size == 3) && (pool_stride_x <= 3) && !is_data_type_fixed_point(input->info()->data_type()); + + int num_elements_read_per_iteration = (pool_size == 7) ? 8 : pool_size; + + if(input->info()->data_type() == DataType::F32) + { + if(is_pool3x3_stride_le3) + { + // Change the number of elements processed and number of elements read per iteration for pooling 3x3 with stride less equal than 3 + _num_elems_processed_per_iteration = 4; + num_elements_read_per_iteration = pool_size * (pool_stride_x + 1); + } + } + else + { + num_elements_read_per_iteration = pool_size; + if(is_pool3x3_stride_le3) + { + _num_elems_processed_per_iteration = 4; + } + else + { + _num_elems_processed_per_iteration = 2; + } + } + + const int upper_bound_w = ((pooled_w - 1) * pool_stride_x - pool_pad_x + num_elements_read_per_iteration) - input_width; + const int upper_bound_h = ((pooled_h - 1) * pool_stride_y - pool_pad_y + pool_size) - input_height; + + _border_size.right = std::max(upper_bound_w, pool_pad_x); + _border_size.bottom = std::max(upper_bound_h, pool_pad_y); + + std::string kernel_name = "pooling_layer_" + support::cpp11::to_string(pool_size); + if(is_pool3x3_stride_le3) + { + build_opts.insert("#define POOLING_LAYER_3_OPTIMIZED"); + _kernel = static_cast(GCKernelLibrary::get().create_kernel(kernel_name + "_optimized", build_opts)); + } + else + { + build_opts.insert("#define POOLING_LAYER_" + support::cpp11::to_string(pool_size)); + _kernel = static_cast(GCKernelLibrary::get().create_kernel(kernel_name, build_opts)); + } + } + else // Run general case + { + if(input->info()->data_type() == DataType::F32) + { + _num_elems_processed_per_iteration = 1; + } + else + { + _num_elems_processed_per_iteration = 2; + } + const int upper_bound_w = ((pooled_w - 1) * pool_stride_x - pool_pad_x + pool_size) - input_width; + const int upper_bound_h = 
((pooled_h - 1) * pool_stride_y - pool_pad_y + pool_size) - input_height; + + _border_size.right = std::max(upper_bound_w, pool_pad_x); + _border_size.bottom = std::max(upper_bound_h, pool_pad_y); + + build_opts.emplace(("#define POOL_SIZE " + support::cpp11::to_string(pool_size))); + + build_opts.insert("#define POOLING_LAYER_N"); + _kernel = static_cast(GCKernelLibrary::get().create_kernel("pooling_layer_n", build_opts)); + } + + Window win = calculate_max_window(*output->info(), Steps(_num_elems_processed_per_iteration)); + + if(input->info()->data_type() == DataType::F32) + { + AccessWindowStatic input_access(input->info(), -pool_pad_x, -pool_pad_y, input_width + _border_size.right, input_height + _border_size.bottom); + AccessWindowHorizontal output_access(output->info(), 0, _num_elems_processed_per_iteration); + update_window_and_padding(win, input_access, output_access); + output_access.set_valid_region(win, ValidRegion(Coordinates(), output->info()->tensor_shape())); + } + else + { + // Calculate output right and bottom border + const int output_width = output->info()->dimension(0); + const int output_height = output->info()->dimension(1); + const int output_padding_right = ceil_to_multiple(output_width, _num_elems_processed_per_iteration) - output_width; + const int output_padding_bottom = ceil_to_multiple(output_height, 1) - output_height; + const int input_padding_right = ceil_to_multiple(input_width + 2 * _border_size.right, _num_elems_processed_per_iteration) - (input_width + 2 * _border_size.right); + const int input_padding_bottom = ceil_to_multiple(input_height + 2 * _border_size.bottom, 1) - (input_height + 2 * _border_size.bottom); + + // Configure kernel window + AccessWindowStatic input_access(input->info(), -pool_pad_x, -pool_pad_y, input_width + _border_size.right + input_padding_right, input_height + _border_size.bottom + input_padding_bottom); + AccessWindowStatic output_access(output->info(), 0, 0, output_width + output_padding_right, 
output_height + output_padding_bottom); + update_window_and_padding(win, input_access, output_access); + output_access.set_valid_region(win, ValidRegion(Coordinates(), output->info()->tensor_shape())); + } + + _kernel.clear_params(); + _kernel.set_shader_params_binding_point(0); + + IGCKernel::configure(win); +} + +void GCPoolingLayerKernel::run(const Window &window) +{ + ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); + ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); + + unsigned int pool_pad_x, pool_pad_y, pool_stride_x, pool_stride_y = 0; + std::tie(pool_pad_x, pool_pad_y) = _pool_info.pad_stride_info().pad(); + std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride(); + + _kernel.use(); + + Window window_collapsed = window.collapse_if_possible(IGCKernel::window(), Window::DimZ); + Window slice = window_collapsed.first_slice_window_3D(); + + do + { + // Upsample input by pool size + Window in_slice(slice); + in_slice.set(Window::DimX, Window::Dimension(in_slice.x().start() - pool_pad_x, in_slice.x().end() * pool_stride_x, pool_stride_x * _num_elems_processed_per_iteration)); + in_slice.set(Window::DimY, Window::Dimension(in_slice.y().start() - pool_pad_y, in_slice.y().end() * pool_stride_y, pool_stride_y)); + + // Set inputs + unsigned int idx = 0; + add_3D_tensor_argument(idx, _input, 1, in_slice); + add_3D_tensor_argument(idx, _output, 2, slice); + + _kernel.update_shader_params(); + enqueue(*this, slice); + } + while(window_collapsed.slide_window_slice_3D(slice)); +} diff --git a/src/core/GLES_COMPUTE/kernels/GCSoftmaxLayerKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCSoftmaxLayerKernel.cpp new file mode 100644 index 0000000000..09a0f79ab2 --- /dev/null +++ b/src/core/GLES_COMPUTE/kernels/GCSoftmaxLayerKernel.cpp @@ -0,0 +1,353 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/core/GLES_COMPUTE/kernels/GCSoftmaxLayerKernel.h" + +#include "arm_compute/core/AccessWindowStatic.h" +#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h" +#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h" +#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h" +#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Utils.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/core/Window.h" + +#include +#include + +using namespace arm_compute; + +void GCLogits1DMaxKernel::configure(const IGCTensor *input, IGCTensor *output) +{ + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); + ARM_COMPUTE_ERROR_ON_NULLPTR(output); + + // Softmax across the x dimension + TensorShape output_shape{ input->info()->tensor_shape() }; + output_shape.set(0, 1); + + // Output auto initialization if not yet initialized + auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type(), input->info()->fixed_point_position()); + + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); + ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(output->info()->tensor_shape(), output_shape); + + _input = input; + _output = output; + + // Set build options + std::set build_opts; + std::string dt_name = (input->info()->data_type() == DataType::F32) ? 
"DATA_TYPE_FP32" : "DATA_TYPE_FP16"; + build_opts.insert("#define " + dt_name); + build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(1)); + build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1)); + build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1)); + build_opts.insert("#define SOFTMAX_LAYER_MAX"); + + // Tell the kernel that the width is not a multiple of 4 + if((input->info()->dimension(0) % 4) != 0) + { + build_opts.insert("#define NON_MULTIPLE_OF_4"); + } + + // Create kernel + _kernel = static_cast(GCKernelLibrary::get().create_kernel("softmax_layer_max", build_opts)); + + _kernel.clear_params(); + + // Set fixed arguments + unsigned int idx = 2 * num_arguments_per_3D_tensor(); //Skip the input and output parameters + _kernel.set_params(idx++, input->info()->dimension(0)); + + // Configure kernel window + // The kernel loops over all elements in steps of 4 + const unsigned int num_elems_processed_per_iteration = ceil_to_multiple(input->info()->dimension(0), 4); + unsigned int num_elems_written_per_iteration = 1; + if(input->info()->data_type() == DataType::F16) + { + num_elems_written_per_iteration = 2; + } + + Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration)); + AccessWindowHorizontal input_access(input->info(), 0, num_elems_processed_per_iteration); + AccessWindowHorizontal output_access(output->info(), 0, num_elems_written_per_iteration); + + update_window_and_padding(win, input_access, output_access); + + output_access.set_valid_region(win, ValidRegion(Coordinates(), output->info()->tensor_shape())); + + // set shader params binding point + _kernel.set_shader_params_binding_point(0); + + IGCKernel::configure(win); +} + +void GCLogits1DMaxKernel::run(const Window &window) +{ + ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); + + Window slice = window.first_slice_window_3D(); + + _kernel.use(); + + do + { + unsigned int idx1 = 0; + 
switch(_input->info()->data_type()) + { + case DataType::F16: + add_3D_tensor_argument(idx1, _input, BufferParam(1, 2), slice); + add_3D_tensor_argument(idx1, _output, BufferParam(2, 2), slice); + break; + + case DataType::F32: + add_3D_tensor_argument(idx1, _input, BufferParam(1, 2), slice); + add_3D_tensor_argument(idx1, _output, BufferParam(2, 2), slice); + break; + + default: + ARM_COMPUTE_ERROR("Current data type is mot supported"); + break; + } + + _kernel.update_shader_params(); + enqueue(*this, slice); + } + while(window.slide_window_slice_3D(slice)); +} + +GCLogits1DShiftExpSumKernel::GCLogits1DShiftExpSumKernel() + : _input(nullptr), _max(nullptr), _output(nullptr), _sum(nullptr) +{ +} + +void GCLogits1DShiftExpSumKernel::configure(const IGCTensor *input, const IGCTensor *max, IGCTensor *output, IGCTensor *sum) +{ + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); + ARM_COMPUTE_ERROR_ON_NULLPTR(max, sum, output); + + // Output auto initialization if not yet initialized + auto_init_if_empty(*sum->info(), max->info()->tensor_shape(), 1, input->info()->data_type(), input->info()->fixed_point_position()); + auto_init_if_empty(*output->info(), input->info()->tensor_shape(), 1, input->info()->data_type(), input->info()->fixed_point_position()); + + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output, max, sum); + ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, output); + ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(max, sum); + + _input = input; + _max = max; + _output = output; + _sum = sum; + + // Set build options + std::set build_opts; + std::string dt_name = (input->info()->data_type() == DataType::F32) ? 
"DATA_TYPE_FP32" : "DATA_TYPE_FP16"; + build_opts.insert("#define " + dt_name); + build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(1)); + build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1)); + build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1)); + build_opts.insert("#define SOFTMAX_LAYER_SHIFT_EXP_SUM"); + + // Tell the kernel that the width is not a multiple of 4 + if((input->info()->dimension(0) % 4) != 0) + { + build_opts.insert("#define NON_MULTIPLE_OF_4"); + } + + // Create kernel + _kernel = static_cast(GCKernelLibrary::get().create_kernel("softmax_layer_shift_exp_sum", build_opts)); + + _kernel.clear_params(); + + // Set fixed arguments + unsigned int idx = 4 * num_arguments_per_3D_tensor(); //Skip the input and output parameters + _kernel.set_params(idx++, input->info()->dimension(0)); + + // Configure window + // The kernel loops over all elements in steps of 4 + const unsigned int num_elems_processed_per_iteration = ceil_to_multiple(input->info()->dimension(0), 4); + unsigned int num_elems_written_per_iteration = 1; + if(input->info()->data_type() == DataType::F16) + { + num_elems_written_per_iteration = 2; + } + + Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration)); + + AccessWindowHorizontal input_access(input->info(), 0, num_elems_processed_per_iteration); + AccessWindowHorizontal max_access(max->info(), 0, num_elems_written_per_iteration); + AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration); + AccessWindowHorizontal sum_access(sum->info(), 0, num_elems_written_per_iteration); + + update_window_and_padding(win, input_access, max_access, output_access, sum_access); + + output_access.set_valid_region(win, input->info()->valid_region()); + sum_access.set_valid_region(win, ValidRegion(Coordinates(), sum->info()->tensor_shape())); + + // set shader params binding point + 
_kernel.set_shader_params_binding_point(0); + + IGCKernel::configure(win); +} + +void GCLogits1DShiftExpSumKernel::run(const Window &window) +{ + ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); + ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); + + Window window_collapsed = window.collapse_if_possible(IGCKernel::window(), Window::DimZ); + Window slice = window_collapsed.first_slice_window_3D(); + + _kernel.use(); + + do + { + unsigned int idx = 0; + switch(_input->info()->data_type()) + { + case DataType::F16: + add_3D_tensor_argument(idx, _input, BufferParam(1, 2), slice); + add_3D_tensor_argument(idx, _max, BufferParam(2, 2), slice); + add_3D_tensor_argument(idx, _output, BufferParam(3, 2), slice); + add_3D_tensor_argument(idx, _sum, BufferParam(4, 2), slice); + break; + + case DataType::F32: + add_3D_tensor_argument(idx, _input, BufferParam(1, 2), slice); + add_3D_tensor_argument(idx, _max, BufferParam(2, 2), slice); + add_3D_tensor_argument(idx, _output, BufferParam(3, 2), slice); + add_3D_tensor_argument(idx, _sum, BufferParam(4, 2), slice); + break; + + default: + ARM_COMPUTE_ERROR("Current data type is mot supported"); + break; + } + + _kernel.update_shader_params(); + enqueue(*this, slice); + } + while(window_collapsed.slide_window_slice_3D(slice)); +} + +GCLogits1DNormKernel::GCLogits1DNormKernel() + : _input(nullptr), _sum(nullptr), _output(nullptr) +{ +} + +void GCLogits1DNormKernel::configure(const IGCTensor *input, const IGCTensor *sum, IGCTensor *output) +{ + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); + ARM_COMPUTE_ERROR_ON_NULLPTR(sum, output); + + // Output auto initialization if not yet initialized + auto_init_if_empty(*output->info(), input->info()->tensor_shape(), 1, input->info()->data_type(), input->info()->fixed_point_position()); + + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, sum, output); + ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT_POSITION(input, sum, output); + 
ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, output); + + _input = input; + _sum = sum; + _output = output; + + // Set build options + std::set build_opts; + std::string dt_name = (input->info()->data_type() == DataType::F32) ? "DATA_TYPE_FP32" : "DATA_TYPE_FP16"; + build_opts.insert("#define " + dt_name); + build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(1)); + build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1)); + build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1)); + build_opts.insert("#define SOFTMAX_LAYER_NORM"); + + // Create kernel + _kernel = static_cast(GCKernelLibrary::get().create_kernel("softmax_layer_norm", build_opts)); + + // Configure window + constexpr unsigned int num_elems_processed_per_iteration = 4; + unsigned int num_elems_written_per_iteration = 1; + if(input->info()->data_type() == DataType::F16) + { + num_elems_written_per_iteration = 2; + } + + Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration)); + + AccessWindowHorizontal input_access(input->info(), 0, num_elems_processed_per_iteration); + AccessWindowStatic sum_access(sum->info(), 0, 0, num_elems_written_per_iteration, sum->info()->dimension(1)); + AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration); + + update_window_and_padding(win, input_access, sum_access, output_access); + + output_access.set_valid_region(win, input->info()->valid_region()); + + _kernel.clear_params(); + + // set shader params binding point + _kernel.set_shader_params_binding_point(0); + + IGCKernel::configure(win); +} + +void GCLogits1DNormKernel::run(const Window &window) +{ + ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); + ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); + + Window window_collapsed = window.collapse_if_possible(IGCKernel::window(), Window::DimZ); + Window slice = window_collapsed.first_slice_window_3D(); + + _kernel.use(); + + do 
+ { + Window sum_slice = slice; + sum_slice.set(Window::DimX, Window::Dimension(0, 1, 1)); + + unsigned int idx1 = 0; + switch(_input->info()->data_type()) + { + case DataType::F16: + add_3D_tensor_argument(idx1, _input, BufferParam(1, 2), slice); + add_3D_tensor_argument(idx1, _sum, BufferParam(2, 2), slice); + add_3D_tensor_argument(idx1, _output, BufferParam(3, 2), slice); + break; + + case DataType::F32: + add_3D_tensor_argument(idx1, _input, BufferParam(1, 2), slice); + add_3D_tensor_argument(idx1, _sum, BufferParam(2, 2), slice); + add_3D_tensor_argument(idx1, _output, BufferParam(3, 2), slice); + break; + + default: + ARM_COMPUTE_ERROR("Current data type is mot supported"); + break; + } + + _kernel.update_shader_params(); + enqueue(*this, slice); + } + while(window_collapsed.slide_window_slice_3D(slice)); +} diff --git a/src/core/GLES_COMPUTE/kernels/GCTransposeKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCTransposeKernel.cpp new file mode 100644 index 0000000000..b891b42ef8 --- /dev/null +++ b/src/core/GLES_COMPUTE/kernels/GCTransposeKernel.cpp @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/core/GLES_COMPUTE/kernels/GCTransposeKernel.h" + +#include "arm_compute/core/AccessWindowTranspose.h" +#include "arm_compute/core/Error.h" +#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h" +#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h" +#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h" +#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/Types.h" + +#include +#include + +using namespace arm_compute; + +void GCTransposeKernel::configure(const IGCTensor *input, IGCTensor *output) +{ + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); + ARM_COMPUTE_ERROR_ON_NULLPTR(output); + + TensorShape output_shape{ input->info()->tensor_shape() }; + const size_t w_out = input->info()->dimension(1); + const size_t h_out = input->info()->dimension(0); + output_shape.set(0, w_out); + output_shape.set(1, h_out); + + // Output tensor auto inizialitation if not yet initialized + auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type(), input->info()->fixed_point_position()); + + ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(output->info()->tensor_shape(), output_shape); + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); + + _input = input; + _output = output; + + std::set build_opts; + std::string dt_name = (input->info()->data_type() == DataType::F32) ? 
"DATA_TYPE_FP32" : "DATA_TYPE_FP16"; + build_opts.emplace(("#define " + dt_name)); + build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(1)); + build_opts.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(1)); + build_opts.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(1)); + + // Create kernel + _kernel = static_cast(GCKernelLibrary::get().create_kernel("transpose", build_opts)); + + _kernel.clear_params(); + + // Configure kernel window + const unsigned int num_elems_processed_per_iteration = 4; + + Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration, num_elems_processed_per_iteration)); + + AccessWindowRectangle input_access(input->info(), 0, 0, num_elems_processed_per_iteration, num_elems_processed_per_iteration); + AccessWindowTranspose output_access(output->info(), 0, 0, num_elems_processed_per_iteration, num_elems_processed_per_iteration); + update_window_and_padding(win, input_access, output_access); + + output_access.set_valid_region(win, input->info()->valid_region()); + + // set shader params binding point + _kernel.set_shader_params_binding_point(0); + + IGCKernel::configure(win); +} + +void GCTransposeKernel::run(const Window &window) +{ + ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); + ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IGCKernel::window(), window); + + _kernel.use(); + + Window slice = window.first_slice_window_2D(); + + do + { + unsigned int idx = 0; + if(_input->info()->data_type() == DataType::F32) + { + add_2D_tensor_argument(idx, _input, 1, slice); + add_2D_tensor_argument(idx, _output, 2, slice); + } + else if(_input->info()->data_type() == DataType::F16) + { + add_2D_tensor_argument(idx, _input, BufferParam(1, 3), slice); + add_2D_tensor_argument(idx, _output, BufferParam(2, 3), slice); + } + + _kernel.update_shader_params(); + enqueue(*this, slice); + } + while(window.slide_window_slice_2D(slice)); +} diff --git a/src/core/Helpers.cpp b/src/core/Helpers.cpp 
index fc0b6e9361..151d7de9a4 100644 --- a/src/core/Helpers.cpp +++ b/src/core/Helpers.cpp @@ -106,6 +106,13 @@ Window arm_compute::calculate_max_enlarged_window(const ITensorInfo &info, const ++n; } + if(tensor_shape.num_dimensions() > 2) + { + window.set(2, Window::Dimension(0, std::max(1, tensor_shape[n]), steps[2])); + + ++n; + } + for(; n < Coordinates::num_max_dimensions; ++n) { window.set(n, Window::Dimension(0, std::max(1, tensor_shape[n]))); diff --git a/src/core/Utils.cpp b/src/core/Utils.cpp index bd6911fd2b..af864f57f7 100644 --- a/src/core/Utils.cpp +++ b/src/core/Utils.cpp @@ -353,6 +353,7 @@ void arm_compute::print_consecutive_elements(std::ostream &s, DataType dt, const print_consecutive_elements_impl(s, reinterpret_cast(ptr), n, stream_width, element_delim); break; case DataType::F16: + print_consecutive_elements_impl(s, reinterpret_cast(ptr), n, stream_width, element_delim); break; default: ARM_COMPUTE_ERROR("Undefined element size for given data type"); @@ -380,7 +381,7 @@ int arm_compute::max_consecutive_elements_display_width(std::ostream &s, DataTyp case DataType::F32: return max_consecutive_elements_display_width_impl(s, reinterpret_cast(ptr), n); case DataType::F16: - return 0; + return max_consecutive_elements_display_width_impl(s, reinterpret_cast(ptr), n); default: ARM_COMPUTE_ERROR("Undefined element size for given data type"); } diff --git a/src/runtime/CL/functions/CLNormalizationLayer.cpp b/src/runtime/CL/functions/CLNormalizationLayer.cpp index f4bd49406c..648ce6b3a6 100644 --- a/src/runtime/CL/functions/CLNormalizationLayer.cpp +++ b/src/runtime/CL/functions/CLNormalizationLayer.cpp @@ -37,7 +37,7 @@ CLNormalizationLayer::CLNormalizationLayer() { } -void CLNormalizationLayer::configure(ICLTensor *input, ICLTensor *output, NormalizationLayerInfo norm_info) +void CLNormalizationLayer::configure(ICLTensor *input, ICLTensor *output, const NormalizationLayerInfo &norm_info) { ARM_COMPUTE_ERROR_ON(input == nullptr); diff --git 
a/src/runtime/GLES_COMPUTE/GCScheduler.cpp b/src/runtime/GLES_COMPUTE/GCScheduler.cpp new file mode 100644 index 0000000000..b2235ea6f9 --- /dev/null +++ b/src/runtime/GLES_COMPUTE/GCScheduler.cpp @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "arm_compute/runtime/GLES_COMPUTE/GCScheduler.h" + +#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h" + +using namespace arm_compute; + +GCScheduler::GCScheduler() = default; + +void GCScheduler::default_init() +{ + GCKernelLibrary::get().init("./cs_shaders/"); +} + +void GCScheduler::init(EGLDisplay dpy, EGLContext ctx) +{ + GCKernelLibrary::get().init("./cs_shaders/", dpy, ctx); +} + +GCScheduler &GCScheduler::get() +{ + static GCScheduler scheduler; + return scheduler; +} + +void GCScheduler::enqueue(IGCKernel &kernel, bool flush) +{ + kernel.run(kernel.window()); + if(flush) + { + ARM_COMPUTE_GL_CHECK(glFlush()); + } +} + +void GCScheduler::sync() +{ + ARM_COMPUTE_GL_CHECK(glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT)); +} diff --git a/src/runtime/GLES_COMPUTE/GCTensor.cpp b/src/runtime/GLES_COMPUTE/GCTensor.cpp new file mode 100644 index 0000000000..edbd16dc1d --- /dev/null +++ b/src/runtime/GLES_COMPUTE/GCTensor.cpp @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h" + +using namespace arm_compute; + +GCTensor::GCTensor() + : _allocator() +{ +} + +ITensorAllocator *GCTensor::allocator() +{ + return &_allocator; +} + +TensorInfo *GCTensor::info() const +{ + return &_allocator.info(); +} + +TensorInfo *GCTensor::info() +{ + return &_allocator.info(); +} + +uint8_t *GCTensor::buffer() const +{ + return _allocator.data(); +} + +GLuint GCTensor::gc_buffer() const +{ + return _allocator.get_gl_ssbo_name(); +} + +void GCTensor::map(bool blocking) +{ + IGCTensor::map(blocking); +} + +void GCTensor::unmap() +{ + IGCTensor::unmap(); +} + +uint8_t *GCTensor::do_map(bool blocking) +{ + return _allocator.map(blocking); +} + +void GCTensor::do_unmap() +{ + _allocator.unmap(); +} \ No newline at end of file diff --git a/src/runtime/GLES_COMPUTE/GCTensorAllocator.cpp b/src/runtime/GLES_COMPUTE/GCTensorAllocator.cpp new file mode 100644 index 0000000000..694b34f1ec --- /dev/null +++ b/src/runtime/GLES_COMPUTE/GCTensorAllocator.cpp @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "arm_compute/runtime/GLES_COMPUTE/GCTensorAllocator.h" + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCScheduler.h" +#include "support/ToolchainSupport.h" + +using namespace arm_compute; + +GCTensorAllocator::GCTensorAllocator() + : _gl_buffer(), _mapping(nullptr) +{ +} + +uint8_t *GCTensorAllocator::data() +{ + return _mapping; +} + +void GCTensorAllocator::allocate() +{ + _gl_buffer = support::cpp14::make_unique(); + ARM_COMPUTE_GL_CHECK(glBindBuffer(GL_SHADER_STORAGE_BUFFER, _gl_buffer->_ssbo_name)); + ARM_COMPUTE_GL_CHECK(glBufferData(GL_SHADER_STORAGE_BUFFER, static_cast(info().total_size()), nullptr, GL_STATIC_DRAW)); + ARM_COMPUTE_GL_CHECK(glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0)); + info().set_is_resizable(false); +} + +void GCTensorAllocator::free() +{ + _gl_buffer.reset(); + info().set_is_resizable(true); +} + +uint8_t *GCTensorAllocator::lock() +{ + return map(true); +} + +void GCTensorAllocator::unlock() +{ + unmap(); +} + +GLuint GCTensorAllocator::get_gl_ssbo_name() const +{ + return _gl_buffer->_ssbo_name; +} + +uint8_t *GCTensorAllocator::map(bool blocking) +{ + ARM_COMPUTE_ERROR_ON(_mapping != nullptr); + ARM_COMPUTE_UNUSED(blocking); + + ARM_COMPUTE_GL_CHECK(glBindBuffer(GL_SHADER_STORAGE_BUFFER, _gl_buffer->_ssbo_name)); + void *p = ARM_COMPUTE_GL_CHECK(glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, static_cast(info().total_size()), GL_MAP_READ_BIT | 
GL_MAP_WRITE_BIT)); + _mapping = reinterpret_cast(p); + + return _mapping; +} + +void GCTensorAllocator::unmap() +{ + ARM_COMPUTE_ERROR_ON(_mapping == nullptr); + + ARM_COMPUTE_GL_CHECK(glBindBuffer(GL_SHADER_STORAGE_BUFFER, _gl_buffer->_ssbo_name)); + ARM_COMPUTE_GL_CHECK(glUnmapBuffer(GL_SHADER_STORAGE_BUFFER)); + ARM_COMPUTE_GL_CHECK(glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0)); + _mapping = nullptr; +} \ No newline at end of file diff --git a/src/runtime/GLES_COMPUTE/IGCSimpleFunction.cpp b/src/runtime/GLES_COMPUTE/IGCSimpleFunction.cpp new file mode 100644 index 0000000000..19f178f445 --- /dev/null +++ b/src/runtime/GLES_COMPUTE/IGCSimpleFunction.cpp @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/runtime/GLES_COMPUTE/IGCSimpleFunction.h" + +#include "arm_compute/core/Error.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCScheduler.h" + +using namespace arm_compute; + +IGCSimpleFunction::IGCSimpleFunction() //NOLINT + : _kernel(), + _border_handler() +{ +} + +void IGCSimpleFunction::run() +{ + ARM_COMPUTE_ERROR_ON_MSG(!_kernel, "The child class didn't set the GLES kernel or function isn't configured"); + + // FIXME(APPBROWSER-300): We may need to rename "enqueue" to "dispatch" and "sync" to "memory_barrier". + GCScheduler::get().enqueue(_border_handler, false); + GCScheduler::get().sync(); + GCScheduler::get().enqueue(*_kernel); +} diff --git a/src/runtime/GLES_COMPUTE/functions/GCAbsoluteDifference.cpp b/src/runtime/GLES_COMPUTE/functions/GCAbsoluteDifference.cpp new file mode 100644 index 0000000000..781b357ce7 --- /dev/null +++ b/src/runtime/GLES_COMPUTE/functions/GCAbsoluteDifference.cpp @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCAbsoluteDifference.h" + +#include "arm_compute/core/GLES_COMPUTE/kernels/GCAbsoluteDifferenceKernel.h" +#include "arm_compute/core/Helpers.h" +#include "support/ToolchainSupport.h" + +#include + +using namespace arm_compute; + +void GCAbsoluteDifference::configure(const IGCTensor *input1, const IGCTensor *input2, IGCTensor *output) +{ + auto k = arm_compute::support::cpp14::make_unique(); + k->configure(input1, input2, output); + _kernel = std::move(k); +} diff --git a/src/runtime/GLES_COMPUTE/functions/GCActivationLayer.cpp b/src/runtime/GLES_COMPUTE/functions/GCActivationLayer.cpp new file mode 100644 index 0000000000..8686416616 --- /dev/null +++ b/src/runtime/GLES_COMPUTE/functions/GCActivationLayer.cpp @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCActivationLayer.h" + +#include "arm_compute/core/GLES_COMPUTE/kernels/GCActivationLayerKernel.h" +#include "arm_compute/core/Helpers.h" +#include "support/ToolchainSupport.h" + +using namespace arm_compute; + +void GCActivationLayer::configure(IGCTensor *input, IGCTensor *output, ActivationLayerInfo act_info) +{ + auto k = arm_compute::support::cpp14::make_unique(); + k->configure(input, output, act_info); + _kernel = std::move(k); +} diff --git a/src/runtime/GLES_COMPUTE/functions/GCBatchNormalizationLayer.cpp b/src/runtime/GLES_COMPUTE/functions/GCBatchNormalizationLayer.cpp new file mode 100755 index 0000000000..2e546a663a --- /dev/null +++ b/src/runtime/GLES_COMPUTE/functions/GCBatchNormalizationLayer.cpp @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCBatchNormalizationLayer.h" + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCScheduler.h" + +using namespace arm_compute; + +GCBatchNormalizationLayer::GCBatchNormalizationLayer() + : _norm_kernel() +{ +} + +void GCBatchNormalizationLayer::configure(const IGCTensor *input, IGCTensor *output, const IGCTensor *mean, const IGCTensor *var, const IGCTensor *beta, const IGCTensor *gamma, float epsilon) +{ + _norm_kernel.configure(input, output, mean, var, beta, gamma, epsilon); +} + +void GCBatchNormalizationLayer::run() +{ + GCScheduler::get().enqueue(_norm_kernel, true); +} diff --git a/src/runtime/GLES_COMPUTE/functions/GCDepthConcatenate.cpp b/src/runtime/GLES_COMPUTE/functions/GCDepthConcatenate.cpp new file mode 100755 index 0000000000..ed756cf261 --- /dev/null +++ b/src/runtime/GLES_COMPUTE/functions/GCDepthConcatenate.cpp @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCDepthConcatenate.h" + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h" +#include "arm_compute/core/PixelValue.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCScheduler.h" +#include "support/ToolchainSupport.h" + +using namespace arm_compute; + +GCDepthConcatenate::GCDepthConcatenate() //NOLINT + : _concat_kernels_vector(), + _border_handlers_vector(), + _num_inputs(0) +{ +} + +void GCDepthConcatenate::configure(std::vector inputs_vector, IGCTensor *output) //NOLINT +{ + ARM_COMPUTE_ERROR_ON(inputs_vector.size() < 2); + + _num_inputs = inputs_vector.size(); + + unsigned int depth_offset = 0; + + _concat_kernels_vector = arm_compute::support::cpp14::make_unique(_num_inputs); + _border_handlers_vector = arm_compute::support::cpp14::make_unique(_num_inputs); + + for(unsigned int i = 0; i < _num_inputs; i++) + { + _concat_kernels_vector[i].configure(inputs_vector.at(i), depth_offset, output); + _border_handlers_vector[i].configure(inputs_vector.at(i), _concat_kernels_vector[i].border_size(), BorderMode::CONSTANT, PixelValue(0)); + + depth_offset += inputs_vector.at(i)->info()->dimension(2); + } +} + +void GCDepthConcatenate::run() +{ + for(unsigned i = 0; i < _num_inputs; i++) + { + GCScheduler::get().enqueue(_border_handlers_vector[i], false); + GCScheduler::get().enqueue(_concat_kernels_vector[i], true); + } +} diff --git a/src/runtime/GLES_COMPUTE/functions/GCDirectConvolutionLayer.cpp b/src/runtime/GLES_COMPUTE/functions/GCDirectConvolutionLayer.cpp new file mode 100644 index 0000000000..ae9dd51b8e --- /dev/null +++ b/src/runtime/GLES_COMPUTE/functions/GCDirectConvolutionLayer.cpp @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCDirectConvolutionLayer.h" + +#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h" +#include "arm_compute/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/PixelValue.h" +#include "arm_compute/core/Utils.h" +#include "support/ToolchainSupport.h" + +using namespace arm_compute; + +void GCDirectConvolutionLayer::configure(const IGCTensor *input, const IGCTensor *weights, const IGCTensor *biases, IGCTensor *output, const PadStrideInfo &conv_info) +{ + int kernel_size = weights->info()->dimension(0); + + if(kernel_size == 1) + { + auto k = arm_compute::support::cpp14::make_unique(); + k->configure(input, weights, biases, output, conv_info); + _kernel = std::move(k); + } + else if(kernel_size == 3) + { + auto k = arm_compute::support::cpp14::make_unique(); + k->configure(input, weights, biases, output, conv_info); + _kernel = std::move(k); + } + else if(kernel_size == 5) + { + auto k = arm_compute::support::cpp14::make_unique(); + k->configure(input, weights, biases, output, conv_info); + _kernel = std::move(k); + } + else + { + ARM_COMPUTE_ERROR("kernel size unsupported!"); + return; + } + + _border_handler.configure(input, _kernel->border_size(), BorderMode::CONSTANT, PixelValue(0)); +} diff --git a/src/runtime/GLES_COMPUTE/functions/GCDropoutLayer.cpp b/src/runtime/GLES_COMPUTE/functions/GCDropoutLayer.cpp new file mode 100644 index 0000000000..032c2fdb1e --- /dev/null +++ b/src/runtime/GLES_COMPUTE/functions/GCDropoutLayer.cpp @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCDropoutLayer.h" + +#include "arm_compute/core/Validate.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCScheduler.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h" + +using namespace arm_compute; + +GCDropoutLayer::GCDropoutLayer() + : _dropout_kernel() +{ +} + +void GCDropoutLayer::configure(const IGCTensor *input, IGCTensor *mask, IGCTensor *output, float ratio, bool forward) +{ + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, mask, output); + + // Configure kernel + _dropout_kernel.configure(input, mask, output, ratio, forward); +} + +void GCDropoutLayer::run() +{ + GCScheduler::get().enqueue(_dropout_kernel); +} diff --git a/src/runtime/GLES_COMPUTE/functions/GCFillBorder.cpp b/src/runtime/GLES_COMPUTE/functions/GCFillBorder.cpp new file mode 100644 index 0000000000..5c2431fa13 --- /dev/null +++ b/src/runtime/GLES_COMPUTE/functions/GCFillBorder.cpp @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCFillBorder.h" + +#include "arm_compute/core/GLES_COMPUTE/kernels/GCFillBorderKernel.h" +#include "arm_compute/core/Helpers.h" +#include "support/ToolchainSupport.h" + +#include + +using namespace arm_compute; + +void GCFillBorder::configure(IGCTensor *tensor, unsigned int border_width, BorderMode border_mode, const PixelValue &constant_border_value) +{ + auto k = arm_compute::support::cpp14::make_unique(); + k->configure(tensor, BorderSize(border_width), border_mode, constant_border_value); + _kernel = std::move(k); +} diff --git a/src/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.cpp b/src/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.cpp new file mode 100644 index 0000000000..63cb40e616 --- /dev/null +++ b/src/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.cpp @@ -0,0 +1,177 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.h" + +#include "arm_compute/core/Validate.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCScheduler.h" +#include "support/ToolchainSupport.h" + +#include + +using namespace arm_compute; + +void GCFullyConnectedLayerReshapeWeights::configure(const IGCTensor *input, IGCTensor *output) +{ + auto k = arm_compute::support::cpp14::make_unique(); + k->configure(input, output); + _kernel = std::move(k); +} + +GCFullyConnectedLayer::GCFullyConnectedLayer() + : _im2col_kernel(), _reshape_weights_kernel(), _mm_kernel(), _accumulate_biases_kernel(), _im2col_output(), _reshape_weights_output(), _are_weights_reshaped(true), _is_fc_after_conv(true), + _accumulate_biases(false) +{ +} + +void GCFullyConnectedLayer::configure_conv_fc(const IGCTensor *input, const IGCTensor *weights, IGCTensor *output) +{ + ARM_COMPUTE_ERROR_ON((weights->info()->dimension(1) != (input->info()->dimension(0) * input->info()->dimension(1) * input->info()->dimension(2)))); + + const DataType dt = input->info()->data_type(); + + // If the fully connected layer is called after a convolution layer, the input tensor must be linearized + + // Initialize output tensor for im2col + TensorShape shape_im2col; + shape_im2col.set(0, input->info()->dimension(0) * input->info()->dimension(1) * input->info()->dimension(2)); + shape_im2col.set(1, input->info()->dimension(3)); + shape_im2col.set(2, input->info()->dimension(4)); + shape_im2col.set(3, 
input->info()->dimension(5)); + _im2col_output.allocator()->init(TensorInfo(shape_im2col, 1, dt)); + + // Configure im2col kernel + _im2col_kernel.configure(input, &_im2col_output, std::make_pair(1, 1), PadStrideInfo(1, 1, 0, 0), false); + + // Configure matrix multiply kernel + _mm_kernel.configure(&_im2col_output, weights, output, 1.0f, false); + + // Allocate the output tensor for im2col once all the configure methods have been called + _im2col_output.allocator()->allocate(); +} + +void GCFullyConnectedLayer::configure_fc_fc(const IGCTensor *input, const IGCTensor *weights, IGCTensor *output) +{ + ARM_COMPUTE_ERROR_ON(input->info()->dimension(0) != weights->info()->dimension(1)); + + // Configure matrix multiply kernel + _mm_kernel.configure(input, weights, output, 1.0f, false); +} + +void GCFullyConnectedLayer::configure(const IGCTensor *input, const IGCTensor *weights, const IGCTensor *biases, IGCTensor *output, bool transpose_weights, bool are_weights_reshaped) +{ + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32, DataType::F16); + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights, output); + ARM_COMPUTE_ERROR_ON(weights->info()->num_dimensions() > 2); + + _are_weights_reshaped = transpose_weights ? 
are_weights_reshaped : true; + _is_fc_after_conv = true; + _accumulate_biases = false; + + if(biases != nullptr) + { + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, biases); + + _accumulate_biases = true; + + // Configure accumulate biases kernel + _accumulate_biases_kernel.configure(output, biases); + } + + // With the Fully Connected layer we can have 4 different cases: + // 1) Convolution layer -> Fully Connected layer without batches + // 2) Fully Connected layer -> Fully Connected layer without batches + // 3) Convolution layer -> Fully Connected layer with batches + // 4) Fully Connected layer -> Fully Connected layer with batches + + const IGCTensor *weights_to_use = weights; + + if(!_are_weights_reshaped) + { + weights_to_use = &_reshape_weights_output; + + // Reshape the weights + _reshape_weights_kernel.configure(weights, &_reshape_weights_output); + } + + // Check if we have a fully connected layer with batches + const bool is_batched_fc_layer = output->info()->dimension(1) > 1; + + if(is_batched_fc_layer) + { + _is_fc_after_conv = (TensorShape::num_max_dimensions >= 4) && (std::equal(input->info()->tensor_shape().cbegin() + 3, + input->info()->tensor_shape().cend(), + output->info()->tensor_shape().cbegin() + 1)); + } + else + { + _is_fc_after_conv = input->info()->num_dimensions() > 1; + } + + if(_is_fc_after_conv) + { + // Fully Connected layer after a Convolution Layer without batches + configure_conv_fc(input, weights_to_use, output); + } + else + { + // Fully Connected layer after a Fully Connected Layer without batches + configure_fc_fc(input, weights_to_use, output); + } + + // Allocate the transpose tensor if the are_weights_reshaped flag is false and once all the configure methods have been called + if(!_are_weights_reshaped) + { + // Allocate the tensor for the weights reshaped + _reshape_weights_output.allocator()->allocate(); + } +} + +void GCFullyConnectedLayer::run() +{ + // Reshape of the weights (happens only once) + 
if(!_are_weights_reshaped) + { + _are_weights_reshaped = true; + _reshape_weights_kernel.run(); + } + + // Linearize input if it comes from a convolutional layer + if(_is_fc_after_conv) + { + GCScheduler::get().enqueue(_im2col_kernel, false); + } + + GCScheduler::get().sync(); + + // Run matrix multiply + GCScheduler::get().enqueue(_mm_kernel, !_accumulate_biases); + + // Accumulate biases if provided + if(_accumulate_biases) + { + GCScheduler::get().sync(); + + GCScheduler::get().enqueue(_accumulate_biases_kernel); + } +} diff --git a/src/runtime/GLES_COMPUTE/functions/GCGEMM.cpp b/src/runtime/GLES_COMPUTE/functions/GCGEMM.cpp new file mode 100644 index 0000000000..c47a0e71fb --- /dev/null +++ b/src/runtime/GLES_COMPUTE/functions/GCGEMM.cpp @@ -0,0 +1,133 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCGEMM.h" + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/GLES_COMPUTE/GCHelpers.h" +#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h" +#include "arm_compute/core/GLES_COMPUTE/kernels/GCGEMMInterleave4x4Kernel.h" +#include "arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixAdditionKernel.h" +#include "arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixMultiplyKernel.h" +#include "arm_compute/core/GLES_COMPUTE/kernels/GCGEMMTranspose1xWKernel.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCScheduler.h" +#include "arm_compute/runtime/ITensorAllocator.h" + +using namespace arm_compute; + +GCGEMM::GCGEMM() + : _interleave_kernel(), _transpose_kernel(), _mm_kernel(), _ma_kernel(), _tmp_a(), _tmp_b(), _is_interleaved_transposed(false), _run_addition(false) +{ +} + +void GCGEMM::configure(const IGCTensor *a, const IGCTensor *b, const IGCTensor *c, IGCTensor *output, float alpha, float beta) +{ + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(a, 1, DataType::F32); + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(a, b, output); + + if(c != nullptr) + { + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(a, c); + ARM_COMPUTE_ERROR_ON_MSG(a->info()->dimension(1) != c->info()->dimension(1), "The C matrix must have the same number of rows as the matrix A"); + ARM_COMPUTE_ERROR_ON_MSG(b->info()->dimension(0) != c->info()->dimension(0), "The C matrix must have the same number of columns as the matrix C"); + ARM_COMPUTE_ERROR_ON_MSG(c->info()->dimension(0) != output->info()->dimension(0), "The C matrix must have the same number of rows as the output matrix"); + ARM_COMPUTE_ERROR_ON_MSG(c->info()->dimension(1) != output->info()->dimension(1), "The C matrix must have the same number of columns as the output matrix"); + } + + 
ARM_COMPUTE_ERROR_ON_MSG(a->info()->dimension(0) != b->info()->dimension(1), "The product AB is defined only if the number of columns in A is equal to the number of rows in B"); + + // If the input tensor has less than 16 rows, we run a special version of GEMM without reshaping the input tensors + _is_interleaved_transposed = a->info()->dimension(1) > 16; + + const IGCTensor *matrix_a = a; + const IGCTensor *matrix_b = b; + + if(_is_interleaved_transposed) + { + matrix_a = &_tmp_a; + matrix_b = &_tmp_b; + + TensorShape shape_tmp_a = a->info()->tensor_shape(); + TensorShape shape_tmp_b = b->info()->tensor_shape(); + + shape_tmp_a.set(0, a->info()->dimension(0) * 4); + shape_tmp_a.set(1, std::ceil(a->info()->dimension(1) / 4.0f)); + + const unsigned int transpose_w = max_gc_vector_width / data_size_from_type(b->info()->data_type()); + shape_tmp_b.set(0, b->info()->dimension(1) * transpose_w); + shape_tmp_b.set(1, std::ceil(b->info()->dimension(0) / static_cast(transpose_w))); + + TensorInfo info_a(shape_tmp_a, 1, a->info()->data_type(), a->info()->fixed_point_position()); + _tmp_a.allocator()->init(info_a); + + TensorInfo info_b(shape_tmp_b, 1, b->info()->data_type(), b->info()->fixed_point_position()); + _tmp_b.allocator()->init(info_b); + + // Configure interleave kernel + _interleave_kernel.configure(a, &_tmp_a); + + // Configure transpose kernel + _transpose_kernel.configure(b, &_tmp_b); + } + + _mm_kernel.configure(matrix_a, matrix_b, output, alpha, _is_interleaved_transposed); + + if(_is_interleaved_transposed) + { + // Allocate intermediate tensors + _tmp_a.allocator()->allocate(); + _tmp_b.allocator()->allocate(); + } + + // Configure matrix addition kernel + if(beta != 0 && c != nullptr) + { + _ma_kernel.configure(c, output, beta); + _run_addition = true; + } +} + +void GCGEMM::run() +{ + if(_is_interleaved_transposed) + { + // Run interleave kernel + GCScheduler::get().enqueue(_interleave_kernel, false); + + // Run transpose kernel + 
GCScheduler::get().enqueue(_transpose_kernel, false); + } + + // Run matrix multiply kernel + GCScheduler::get().enqueue(_mm_kernel, !_run_addition); + + // Run matrix addition kernel + if(_run_addition) + { + GCScheduler::get().enqueue(_ma_kernel); + } +} diff --git a/src/runtime/GLES_COMPUTE/functions/GCGEMMInterleave4x4.cpp b/src/runtime/GLES_COMPUTE/functions/GCGEMMInterleave4x4.cpp new file mode 100644 index 0000000000..44c940e126 --- /dev/null +++ b/src/runtime/GLES_COMPUTE/functions/GCGEMMInterleave4x4.cpp @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCGEMMInterleave4x4.h" + +#include "arm_compute/core/GLES_COMPUTE/kernels/GCGEMMInterleave4x4Kernel.h" +#include "support/ToolchainSupport.h" + +using namespace arm_compute; + +void GCGEMMInterleave4x4::configure(const IGCTensor *input, IGCTensor *output) +{ + auto k = arm_compute::support::cpp14::make_unique(); + k->configure(input, output); + _kernel = std::move(k); +} diff --git a/src/runtime/GLES_COMPUTE/functions/GCGEMMTranspose1xW.cpp b/src/runtime/GLES_COMPUTE/functions/GCGEMMTranspose1xW.cpp new file mode 100644 index 0000000000..893fa5572b --- /dev/null +++ b/src/runtime/GLES_COMPUTE/functions/GCGEMMTranspose1xW.cpp @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCGEMMTranspose1xW.h" + +#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h" +#include "arm_compute/core/GLES_COMPUTE/kernels/GCGEMMTranspose1xWKernel.h" +#include "arm_compute/core/Types.h" +#include "support/ToolchainSupport.h" + +using namespace arm_compute; + +void GCGEMMTranspose1xW::configure(const IGCTensor *input, IGCTensor *output) +{ + auto k = arm_compute::support::cpp14::make_unique(); + k->configure(input, output); + _kernel = std::move(k); +} diff --git a/src/runtime/GLES_COMPUTE/functions/GCNormalizationLayer.cpp b/src/runtime/GLES_COMPUTE/functions/GCNormalizationLayer.cpp new file mode 100644 index 0000000000..d30ed52d5c --- /dev/null +++ b/src/runtime/GLES_COMPUTE/functions/GCNormalizationLayer.cpp @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCNormalizationLayer.h" + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/PixelValue.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/core/Validate.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCScheduler.h" + +using namespace arm_compute; + +GCNormalizationLayer::GCNormalizationLayer() + : _squared_input(), _norm_kernel(), _multiply_kernel(), _border_handler() +{ +} + +void GCNormalizationLayer::configure(const IGCTensor *input, IGCTensor *output, const NormalizationLayerInfo &norm_info) +{ + ARM_COMPUTE_ERROR_ON(input == nullptr); + + _squared_input.allocator()->init(TensorInfo(input->info()->tensor_shape(), 1, input->info()->data_type())); + + _norm_kernel.configure(input, &_squared_input, output, norm_info); + _multiply_kernel.configure(input, input, &_squared_input, 1.0f); + // Fill the border by 3 elements since we need vload4 in the IN_MAP normalization kernel + _border_handler.configure(&_squared_input, _norm_kernel.border_size(), BorderMode::CONSTANT, PixelValue(0)); + + // Allocate intermediate buffers + _squared_input.allocator()->allocate(); +} + +void GCNormalizationLayer::run() +{ + GCScheduler::get().enqueue(_multiply_kernel, false); + GCScheduler::get().enqueue(_border_handler, false); + GCScheduler::get().enqueue(_norm_kernel, false); +} diff --git a/src/runtime/GLES_COMPUTE/functions/GCPixelWiseMultiplication.cpp b/src/runtime/GLES_COMPUTE/functions/GCPixelWiseMultiplication.cpp new file mode 100755 index 0000000000..0cd87ea875 --- /dev/null +++ b/src/runtime/GLES_COMPUTE/functions/GCPixelWiseMultiplication.cpp @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCPixelWiseMultiplication.h" + +#include "arm_compute/core/GLES_COMPUTE/kernels/GCPixelWiseMultiplicationKernel.h" +#include "support/ToolchainSupport.h" + +#include + +using namespace arm_compute; + +void GCPixelWiseMultiplication::configure(const IGCTensor *input1, const IGCTensor *input2, IGCTensor *output, float scale) +{ + auto k = arm_compute::support::cpp14::make_unique(); + k->configure(input1, input2, output, scale); + _kernel = std::move(k); +} diff --git a/src/runtime/GLES_COMPUTE/functions/GCPoolingLayer.cpp b/src/runtime/GLES_COMPUTE/functions/GCPoolingLayer.cpp new file mode 100644 index 0000000000..46a60cddef --- /dev/null +++ b/src/runtime/GLES_COMPUTE/functions/GCPoolingLayer.cpp @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCPoolingLayer.h" + +#include "arm_compute/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.h" +#include "arm_compute/core/PixelValue.h" +#include "support/ToolchainSupport.h" + +using namespace arm_compute; + +void GCPoolingLayer::configure(IGCTensor *input, IGCTensor *output, const PoolingLayerInfo &pool_info) +{ + // Configure pooling kernel + auto k = arm_compute::support::cpp14::make_unique(); + k->configure(input, output, pool_info); + _kernel = std::move(k); + + // Configure border depending on operation required + BorderMode border_mode = (PoolingType::MAX == pool_info.pool_type()) ? 
BorderMode::REPLICATE : BorderMode::CONSTANT; + _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(0.0f)); +} diff --git a/src/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.cpp b/src/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.cpp new file mode 100644 index 0000000000..d7d47d2802 --- /dev/null +++ b/src/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.cpp @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.h" + +#include "arm_compute/core/GLES_COMPUTE/kernels/GCSoftmaxLayerKernel.h" +#include "arm_compute/core/Helpers.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCScheduler.h" + +using namespace arm_compute; + +GCSoftmaxLayer::GCSoftmaxLayer() + : _max_kernel(), _shift_exp_sum_kernel(), _norm_kernel(), _max(), _sum(), _tmp() +{ +} + +void GCSoftmaxLayer::configure(const IGCTensor *input, IGCTensor *output) +{ + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); + + // Create intermediate tensors shapes + _tmp.allocator()->init(TensorInfo(input->info()->tensor_shape(), input->info()->num_channels(), input->info()->data_type(), input->info()->fixed_point_position())); + + TensorShape shape = input->info()->tensor_shape(); + shape.set(0, 1); + TensorInfo tensor_info_max_sum(shape, input->info()->num_channels(), input->info()->data_type(), input->info()->fixed_point_position()); + _max.allocator()->init(tensor_info_max_sum); + _sum.allocator()->init(tensor_info_max_sum); + + // Configure Kernels + _max_kernel.configure(input, &_max); + _shift_exp_sum_kernel.configure(input, &_max, &_tmp, &_sum); + _norm_kernel.configure(&_tmp, &_sum, output); + + // Allocate intermediate buffers + _tmp.allocator()->allocate(); + _max.allocator()->allocate(); + _sum.allocator()->allocate(); +} + +void GCSoftmaxLayer::run() +{ + GCScheduler::get().enqueue(_max_kernel, false); + GCScheduler::get().enqueue(_shift_exp_sum_kernel, false); + GCScheduler::get().enqueue(_norm_kernel); +} diff --git a/src/runtime/GLES_COMPUTE/functions/GCTranspose.cpp b/src/runtime/GLES_COMPUTE/functions/GCTranspose.cpp new file mode 100644 index 0000000000..c2dc122e64 --- /dev/null +++ b/src/runtime/GLES_COMPUTE/functions/GCTranspose.cpp @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCTranspose.h" + +#include "arm_compute/core/GLES_COMPUTE/kernels/GCTransposeKernel.h" +#include "support/ToolchainSupport.h" + +#include <utility> + +using namespace arm_compute; + +void GCTranspose::configure(const IGCTensor *input, IGCTensor *output) +{ + auto k = arm_compute::support::cpp14::make_unique<GCTransposeKernel>(); + k->configure(input, output); + _kernel = std::move(k); +} diff --git a/src/runtime/NEON/functions/NENormalizationLayer.cpp b/src/runtime/NEON/functions/NENormalizationLayer.cpp index e01ef6660d..da4314b5ed 100644 --- a/src/runtime/NEON/functions/NENormalizationLayer.cpp +++ b/src/runtime/NEON/functions/NENormalizationLayer.cpp @@ -37,7 +37,7 @@ NENormalizationLayer::NENormalizationLayer(std::shared_ptr memor { } -void NENormalizationLayer::configure(const ITensor *input, ITensor *output, NormalizationLayerInfo norm_info) +void NENormalizationLayer::configure(const ITensor *input, ITensor *output, const NormalizationLayerInfo &norm_info) { ARM_COMPUTE_ERROR_ON(input == nullptr); diff --git a/tests/GLES_COMPUTE/GCAccessor.h b/tests/GLES_COMPUTE/GCAccessor.h new file mode 100644 index 0000000000..0f7c491c3c --- /dev/null +++ b/tests/GLES_COMPUTE/GCAccessor.h @@ -0,0 +1,144 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_TEST_GCACCESSOR_H__ +#define __ARM_COMPUTE_TEST_GCACCESSOR_H__ + +#include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h" +#include "tests/IAccessor.h" + +namespace arm_compute +{ +namespace test +{ +/** Accessor implementation for @ref GCTensor objects. */ +class GCAccessor : public IAccessor +{ +public: + /** Create an accessor for the given @p tensor. + * + * @param[in, out] tensor To be accessed tensor. + * + * @note The GLES memory is mapped by the constructor. + * + */ + GCAccessor(GCTensor &tensor); + + GCAccessor(const GCAccessor &) = delete; + GCAccessor &operator=(const GCAccessor &) = delete; + GCAccessor(GCAccessor &&) = default; + GCAccessor &operator=(GCAccessor &&) = default; + + /** Destructor that unmaps the GLES memory. 
*/ + ~GCAccessor(); + + TensorShape shape() const override; + size_t element_size() const override; + size_t size() const override; + Format format() const override; + DataType data_type() const override; + int num_channels() const override; + int num_elements() const override; + PaddingSize padding() const override; + int fixed_point_position() const override; + QuantizationInfo quantization_info() const override; + const void *operator()(const Coordinates &coord) const override; + void *operator()(const Coordinates &coord) override; + +private: + GCTensor &_tensor; +}; + +inline GCAccessor::GCAccessor(GCTensor &tensor) + : _tensor{ tensor } +{ + _tensor.map(); +} + +inline GCAccessor::~GCAccessor() +{ + _tensor.unmap(); +} + +inline TensorShape GCAccessor::shape() const +{ + return _tensor.info()->tensor_shape(); +} + +inline size_t GCAccessor::element_size() const +{ + return _tensor.info()->element_size(); +} + +inline size_t GCAccessor::size() const +{ + return _tensor.info()->total_size(); +} + +inline Format GCAccessor::format() const +{ + return _tensor.info()->format(); +} + +inline DataType GCAccessor::data_type() const +{ + return _tensor.info()->data_type(); +} + +inline int GCAccessor::num_channels() const +{ + return _tensor.info()->num_channels(); +} + +inline int GCAccessor::num_elements() const +{ + return _tensor.info()->tensor_shape().total_size(); +} + +inline PaddingSize GCAccessor::padding() const +{ + return _tensor.info()->padding(); +} + +inline int GCAccessor::fixed_point_position() const +{ + return _tensor.info()->fixed_point_position(); +} + +inline QuantizationInfo GCAccessor::quantization_info() const +{ + return _tensor.info()->quantization_info(); +} + +inline const void *GCAccessor::operator()(const Coordinates &coord) const +{ + return _tensor.ptr_to_element(coord); +} + +inline void *GCAccessor::operator()(const Coordinates &coord) +{ + return _tensor.ptr_to_element(coord); +} +} // namespace test +} // namespace arm_compute 
+#endif /* __ARM_COMPUTE_TEST_GCACCESSOR_H__ */ diff --git a/tests/GLES_COMPUTE/Helper.h b/tests/GLES_COMPUTE/Helper.h new file mode 100644 index 0000000000..5f6460aa61 --- /dev/null +++ b/tests/GLES_COMPUTE/Helper.h @@ -0,0 +1,115 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_TEST_GC_HELPER_H__ +#define __ARM_COMPUTE_TEST_GC_HELPER_H__ + +#include "tests/Globals.h" + +#include "arm_compute/core/ITensor.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h" + +#include + +namespace arm_compute +{ +namespace test +{ +namespace gles_compute +{ +/** Helper to create an empty tensor. + * + * @param[in] shape Desired shape. + * @param[in] data_type Desired data type. 
+ * @param[in] num_channels (Optional) It indicates the number of channels for each tensor element + * @param[in] fixed_point_position (Optional) Fixed point position that expresses the number of bits for the fractional part of the number when the tensor's data type is QS8 or QS16. + * + * @return Empty @ref GCTensor with the specified shape and data type. + */ +inline GCTensor create_tensor(const TensorShape &shape, DataType data_type, int num_channels = 1, int fixed_point_position = 0) +{ + GCTensor tensor; + tensor.allocator()->init(TensorInfo(shape, num_channels, data_type, fixed_point_position)); + + return tensor; +} + +/** Helper to create an empty tensor. + * + * @param[in] name File name from which to get the dimensions. + * @param[in] data_type Desired data type. + * + * @return Empty @ref GCTensor with the specified shape and data type. + */ +inline GCTensor create_tensor(const std::string &name, DataType data_type) +{ + constexpr unsigned int num_channels = 1; + + const RawTensor &raw = library->get(name); + + GCTensor tensor; + tensor.allocator()->init(TensorInfo(raw.shape(), num_channels, data_type)); + + return tensor; +} + +/** Helper to print tensor. + * + * @param[in] tensor Tensor to print. + * @param[in] name Tensor name. + * @param[in] info Format information. + * + * @return Empty @ref GCTensor with the specified shape and data type. + */ +inline void print_tensor(ITensor &tensor, const std::string &name, IOFormatInfo info = IOFormatInfo(IOFormatInfo::PrintRegion::Full)) +{ + std::ostringstream s; + IGCTensor &t = dynamic_cast(tensor); + t.map(); + t.print(s, info); + + std::cout << name << ":" << std::endl; + std::cout << s.str().c_str(); + t.unmap(); + + return; +} + +/** Helper to sync tensor, if tensor is not used, GPU have optimized the operation. + * + * @param[in] tensor Tensor to be sync. + * + * @return Empty @ref GCTensor with the specified shape and data type. 
+ */ +inline void force_sync_tensor(ITensor &tensor) +{ + IGCTensor &t = dynamic_cast<IGCTensor &>(tensor); + t.map(); + t.unmap(); + + return; +} +} // namespace gles_compute +} // namespace test +} // namespace arm_compute +#endif /* __ARM_COMPUTE_TEST_GC_HELPER_H__ */ diff --git a/tests/SConscript b/tests/SConscript index 37c96d2f22..311eeedde1 100644 --- a/tests/SConscript +++ b/tests/SConscript @@ -68,6 +68,8 @@ test_env.Append(CPPPATH = ["#3rdparty/include"]) test_env.Append(LIBPATH = ["#3rdparty/%s/%s" % (env['os'], env['arch'])]) test_env.Append(LIBPATH = ["#build/%s" % env['build_dir']]) test_env.Append(LIBPATH = ["#build/%s/opencl-1.2-stubs" % env['build_dir']]) +if env['gles_compute'] and env['os'] != 'android': + test_env.Append(LIBPATH = ["#build/%s/opengles-3.1/stubs" % env['build_dir']]) Import("arm_compute_test_framework") test_env.Append(LIBS = arm_compute_test_framework) @@ -109,6 +111,27 @@ if env['neon']: files_validation += Glob('validation/NEON/*/' + filter_pattern) files_validation += Glob('validation/NEON/' + filter_pattern) +if env['gles_compute']: + if env['os'] != 'android': + Import('egl') + Import('glesv2') + + test_env.Append(CPPPATH = ["#opengles-3.1/include", "#opengles-3.1/mali_include"]) + test_env.Append(LIBS = ["EGL", "GLESv2"]) + else: + if env['arch'] != 'armv7a': + test_env.Append(LIBS = ["EGL", "GLESv3"]) + else: + test_env.Append(LIBS = ["EGL", "GLESv2"]) + + test_env.Append(CPPDEFINES=['ARM_COMPUTE_GC']) + + files_benchmark += Glob('benchmark/GLES_COMPUTE/*/*.cpp') + files_benchmark += Glob('benchmark/GLES_COMPUTE/*.cpp') + + files_validation += Glob('validation/GLES_COMPUTE/*/*.cpp') + files_validation += Glob('validation/GLES_COMPUTE/*.cpp') + if env['os'] == 'android': test_env.Append(LIBS = ["log"]) else: @@ -121,6 +144,9 @@ if test_env['benchmark_tests']: if env['opencl']: Depends(arm_compute_benchmark, opencl) + if env['gles_compute'] and env['os'] != 'android': + Depends(arm_compute_benchmark, egl) + 
Depends(arm_compute_benchmark, glesv2) Default(arm_compute_benchmark) Export('arm_compute_benchmark') @@ -132,6 +158,9 @@ if test_env['validation_tests']: if env['opencl']: Depends(arm_compute_validation, opencl) + if env['gles_compute'] and env['os'] != 'android': + Depends(arm_compute_validation, egl) + Depends(arm_compute_validation, glesv2) Default(arm_compute_validation) Export('arm_compute_validation') diff --git a/tests/benchmark/GLES_COMPUTE/DirectConvolutionLayer.cpp b/tests/benchmark/GLES_COMPUTE/DirectConvolutionLayer.cpp new file mode 100644 index 0000000000..89ca192f83 --- /dev/null +++ b/tests/benchmark/GLES_COMPUTE/DirectConvolutionLayer.cpp @@ -0,0 +1,107 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/core/TensorShape.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCTensorAllocator.h" +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCDirectConvolutionLayer.h" +#include "tests/GLES_COMPUTE/GCAccessor.h" +#include "tests/benchmark/fixtures/ConvolutionLayerFixture.h" +#include "tests/datasets/system_tests/alexnet/AlexNetConvolutionLayerDataset.h" +#include "tests/datasets/system_tests/googlenet/inceptionv1/GoogLeNetInceptionV1ConvolutionLayerDataset.h" +#include "tests/datasets/system_tests/googlenet/inceptionv4/GoogLeNetInceptionV4ConvolutionLayerDataset.h" +#include "tests/datasets/system_tests/squeezenet/SqueezeNetConvolutionLayerDataset.h" +#include "tests/datasets/system_tests/vgg/vgg16/VGG16ConvolutionLayerDataset.h" +#include "tests/datasets/system_tests/yolo/v2/YOLOV2ConvolutionLayerDataset.h" +#include "tests/framework/Macros.h" +#include "tests/framework/datasets/Datasets.h" +#include "utils/TypePrinter.h" + +namespace arm_compute +{ +namespace test +{ +namespace +{ +const auto data_types = framework::dataset::make("DataType", { DataType::F32, DataType::F16 }); +} // namespace + +using GCConvolutionLayerFixture = ConvolutionLayerFixture; + +TEST_SUITE(GC) + +REGISTER_FIXTURE_DATA_TEST_CASE(AlexNetDirectConvolutionLayer, GCConvolutionLayerFixture, framework::DatasetMode::ALL, + framework::dataset::combine(framework::dataset::combine(datasets::AlexNetDirectConvolutionLayerDataset(), + data_types), + framework::dataset::make("Batches", 1))); + +REGISTER_FIXTURE_DATA_TEST_CASE(GoogLeNetInceptionV1DirectConvolutionLayer, GCConvolutionLayerFixture, framework::DatasetMode::ALL, + framework::dataset::combine(framework::dataset::combine(datasets::GoogLeNetInceptionV1DirectConvolutionLayerDataset(), + data_types), + framework::dataset::make("Batches", 1))); + +REGISTER_FIXTURE_DATA_TEST_CASE(GoogLeNetInceptionV4DirectConvolutionLayer, 
GCConvolutionLayerFixture, framework::DatasetMode::ALL, + framework::dataset::combine(framework::dataset::combine(datasets::GoogLeNetInceptionV4DirectConvolutionLayerDataset(), + data_types), + framework::dataset::make("Batches", 1))); + +REGISTER_FIXTURE_DATA_TEST_CASE(SqueezeNetDirectConvolutionLayer, GCConvolutionLayerFixture, framework::DatasetMode::ALL, + framework::dataset::combine(framework::dataset::combine(datasets::SqueezeNetConvolutionLayerDataset(), + data_types), + framework::dataset::make("Batches", 1))); + +TEST_SUITE(NIGHTLY) +REGISTER_FIXTURE_DATA_TEST_CASE(AlexNetDirectConvolutionLayer, GCConvolutionLayerFixture, framework::DatasetMode::NIGHTLY, + framework::dataset::combine(framework::dataset::combine(datasets::AlexNetDirectConvolutionLayerDataset(), + data_types), + framework::dataset::make("Batches", { 4, 8 }))); + +REGISTER_FIXTURE_DATA_TEST_CASE(GoogLeNetInceptionV1DirectConvolutionLayer, GCConvolutionLayerFixture, framework::DatasetMode::NIGHTLY, + framework::dataset::combine(framework::dataset::combine(datasets::GoogLeNetInceptionV1DirectConvolutionLayerDataset(), + data_types), + framework::dataset::make("Batches", { 4, 8 }))); + +REGISTER_FIXTURE_DATA_TEST_CASE(GoogLeNetInceptionV4DirectConvolutionLayer, GCConvolutionLayerFixture, framework::DatasetMode::NIGHTLY, + framework::dataset::combine(framework::dataset::combine(datasets::GoogLeNetInceptionV4DirectConvolutionLayerDataset(), + data_types), + framework::dataset::make("Batches", { 4, 8 }))); + +REGISTER_FIXTURE_DATA_TEST_CASE(SqueezeNetDirectConvolutionLayer, GCConvolutionLayerFixture, framework::DatasetMode::NIGHTLY, + framework::dataset::combine(framework::dataset::combine(datasets::SqueezeNetConvolutionLayerDataset(), + data_types), + framework::dataset::make("Batches", { 4, 8 }))); + +REGISTER_FIXTURE_DATA_TEST_CASE(VGG16DirectConvolutionLayer, GCConvolutionLayerFixture, framework::DatasetMode::NIGHTLY, + 
framework::dataset::combine(framework::dataset::combine(datasets::VGG16ConvolutionLayerDataset(), + data_types), + framework::dataset::make("Batches", { 1, 4, 8 }))); + +REGISTER_FIXTURE_DATA_TEST_CASE(YOLOV2DirectConvolutionLayer, GCConvolutionLayerFixture, framework::DatasetMode::NIGHTLY, + framework::dataset::combine(framework::dataset::combine(datasets::YOLOV2ConvolutionLayerDataset(), + data_types), + framework::dataset::make("Batches", { 1, 4, 8 }))); +TEST_SUITE_END() +TEST_SUITE_END() +} // namespace test +} // namespace arm_compute diff --git a/tests/benchmark/GLES_COMPUTE/FullyConnectedLayer.cpp b/tests/benchmark/GLES_COMPUTE/FullyConnectedLayer.cpp new file mode 100644 index 0000000000..36578edbfb --- /dev/null +++ b/tests/benchmark/GLES_COMPUTE/FullyConnectedLayer.cpp @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/core/TensorShape.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCTensorAllocator.h" +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.h" +#include "tests/GLES_COMPUTE/GCAccessor.h" +#include "tests/benchmark/fixtures/FullyConnectedLayerFixture.h" +#include "tests/datasets/system_tests/alexnet/AlexNetFullyConnectedLayerDataset.h" +#include "tests/datasets/system_tests/googlenet/inceptionv1/GoogLeNetInceptionV1FullyConnectedLayerDataset.h" +#include "tests/datasets/system_tests/googlenet/inceptionv4/GoogLeNetInceptionV4FullyConnectedLayerDataset.h" +#include "tests/datasets/system_tests/lenet5/LeNet5FullyConnectedLayerDataset.h" +#include "tests/datasets/system_tests/vgg/vgg16/VGG16FullyConnectedLayerDataset.h" +#include "tests/framework/Macros.h" +#include "tests/framework/datasets/Datasets.h" +#include "utils/TypePrinter.h" + +namespace arm_compute +{ +namespace test +{ +namespace +{ +const auto data_types = framework::dataset::make("DataType", { DataType::F32, DataType::F16 }); +} // namespace + +using GCFullyConnectedLayerFixture = FullyConnectedLayerFixture; + +TEST_SUITE(GC) + +REGISTER_FIXTURE_DATA_TEST_CASE(AlexNetFullyConnectedLayer, GCFullyConnectedLayerFixture, framework::DatasetMode::ALL, + framework::dataset::combine(framework::dataset::combine(datasets::AlexNetFullyConnectedLayerDataset(), + data_types), + framework::dataset::make("Batches", 1))); + +REGISTER_FIXTURE_DATA_TEST_CASE(LeNet5FullyConnectedLayer, GCFullyConnectedLayerFixture, framework::DatasetMode::ALL, + framework::dataset::combine(framework::dataset::combine(datasets::LeNet5FullyConnectedLayerDataset(), + data_types), + framework::dataset::make("Batches", 1))); + +REGISTER_FIXTURE_DATA_TEST_CASE(VGG16FullyConnectedLayer, GCFullyConnectedLayerFixture, framework::DatasetMode::ALL, + 
framework::dataset::combine(framework::dataset::combine(datasets::VGG16FullyConnectedLayerDataset(), + data_types), + framework::dataset::make("Batches", 1))); + +REGISTER_FIXTURE_DATA_TEST_CASE(GoogLeNetInceptionV1FullyConnectedLayer, GCFullyConnectedLayerFixture, framework::DatasetMode::ALL, + framework::dataset::combine(framework::dataset::combine(datasets::GoogLeNetInceptionV1FullyConnectedLayerDataset(), + data_types), + framework::dataset::make("Batches", 1))); + +REGISTER_FIXTURE_DATA_TEST_CASE(GoogLeNetInceptionV4FullyConnectedLayer, GCFullyConnectedLayerFixture, framework::DatasetMode::ALL, + framework::dataset::combine(framework::dataset::combine(datasets::GoogLeNetInceptionV4FullyConnectedLayerDataset(), + data_types), + framework::dataset::make("Batches", 1))); + +TEST_SUITE(NIGHTLY) +REGISTER_FIXTURE_DATA_TEST_CASE(AlexNetFullyConnectedLayer, GCFullyConnectedLayerFixture, framework::DatasetMode::NIGHTLY, + framework::dataset::combine(framework::dataset::combine(datasets::AlexNetFullyConnectedLayerDataset(), + data_types), + framework::dataset::make("Batches", { 4, 8 }))); + +REGISTER_FIXTURE_DATA_TEST_CASE(LeNet5FullyConnectedLayer, GCFullyConnectedLayerFixture, framework::DatasetMode::NIGHTLY, + framework::dataset::combine(framework::dataset::combine(datasets::LeNet5FullyConnectedLayerDataset(), + data_types), + framework::dataset::make("Batches", { 4, 8 }))); + +REGISTER_FIXTURE_DATA_TEST_CASE(VGG16FullyConnectedLayer, GCFullyConnectedLayerFixture, framework::DatasetMode::NIGHTLY, + framework::dataset::combine(framework::dataset::combine(datasets::VGG16FullyConnectedLayerDataset(), + data_types), + framework::dataset::make("Batches", { 4, 8 }))); + +REGISTER_FIXTURE_DATA_TEST_CASE(GoogLeNetInceptionV1FullyConnectedLayer, GCFullyConnectedLayerFixture, framework::DatasetMode::NIGHTLY, + framework::dataset::combine(framework::dataset::combine(datasets::GoogLeNetInceptionV1FullyConnectedLayerDataset(), + data_types), + framework::dataset::make("Batches", 
{ 4, 8 }))); + +REGISTER_FIXTURE_DATA_TEST_CASE(GoogLeNetInceptionV4FullyConnectedLayer, GCFullyConnectedLayerFixture, framework::DatasetMode::NIGHTLY, + framework::dataset::combine(framework::dataset::combine(datasets::GoogLeNetInceptionV4FullyConnectedLayerDataset(), + data_types), + framework::dataset::make("Batches", { 4, 8 }))); +TEST_SUITE_END() +TEST_SUITE_END() +} // namespace test +} // namespace arm_compute diff --git a/tests/benchmark/GLES_COMPUTE/GEMM.cpp b/tests/benchmark/GLES_COMPUTE/GEMM.cpp new file mode 100644 index 0000000000..69d8e54919 --- /dev/null +++ b/tests/benchmark/GLES_COMPUTE/GEMM.cpp @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/core/TensorShape.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCTensorAllocator.h" +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCGEMM.h" +#include "tests/GLES_COMPUTE/GCAccessor.h" +#include "tests/benchmark/fixtures/GEMMFixture.h" +#include "tests/datasets/GoogleNetGEMMDataset.h" +#include "tests/datasets/MatrixMultiplyGEMMDataset.h" +#include "tests/datasets/system_tests/googlenet/inceptionv1/GoogLeNetInceptionV1GEMMDataset.h" +#include "tests/framework/Macros.h" +#include "tests/framework/datasets/Datasets.h" +#include "utils/TypePrinter.h" + +namespace arm_compute +{ +namespace test +{ +namespace +{ +const auto data_types = framework::dataset::make("DataType", { DataType::F32 }); +} // namespace + +using GCGEMMFixture = GEMMFixture<GCTensor, GCGEMM, GCAccessor>; + +TEST_SUITE(GC) + +REGISTER_FIXTURE_DATA_TEST_CASE(GoogLeNetInceptionV1GEMM, GCGEMMFixture, framework::DatasetMode::ALL, framework::dataset::combine(datasets::GoogLeNetInceptionV1GEMMDataset(), data_types)); +REGISTER_FIXTURE_DATA_TEST_CASE(MatrixMultiplyGEMM, GCGEMMFixture, framework::DatasetMode::ALL, framework::dataset::combine(datasets::MatrixMultiplyGEMMDataset(), data_types)); +REGISTER_FIXTURE_DATA_TEST_CASE(GoogleNetGEMM, GCGEMMFixture, framework::DatasetMode::NIGHTLY, framework::dataset::combine(datasets::GoogleNetGEMMDataset(), data_types)); + +TEST_SUITE_END() +} // namespace test +} // namespace arm_compute diff --git a/tests/benchmark/GLES_COMPUTE/PoolingLayer.cpp b/tests/benchmark/GLES_COMPUTE/PoolingLayer.cpp new file mode 100644 index 0000000000..87c5382073 --- /dev/null +++ b/tests/benchmark/GLES_COMPUTE/PoolingLayer.cpp @@ -0,0 +1,128 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/core/TensorShape.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCTensorAllocator.h" +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCPoolingLayer.h" +#include "tests/GLES_COMPUTE/GCAccessor.h" +#include "tests/benchmark/fixtures/PoolingLayerFixture.h" +#include "tests/datasets/system_tests/alexnet/AlexNetPoolingLayerDataset.h" +#include "tests/datasets/system_tests/googlenet/inceptionv1/GoogLeNetInceptionV1PoolingLayerDataset.h" +#include "tests/datasets/system_tests/googlenet/inceptionv4/GoogLeNetInceptionV4PoolingLayerDataset.h" +#include "tests/datasets/system_tests/lenet5/LeNet5PoolingLayerDataset.h" +#include "tests/datasets/system_tests/squeezenet/SqueezeNetPoolingLayerDataset.h" +#include "tests/datasets/system_tests/vgg/vgg16/VGG16PoolingLayerDataset.h" +#include "tests/datasets/system_tests/yolo/v2/YOLOV2PoolingLayerDataset.h" +#include "tests/framework/Macros.h" +#include "tests/framework/datasets/Datasets.h" +#include "utils/TypePrinter.h" + +namespace arm_compute +{ +namespace test +{ +namespace +{ +const auto data_types = framework::dataset::make("DataType", { DataType::F32 }); +} // namespace + +using GCPoolingLayerFixture = PoolingLayerFixture; + +TEST_SUITE(GC) + +REGISTER_FIXTURE_DATA_TEST_CASE(AlexNetPoolingLayer, GCPoolingLayerFixture, framework::DatasetMode::ALL, + framework::dataset::combine(framework::dataset::combine(datasets::AlexNetPoolingLayerDataset(), + data_types), + framework::dataset::make("Batches", 1))); + +REGISTER_FIXTURE_DATA_TEST_CASE(LeNet5PoolingLayer, GCPoolingLayerFixture, framework::DatasetMode::ALL, + framework::dataset::combine(framework::dataset::combine(datasets::LeNet5PoolingLayerDataset(), + data_types), + framework::dataset::make("Batches", 1))); + +REGISTER_FIXTURE_DATA_TEST_CASE(GoogLeNetInceptionV1PoolingLayer, GCPoolingLayerFixture, framework::DatasetMode::ALL, + 
framework::dataset::combine(framework::dataset::combine(datasets::GoogLeNetInceptionV1PoolingLayerDataset(), + data_types), + framework::dataset::make("Batches", 1))); + +REGISTER_FIXTURE_DATA_TEST_CASE(GoogLeNetInceptionV4PoolingLayer, GCPoolingLayerFixture, framework::DatasetMode::ALL, + framework::dataset::combine(framework::dataset::combine(datasets::GoogLeNetInceptionV4PoolingLayerDataset(), + data_types), + framework::dataset::make("Batches", 1))); + +REGISTER_FIXTURE_DATA_TEST_CASE(SqueezeNetPoolingLayer, GCPoolingLayerFixture, framework::DatasetMode::ALL, + framework::dataset::combine(framework::dataset::combine(datasets::SqueezeNetPoolingLayerDataset(), + data_types), + framework::dataset::make("Batches", 1))); + +REGISTER_FIXTURE_DATA_TEST_CASE(VGG16PoolingLayer, GCPoolingLayerFixture, framework::DatasetMode::ALL, + framework::dataset::combine(framework::dataset::combine(datasets::VGG16PoolingLayerDataset(), + data_types), + framework::dataset::make("Batches", 1))); + +REGISTER_FIXTURE_DATA_TEST_CASE(YOLOV2PoolingLayer, GCPoolingLayerFixture, framework::DatasetMode::ALL, + framework::dataset::combine(framework::dataset::combine(datasets::YOLOV2PoolingLayerDataset(), + data_types), + framework::dataset::make("Batches", 1))); + +TEST_SUITE(NIGHTLY) +REGISTER_FIXTURE_DATA_TEST_CASE(AlexNetPoolingLayer, GCPoolingLayerFixture, framework::DatasetMode::NIGHTLY, + framework::dataset::combine(framework::dataset::combine(datasets::AlexNetPoolingLayerDataset(), + data_types), + framework::dataset::make("Batches", { 4, 8 }))); + +REGISTER_FIXTURE_DATA_TEST_CASE(LeNet5PoolingLayer, GCPoolingLayerFixture, framework::DatasetMode::NIGHTLY, + framework::dataset::combine(framework::dataset::combine(datasets::LeNet5PoolingLayerDataset(), + data_types), + framework::dataset::make("Batches", { 4, 8 }))); + +REGISTER_FIXTURE_DATA_TEST_CASE(GoogLeNetInceptionV1PoolingLayer, GCPoolingLayerFixture, framework::DatasetMode::NIGHTLY, + 
framework::dataset::combine(framework::dataset::combine(datasets::GoogLeNetInceptionV1PoolingLayerDataset(), + data_types), + framework::dataset::make("Batches", { 4, 8 }))); + +REGISTER_FIXTURE_DATA_TEST_CASE(GoogLeNetInceptionV4PoolingLayer, GCPoolingLayerFixture, framework::DatasetMode::NIGHTLY, + framework::dataset::combine(framework::dataset::combine(datasets::GoogLeNetInceptionV4PoolingLayerDataset(), + data_types), + framework::dataset::make("Batches", { 4, 8 }))); + +REGISTER_FIXTURE_DATA_TEST_CASE(SqueezeNetPoolingLayer, GCPoolingLayerFixture, framework::DatasetMode::NIGHTLY, + framework::dataset::combine(framework::dataset::combine(datasets::SqueezeNetPoolingLayerDataset(), + data_types), + framework::dataset::make("Batches", { 4, 8 }))); + +REGISTER_FIXTURE_DATA_TEST_CASE(VGG16PoolingLayer, GCPoolingLayerFixture, framework::DatasetMode::NIGHTLY, + framework::dataset::combine(framework::dataset::combine(datasets::VGG16PoolingLayerDataset(), + data_types), + framework::dataset::make("Batches", { 4, 8 }))); + +REGISTER_FIXTURE_DATA_TEST_CASE(YOLOV2PoolingLayer, GCPoolingLayerFixture, framework::DatasetMode::NIGHTLY, + framework::dataset::combine(framework::dataset::combine(datasets::YOLOV2PoolingLayerDataset(), + data_types), + framework::dataset::make("Batches", { 4, 8 }))); +TEST_SUITE_END() +TEST_SUITE_END() +} // namespace test +} // namespace arm_compute diff --git a/tests/benchmark/fixtures/ConvolutionLayerFixture.h b/tests/benchmark/fixtures/ConvolutionLayerFixture.h index fd508d4500..09e6cbfaf8 100644 --- a/tests/benchmark/fixtures/ConvolutionLayerFixture.h +++ b/tests/benchmark/fixtures/ConvolutionLayerFixture.h @@ -30,6 +30,13 @@ #include "tests/Utils.h" #include "tests/framework/Fixture.h" +#ifdef ARM_COMPUTE_GC +#include "arm_compute/runtime/GLES_COMPUTE/GCScheduler.h" +#include "tests/GLES_COMPUTE/Helper.h" + +using namespace arm_compute::test::gles_compute; +#endif /* ARM_COMPUTE_GC */ + namespace arm_compute { namespace test @@ -71,6 +78,14 
@@ public: void run() { conv_layer.run(); +#ifdef ARM_COMPUTE_GC + if(opengles31_is_available() && std::is_same::type, arm_compute::GCTensor>::value) + { + GCScheduler::get().sync(); + force_sync_tensor(src); + force_sync_tensor(dst); + } +#endif /* ARM_COMPUTE_GC */ } void teardown() diff --git a/tests/datasets/FullyConnectedLayerDataset.h b/tests/datasets/FullyConnectedLayerDataset.h index 9f8089d81a..b2008d604b 100644 --- a/tests/datasets/FullyConnectedLayerDataset.h +++ b/tests/datasets/FullyConnectedLayerDataset.h @@ -151,6 +151,7 @@ public: add_config(TensorShape(9U, 5U, 257U, 2U, 3U), TensorShape(11565U, 2123U), TensorShape(2123U), TensorShape(2123U, 2U, 3U)); } }; + } // namespace datasets } // namespace test } // namespace arm_compute diff --git a/tests/framework/Framework.cpp b/tests/framework/Framework.cpp index a5c665c458..39fe1fa00a 100644 --- a/tests/framework/Framework.cpp +++ b/tests/framework/Framework.cpp @@ -30,6 +30,11 @@ #include "arm_compute/runtime/CL/CLScheduler.h" #endif /* ARM_COMPUTE_CL */ +#ifdef ARM_COMPUTE_GC +#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCScheduler.h" +#endif /* ARM_COMPUTE_GC */ + #include #include #include @@ -292,6 +297,12 @@ void Framework::run_test(const TestInfo &info, TestCaseFactory &test_factory) CLScheduler::get().sync(); } #endif /* ARM_COMPUTE_CL */ +#ifdef ARM_COMPUTE_GC + if(opengles31_is_available()) + { + GCScheduler::get().sync(); + } +#endif /* ARM_COMPUTE_GC */ profiler.stop(); } diff --git a/tests/framework/SConscript b/tests/framework/SConscript index 52b8bed10f..f4beaf85ce 100644 --- a/tests/framework/SConscript +++ b/tests/framework/SConscript @@ -48,6 +48,11 @@ Help(new_options.GenerateHelpText(framework_env)) if(env['opencl']): framework_env.Append(CPPDEFINES=['ARM_COMPUTE_CL']) +if(env['gles_compute']): + framework_env.Append(CPPDEFINES=['ARM_COMPUTE_GC']) + if env['os'] != 'android': + framework_env.Append(CPPPATH = ["#opengles-3.1/include", 
"#opengles-3.1/mali_include"]) + framework_env.Append(CPPPATH = ["."]) framework_env.Append(CPPFLAGS=['-Wno-overloaded-virtual']) diff --git a/tests/main.cpp b/tests/main.cpp index d056267418..1f1f33a156 100644 --- a/tests/main.cpp +++ b/tests/main.cpp @@ -36,6 +36,9 @@ #ifdef ARM_COMPUTE_CL #include "arm_compute/runtime/CL/CLScheduler.h" #endif /* ARM_COMPUTE_CL */ +#ifdef ARM_COMPUTE_GC +#include "arm_compute/runtime/GLES_COMPUTE/GCScheduler.h" +#endif /* ARM_COMPUTE_GC */ #include "arm_compute/runtime/Scheduler.h" #include @@ -62,6 +65,10 @@ int main(int argc, char **argv) CLScheduler::get().default_init(); #endif /* ARM_COMPUTE_CL */ +#ifdef ARM_COMPUTE_GC + GCScheduler::get().default_init(); +#endif /* ARM_COMPUTE_GC */ + framework::Framework &framework = framework::Framework::get(); framework::CommandLineParser parser; diff --git a/tests/validation/GLES_COMPUTE/ActivationLayer.cpp b/tests/validation/GLES_COMPUTE/ActivationLayer.cpp new file mode 100644 index 0000000000..23821d35fa --- /dev/null +++ b/tests/validation/GLES_COMPUTE/ActivationLayer.cpp @@ -0,0 +1,198 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCTensorAllocator.h" +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCActivationLayer.h" +#include "tests/GLES_COMPUTE/GCAccessor.h" +#include "tests/PaddingCalculator.h" +#include "tests/datasets/ActivationFunctionsDataset.h" +#include "tests/datasets/ShapeDatasets.h" +#include "tests/framework/Asserts.h" +#include "tests/framework/Macros.h" +#include "tests/framework/datasets/Datasets.h" +#include "tests/validation/Validation.h" +#include "tests/validation/fixtures/ActivationLayerFixture.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace +{ +/** Define tolerance of the activation layer. + * + * @param[in] activation The activation function used. + * @param[in] data_type Data type. + * + * @return Tolerance depending on the activation function. + */ +AbsoluteTolerance tolerance(ActivationLayerInfo::ActivationFunction activation, DataType data_type) +{ + constexpr float epsilon = 1e-6f; + + switch(activation) + { + case ActivationLayerInfo::ActivationFunction::LINEAR: + return AbsoluteTolerance(data_type == DataType::F16 ? 0.2f : epsilon); + case ActivationLayerInfo::ActivationFunction::SQUARE: + return AbsoluteTolerance(data_type == DataType::F16 ? 0.1f : epsilon); + case ActivationLayerInfo::ActivationFunction::LOGISTIC: + if(is_data_type_fixed_point(data_type)) + { + return AbsoluteTolerance(5.f); + } + else + { + return AbsoluteTolerance(data_type == DataType::F16 ? 0.001f : epsilon); + } + case ActivationLayerInfo::ActivationFunction::LEAKY_RELU: + return AbsoluteTolerance(data_type == DataType::F16 ? 
0.00001f : epsilon); + case ActivationLayerInfo::ActivationFunction::SOFT_RELU: + case ActivationLayerInfo::ActivationFunction::SQRT: + if(is_data_type_fixed_point(data_type)) + { + return AbsoluteTolerance(5.f); + } + else + { + return AbsoluteTolerance(data_type == DataType::F16 ? 0.01f : 0.00001f); + } + case ActivationLayerInfo::ActivationFunction::TANH: + if(is_data_type_fixed_point(data_type)) + { + return AbsoluteTolerance(5.f); + } + else + { + return AbsoluteTolerance(data_type == DataType::F16 ? 0.001f : 0.00001f); + } + default: + return AbsoluteTolerance(epsilon); + } +} + +/** CNN data types */ +const auto CNNDataTypes = framework::dataset::make("DataType", +{ + DataType::F16, + DataType::F32, +}); + +/** Input data sets. */ +const auto ActivationDataset = combine(combine(framework::dataset::make("InPlace", { false, true }), datasets::ActivationFunctions()), framework::dataset::make("AlphaBeta", { 0.5f, 1.f })); +} // namespace + +TEST_SUITE(GC) +TEST_SUITE(ActivationLayer) + +DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(concat(datasets::SmallShapes(), datasets::LargeShapes()), CNNDataTypes), framework::dataset::make("InPlace", { false, true })), + shape, data_type, in_place) +{ + // Set fixed point position data type allowed + const int fixed_point_position = 0; + + // Create tensors + GCTensor src = create_tensor(shape, data_type, 1, fixed_point_position); + GCTensor dst = create_tensor(shape, data_type, 1, fixed_point_position); + + ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); + + // Create and configure function + GCActivationLayer act_layer; + + if(in_place) + { + act_layer.configure(&src, nullptr, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::ABS)); + } + else + { + act_layer.configure(&src, &dst, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::ABS)); + } + + // Validate valid 
region + const ValidRegion valid_region = shape_to_valid_region(shape); + validate(src.info()->valid_region(), valid_region); + + if(!in_place) + { + validate(dst.info()->valid_region(), valid_region); + } + + // Validate padding + const int step = (arm_compute::data_size_from_type(data_type) == 4 ? 1 : 2); + const PaddingSize padding = PaddingCalculator(shape.x(), step).required_padding(); + validate(src.info()->padding(), padding); + + if(!in_place) + { + validate(dst.info()->padding(), padding); + } +} + +template +using GCActivationLayerFixture = ActivationValidationFixture; + +TEST_SUITE(Float) +TEST_SUITE(FP16) +FIXTURE_DATA_TEST_CASE(RunSmall, GCActivationLayerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), ActivationDataset), + framework::dataset::make("DataType", + DataType::F16))) +{ + // Validate output + validate(GCAccessor(_target), _reference, tolerance(_function, _data_type)); +} +FIXTURE_DATA_TEST_CASE(RunLarge, GCActivationLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(), ActivationDataset), + framework::dataset::make("DataType", + DataType::F16))) +{ + // Validate output + validate(GCAccessor(_target), _reference, tolerance(_function, _data_type)); +} +TEST_SUITE_END() + +TEST_SUITE(FP32) +FIXTURE_DATA_TEST_CASE(RunSmall, GCActivationLayerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), ActivationDataset), framework::dataset::make("DataType", + DataType::F32))) +{ + // Validate output + validate(GCAccessor(_target), _reference, tolerance(_function, _data_type)); +} +FIXTURE_DATA_TEST_CASE(RunLarge, GCActivationLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(), ActivationDataset), framework::dataset::make("DataType", + DataType::F32))) +{ + // Validate output + validate(GCAccessor(_target), _reference, tolerance(_function, _data_type)); +} +TEST_SUITE_END() +TEST_SUITE_END() + +TEST_SUITE_END() 
+TEST_SUITE_END() +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/GLES_COMPUTE/BatchNormalizationLayer.cpp b/tests/validation/GLES_COMPUTE/BatchNormalizationLayer.cpp new file mode 100644 index 0000000000..a82149bdcc --- /dev/null +++ b/tests/validation/GLES_COMPUTE/BatchNormalizationLayer.cpp @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCTensorAllocator.h" +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCBatchNormalizationLayer.h" +#include "tests/GLES_COMPUTE/GCAccessor.h" +#include "tests/PaddingCalculator.h" +#include "tests/datasets/RandomBatchNormalizationLayerDataset.h" +#include "tests/datasets/ShapeDatasets.h" +#include "tests/framework/Asserts.h" +#include "tests/framework/Macros.h" +#include "tests/framework/datasets/Datasets.h" +#include "tests/validation/Validation.h" +#include "tests/validation/fixtures/BatchNormalizationLayerFixture.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace +{ +constexpr AbsoluteTolerance tolerance_f(0.00001f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 */ +constexpr AbsoluteTolerance tolerance_f16(0.01f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F16 */ +} // namespace + +TEST_SUITE(GC) +TEST_SUITE(BatchNormalizationLayer) + +template +using GCBatchNormalizationLayerFixture = BatchNormalizationLayerValidationFixture; + +DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(datasets::RandomBatchNormalizationLayerDataset(), framework::dataset::make("DataType", { DataType::F32 })), + shape0, shape1, epsilon, dt) +{ + // Set fixed point position data type allowed + int fixed_point_position = (arm_compute::is_data_type_fixed_point(dt)) ? 
3 : 0; + + // Create tensors + GCTensor src = create_tensor(shape0, dt, 1, fixed_point_position); + GCTensor dst = create_tensor(shape0, dt, 1, fixed_point_position); + GCTensor mean = create_tensor(shape1, dt, 1, fixed_point_position); + GCTensor var = create_tensor(shape1, dt, 1, fixed_point_position); + GCTensor beta = create_tensor(shape1, dt, 1, fixed_point_position); + GCTensor gamma = create_tensor(shape1, dt, 1, fixed_point_position); + + // Create and Configure function + GCBatchNormalizationLayer norm; + norm.configure(&src, &dst, &mean, &var, &beta, &gamma, epsilon); + + // Validate valid region + const ValidRegion valid_region = shape_to_valid_region(shape0); + validate(dst.info()->valid_region(), valid_region); +} + +TEST_SUITE(Float) +TEST_SUITE(FP16) +FIXTURE_DATA_TEST_CASE(Random, GCBatchNormalizationLayerFixture, framework::DatasetMode::PRECOMMIT, combine(datasets::RandomBatchNormalizationLayerDataset(), + framework::dataset::make("DataType", DataType::F16))) +{ + // Validate output + validate(GCAccessor(_target), _reference, tolerance_f16, 0); +} +TEST_SUITE_END() + +TEST_SUITE(FP32) +FIXTURE_DATA_TEST_CASE(Random, GCBatchNormalizationLayerFixture, framework::DatasetMode::PRECOMMIT, combine(datasets::RandomBatchNormalizationLayerDataset(), + framework::dataset::make("DataType", DataType::F32))) +{ + // Validate output + validate(GCAccessor(_target), _reference, tolerance_f, 0); +} +TEST_SUITE_END() +TEST_SUITE_END() + +TEST_SUITE_END() +TEST_SUITE_END() +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/GLES_COMPUTE/DepthConcatenateLayer.cpp b/tests/validation/GLES_COMPUTE/DepthConcatenateLayer.cpp new file mode 100644 index 0000000000..829845dd36 --- /dev/null +++ b/tests/validation/GLES_COMPUTE/DepthConcatenateLayer.cpp @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCTensorAllocator.h" +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCDepthConcatenate.h" +#include "tests/GLES_COMPUTE/GCAccessor.h" +#include "tests/datasets/ShapeDatasets.h" +#include "tests/framework/Asserts.h" +#include "tests/framework/Macros.h" +#include "tests/framework/datasets/Datasets.h" +#include "tests/validation/Validation.h" +#include "tests/validation/fixtures/DepthConcatenateLayerFixture.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +TEST_SUITE(GC) +TEST_SUITE(DepthConcatenateLayer) + +//TODO(COMPMID-415): Add configuration test? 
+ +template +using GCDepthConcatenateLayerFixture = DepthConcatenateValidationFixture; + +TEST_SUITE(Float) +TEST_SUITE(FP16) +FIXTURE_DATA_TEST_CASE(RunSmall, GCDepthConcatenateLayerFixture, framework::DatasetMode::PRECOMMIT, combine(datasets::Small2DShapes(), framework::dataset::make("DataType", + DataType::F16))) +{ + // Validate output + validate(GCAccessor(_target), _reference); +} +FIXTURE_DATA_TEST_CASE(RunLarge, GCDepthConcatenateLayerFixture, framework::DatasetMode::NIGHTLY, combine(datasets::Large2DShapes(), framework::dataset::make("DataType", + DataType::F16))) +{ + // Validate output + validate(GCAccessor(_target), _reference); +} +TEST_SUITE_END() + +TEST_SUITE(FP32) +FIXTURE_DATA_TEST_CASE(RunSmall, GCDepthConcatenateLayerFixture, framework::DatasetMode::PRECOMMIT, combine(datasets::Small2DShapes(), framework::dataset::make("DataType", + DataType::F32))) +{ + // Validate output + validate(GCAccessor(_target), _reference); +} +FIXTURE_DATA_TEST_CASE(RunLarge, GCDepthConcatenateLayerFixture, framework::DatasetMode::NIGHTLY, combine(datasets::DepthConcatenateShapes(), framework::dataset::make("DataType", + DataType::F32))) +{ + // Validate output + validate(GCAccessor(_target), _reference); +} +TEST_SUITE_END() +TEST_SUITE_END() + +TEST_SUITE_END() +TEST_SUITE_END() +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/GLES_COMPUTE/DirectConvolutionLayer.cpp b/tests/validation/GLES_COMPUTE/DirectConvolutionLayer.cpp new file mode 100644 index 0000000000..153b060757 --- /dev/null +++ b/tests/validation/GLES_COMPUTE/DirectConvolutionLayer.cpp @@ -0,0 +1,103 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCTensorAllocator.h" +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCDirectConvolutionLayer.h" +#include "tests/GLES_COMPUTE/GCAccessor.h" +#include "tests/PaddingCalculator.h" +#include "tests/datasets/ShapeDatasets.h" +#include "tests/framework/Asserts.h" +#include "tests/framework/Macros.h" +#include "tests/framework/datasets/Datasets.h" +#include "tests/validation/Validation.h" +#include "tests/validation/fixtures/DirectConvolutionLayerFixture.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace +{ +RelativeTolerance tolerance_fp16(half(0.2)); /**< Tolerance for floating point tests */ +RelativeTolerance tolerance_fp32(0.02f); /**< Tolerance for floating point tests */ +constexpr float tolerance_num = 0.07f; /**< Tolerance number */ + +/** Direct convolution data set. */ +const auto data_quantized = combine(datasets::SmallDirectConvolutionShapes(), + combine(framework::dataset::make("StrideX", 1, 3), + combine(framework::dataset::make("StrideY", 1, 3), + combine(concat(combine(framework::dataset::make("PadX", 0), + combine(framework::dataset::make("PadY", 0), + framework::dataset::make("KernelSize", 1))), + combine(framework::dataset::make("PadX", 0, 2), + combine(framework::dataset::make("PadY", 0, 2), + framework::dataset::make("KernelSize", { 3 })))), + framework::dataset::make("NumKernels", { 1, 4, 8, 16 }))))); + +const auto data = combine(datasets::SmallDirectConvolutionShapes(), + combine(framework::dataset::make("StrideX", 1, 3), + combine(framework::dataset::make("StrideY", 1, 3), + combine(concat(combine(framework::dataset::make("PadX", 0), + combine(framework::dataset::make("PadY", 0), + framework::dataset::make("KernelSize", 1))), + combine(framework::dataset::make("PadX", 0, 2), + combine(framework::dataset::make("PadY", 0, 2), + framework::dataset::make("KernelSize", 
{ 3, 5 })))), + framework::dataset::make("NumKernels", { 1, 4, 8, 16 }))))); +} // namespace + +TEST_SUITE(GC) +TEST_SUITE(DirectConvolutionLayer) + +//TODO(COMPMID-415): Configuration tests? + +template +using GCDirectConvolutionLayerFixture = DirectConvolutionValidationFixture; + +TEST_SUITE(Float) +TEST_SUITE(FP16) +FIXTURE_DATA_TEST_CASE(Run, GCDirectConvolutionLayerFixture, framework::DatasetMode::ALL, combine(data, framework::dataset::make("DataType", DataType::F16))) +{ + // Validate output + validate(GCAccessor(_target), _reference, tolerance_fp16, tolerance_num); +} +TEST_SUITE_END() + +TEST_SUITE(FP32) +FIXTURE_DATA_TEST_CASE(Run, GCDirectConvolutionLayerFixture, framework::DatasetMode::ALL, combine(data, framework::dataset::make("DataType", DataType::F32))) +{ + // Validate output + validate(GCAccessor(_target), _reference, tolerance_fp32); +} +TEST_SUITE_END() +TEST_SUITE_END() + +TEST_SUITE_END() +TEST_SUITE_END() +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/GLES_COMPUTE/DropoutLayer.cpp b/tests/validation/GLES_COMPUTE/DropoutLayer.cpp new file mode 100644 index 0000000000..4d54dad15c --- /dev/null +++ b/tests/validation/GLES_COMPUTE/DropoutLayer.cpp @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCTensorAllocator.h" +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCDropoutLayer.h" +#include "tests/GLES_COMPUTE/GCAccessor.h" +#include "tests/datasets/ShapeDatasets.h" +#include "tests/framework/Asserts.h" +#include "tests/framework/Macros.h" +#include "tests/framework/datasets/Datasets.h" +#include "tests/validation/Validation.h" +#include "tests/validation/fixtures/DropoutLayerFixture.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace +{ +//constexpr AbsoluteTolerance tolerance_f(0.00001f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 */ + +const auto testparam = combine(framework::dataset::make("ratio", { 0.5f }), framework::dataset::make("forward", { false, true })); +} // namespace + +TEST_SUITE(GC) +TEST_SUITE(DropoutLayer) + +template +using GCDropoutLayerFixture = DropoutLayerValidationFixture; + +TEST_SUITE(Float) +TEST_SUITE(FP32) +FIXTURE_DATA_TEST_CASE(Random, GCDropoutLayerFixture, framework::DatasetMode::PRECOMMIT, + combine(combine(datasets::SmallShapes(), testparam), framework::dataset::make("DataType", DataType::F32))) +{ + // FIXME(APPBROWSER-302) + // Validate output + //validate(GCAccessor(_target), _reference, tolerance_f, 0); +} +TEST_SUITE_END() + +TEST_SUITE(FP16) +FIXTURE_DATA_TEST_CASE(Random, 
GCDropoutLayerFixture, framework::DatasetMode::PRECOMMIT, + combine(combine(datasets::SmallShapes(), testparam), framework::dataset::make("DataType", DataType::F16))) +{ + // FIXME(APPBROWSER-302) + // Validate output + //validate(GCAccessor(_target), _reference, tolerance_f, 0); +} +TEST_SUITE_END() +TEST_SUITE_END() + +TEST_SUITE_END() +TEST_SUITE_END() +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/GLES_COMPUTE/FullyConnectedLayer.cpp b/tests/validation/GLES_COMPUTE/FullyConnectedLayer.cpp new file mode 100644 index 0000000000..4040f468f4 --- /dev/null +++ b/tests/validation/GLES_COMPUTE/FullyConnectedLayer.cpp @@ -0,0 +1,143 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCTensorAllocator.h" +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCFullyConnectedLayer.h" +#include "tests/GLES_COMPUTE/GCAccessor.h" +#include "tests/GLES_COMPUTE/Helper.h" +#include "tests/PaddingCalculator.h" +#include "tests/datasets/FullyConnectedLayerDataset.h" +#include "tests/framework/Asserts.h" +#include "tests/framework/Macros.h" +#include "tests/framework/datasets/Datasets.h" +#include "tests/validation/Validation.h" +#include "tests/validation/fixtures/FullyConnectedLayerFixture.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace +{ +/** Tolerance for float operations */ +RelativeTolerance tolerance_f32(0.05f); +RelativeTolerance tolerance_f16(half(0.2)); +constexpr float tolerance_num = 0.07f; /**< Tolerance number */ + +/** CNN data types */ +const auto CNNDataTypes = framework::dataset::make("DataType", +{ + DataType::F16, + DataType::F32, +}); + +const auto FullyConnectedParameters = combine(framework::dataset::make("TransposeWeights", { false, true }), framework::dataset::make("ReshapeWeights", { false, true })); +} // namespace + +TEST_SUITE(GC) +TEST_SUITE(FullyConnectedLayer) + +DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallFullyConnectedLayerDataset(), datasets::LargeFullyConnectedLayerDataset()), + FullyConnectedParameters), + CNNDataTypes), + src_shape, weights_shape, bias_shape, dst_shape, transpose_weights, reshape_weights, data_type) +{ + // Set fixed point position data type allowed + int fixed_point_position = is_data_type_fixed_point(data_type) ? 
3 : 0; + + TensorShape ws(weights_shape); + + // Transpose weights if not done in the function + if(!reshape_weights || !transpose_weights) + { + const size_t shape_x = ws.x(); + ws.set(0, ws.y()); + ws.set(1, shape_x); + } + + // Create tensors + GCTensor src = create_tensor(src_shape, data_type, 1, fixed_point_position); + GCTensor weights = create_tensor(ws, data_type, 1, fixed_point_position); + GCTensor bias = create_tensor(bias_shape, data_type, 1, fixed_point_position); + GCTensor dst = create_tensor(dst_shape, data_type, 1, fixed_point_position); + + ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(weights.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(bias.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); + + // Create and configure function. + GCFullyConnectedLayer fc; + fc.configure(&src, &weights, &bias, &dst, transpose_weights, !reshape_weights); + + // Validate valid region + const ValidRegion dst_valid_region = shape_to_valid_region(dst_shape); + validate(dst.info()->valid_region(), dst_valid_region); +} + +template +using GCFullyConnectedLayerFixture = FullyConnectedLayerValidationFixture; + +TEST_SUITE(Float) +TEST_SUITE(FP16) +FIXTURE_DATA_TEST_CASE(RunSmall, GCFullyConnectedLayerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallFullyConnectedLayerDataset(), + FullyConnectedParameters), + framework::dataset::make("DataType", DataType::F16))) +{ + // Validate output + validate(GCAccessor(_target), _reference, tolerance_f16, tolerance_num); +} +FIXTURE_DATA_TEST_CASE(RunLarge, GCFullyConnectedLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeFullyConnectedLayerDataset(), + FullyConnectedParameters), + framework::dataset::make("DataType", DataType::F16))) +{ + // Validate output + validate(GCAccessor(_target), _reference, tolerance_f16, 
tolerance_num); +} +TEST_SUITE_END() + +TEST_SUITE(FP32) +FIXTURE_DATA_TEST_CASE(RunSmall, GCFullyConnectedLayerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallFullyConnectedLayerDataset(), FullyConnectedParameters), + framework::dataset::make("DataType", DataType::F32))) +{ + // Validate output + validate(GCAccessor(_target), _reference, tolerance_f32); +} +FIXTURE_DATA_TEST_CASE(RunLarge, GCFullyConnectedLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeFullyConnectedLayerDataset(), FullyConnectedParameters), + framework::dataset::make("DataType", DataType::F32))) +{ + // Validate output + validate(GCAccessor(_target), _reference, tolerance_f32); +} +TEST_SUITE_END() +TEST_SUITE_END() + +TEST_SUITE_END() +TEST_SUITE_END() +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/GLES_COMPUTE/GEMM.cpp b/tests/validation/GLES_COMPUTE/GEMM.cpp new file mode 100644 index 0000000000..2abad3206d --- /dev/null +++ b/tests/validation/GLES_COMPUTE/GEMM.cpp @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCTensorAllocator.h" +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCGEMM.h" +#include "tests/GLES_COMPUTE/GCAccessor.h" +#include "tests/GLES_COMPUTE/Helper.h" +#include "tests/PaddingCalculator.h" +#include "tests/datasets/LargeGEMMDataset.h" +#include "tests/datasets/SmallGEMMDataset.h" +#include "tests/framework/Asserts.h" +#include "tests/framework/Macros.h" +#include "tests/framework/datasets/Datasets.h" +#include "tests/validation/Validation.h" +#include "tests/validation/fixtures/GEMMFixture.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace +{ +RelativeTolerance tolerance_f32(0.001f); /**< Tolerance value for comparing reference's output against implementation's output for floating point data types */ + +/** CNN data types */ +const auto CNNDataTypes = framework::dataset::make("DataType", +{ + DataType::F32, +}); +} // namespace + +TEST_SUITE(GC) +TEST_SUITE(GEMM) + +DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(framework::dataset::concat(datasets::SmallGEMMDataset(), datasets::LargeGEMMDataset()), CNNDataTypes), + shape_a, shape_b, shape_c, output_shape, alpha, beta, data_type) +{ + // Set fixed point position data type allowed + const int fixed_point_position = is_data_type_fixed_point(data_type) ? 
3 : 0; + + // Create tensors + GCTensor a = create_tensor(shape_a, data_type, 1, fixed_point_position); + GCTensor b = create_tensor(shape_b, data_type, 1, fixed_point_position); + GCTensor c = create_tensor(shape_c, data_type, 1, fixed_point_position); + GCTensor dst = create_tensor(output_shape, data_type, 1, fixed_point_position); + + ARM_COMPUTE_EXPECT(a.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(b.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(c.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); + + // Create and configure function + GCGEMM gemm; + gemm.configure(&a, &b, &c, &dst, alpha, beta); + + //TODO(COMPMID-415): Validate valid region +} + +template +using GCGEMMFixture = GEMMValidationFixture; + +TEST_SUITE(Float) +TEST_SUITE(FP32) +FIXTURE_DATA_TEST_CASE(RunSmall, GCGEMMFixture, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallGEMMDataset(), framework::dataset::make("DataType", DataType::F32))) +{ + // Validate output + validate(GCAccessor(_target), _reference, tolerance_f32); +} +FIXTURE_DATA_TEST_CASE(RunLarge, GCGEMMFixture, framework::DatasetMode::NIGHTLY, combine(datasets::LargeGEMMDataset(), framework::dataset::make("DataType", DataType::F32))) +{ + // Validate output + validate(GCAccessor(_target), _reference, tolerance_f32); +} +TEST_SUITE_END() +TEST_SUITE_END() + +TEST_SUITE_END() +TEST_SUITE_END() +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/GLES_COMPUTE/GlobalPoolingLayer.cpp b/tests/validation/GLES_COMPUTE/GlobalPoolingLayer.cpp new file mode 100644 index 0000000000..88372ffe24 --- /dev/null +++ b/tests/validation/GLES_COMPUTE/GlobalPoolingLayer.cpp @@ -0,0 +1,83 @@ +/* + * Copyright (c) 2017 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCTensorAllocator.h" +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCPoolingLayer.h" +#include "tests/GLES_COMPUTE/GCAccessor.h" +#include "tests/PaddingCalculator.h" +#include "tests/datasets/PoolingTypesDataset.h" +#include "tests/datasets/ShapeDatasets.h" +#include "tests/framework/Asserts.h" +#include "tests/framework/Macros.h" +#include "tests/framework/datasets/Datasets.h" +#include "tests/validation/Validation.h" +#include "tests/validation/fixtures/PoolingLayerFixture.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace +{ +/** Input data set for float data types */ +const auto GlobalPoolingLayerDataset = combine(datasets::GlobalPoolingShapes(), datasets::PoolingTypes()); + +/** Input data set for quantized data types */ +constexpr AbsoluteTolerance tolerance_f32(0.001f); /**< Tolerance value for comparing reference's output against implementation's output for FP32 types */ +constexpr AbsoluteTolerance tolerance_f16(0.01f); /**< Tolerance value for comparing reference's output against implementation's output for FP16 types */ +} // namespace + +TEST_SUITE(GC) +TEST_SUITE(GlobalPoolingLayer) + +template +using GCGlobalPoolingLayerFixture = GlobalPoolingLayerValidationFixture; + +TEST_SUITE(Float) +TEST_SUITE(FP32) +FIXTURE_DATA_TEST_CASE(RunGlobalPooling, GCGlobalPoolingLayerFixture, framework::DatasetMode::ALL, combine(GlobalPoolingLayerDataset, framework::dataset::make("DataType", DataType::F32))) +{ + // Validate output + validate(GCAccessor(_target), _reference, tolerance_f32); +} +TEST_SUITE_END() + +TEST_SUITE(FP16) +FIXTURE_DATA_TEST_CASE(RunGlobalPooling, GCGlobalPoolingLayerFixture, framework::DatasetMode::ALL, combine(GlobalPoolingLayerDataset, framework::dataset::make("DataType", + DataType::F16))) +{ + // Validate output + validate(GCAccessor(_target), _reference, 
tolerance_f16); +} +TEST_SUITE_END() +TEST_SUITE_END() + +TEST_SUITE_END() +TEST_SUITE_END() +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/GLES_COMPUTE/NormalizationLayer.cpp b/tests/validation/GLES_COMPUTE/NormalizationLayer.cpp new file mode 100644 index 0000000000..4f6ae55677 --- /dev/null +++ b/tests/validation/GLES_COMPUTE/NormalizationLayer.cpp @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCTensorAllocator.h" +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCNormalizationLayer.h" +#include "tests/GLES_COMPUTE/GCAccessor.h" +#include "tests/PaddingCalculator.h" +#include "tests/datasets/NormalizationTypesDataset.h" +#include "tests/datasets/ShapeDatasets.h" +#include "tests/framework/Asserts.h" +#include "tests/framework/Macros.h" +#include "tests/framework/datasets/Datasets.h" +#include "tests/validation/Validation.h" +#include "tests/validation/fixtures/NormalizationLayerFixture.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace +{ +/** Tolerance for float operations */ +constexpr AbsoluteTolerance tolerance_f32(0.00001f); + +/** Input data set. */ +const auto NormalizationDataset = combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("NormType", { NormType::IN_MAP_1D, NormType::CROSS_MAP })), + framework::dataset::make("NormalizationSize", 3, 9, 2)), + framework::dataset::make("Beta", { 0.5f, 1.f, 2.f })); +} // namespace + +TEST_SUITE(GC) +TEST_SUITE(NormalizationLayer) + +//TODO(COMPMID-415): Missing configuration? 
+ +template +using GCNormalizationLayerFixture = NormalizationValidationFixture; + +TEST_SUITE(Float) +TEST_SUITE(FP32) +FIXTURE_DATA_TEST_CASE(RunSmall, GCNormalizationLayerFixture, framework::DatasetMode::PRECOMMIT, combine(NormalizationDataset, framework::dataset::make("DataType", DataType::F32))) +{ + // Validate output + validate(GCAccessor(_target), _reference, tolerance_f32); +} +FIXTURE_DATA_TEST_CASE(RunLarge, GCNormalizationLayerFixture, framework::DatasetMode::NIGHTLY, combine(NormalizationDataset, framework::dataset::make("DataType", DataType::F32))) +{ + // Validate output + validate(GCAccessor(_target), _reference, tolerance_f32); +} +TEST_SUITE_END() +TEST_SUITE_END() + +template +using GCNormalizationLayerFixedPointFixture = NormalizationValidationFixedPointFixture; + +TEST_SUITE_END() +TEST_SUITE_END() +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/GLES_COMPUTE/PoolingLayer.cpp b/tests/validation/GLES_COMPUTE/PoolingLayer.cpp new file mode 100644 index 0000000000..a78b27edc2 --- /dev/null +++ b/tests/validation/GLES_COMPUTE/PoolingLayer.cpp @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCTensorAllocator.h" +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCPoolingLayer.h" +#include "tests/GLES_COMPUTE/GCAccessor.h" +#include "tests/PaddingCalculator.h" +#include "tests/datasets/PoolingTypesDataset.h" +#include "tests/datasets/ShapeDatasets.h" +#include "tests/framework/Asserts.h" +#include "tests/framework/Macros.h" +#include "tests/framework/datasets/Datasets.h" +#include "tests/validation/Validation.h" +#include "tests/validation/fixtures/PoolingLayerFixture.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace +{ +// FIXME(APPBROWSER-304): Add exclude padding support for OpenGL ES implementation +/** Input data set for float data types */ +const auto PoolingLayerDatasetFP = combine(combine(combine(datasets::PoolingTypes(), framework::dataset::make("PoolingSize", { 2, 3, 4, 7, 9 })), + framework::dataset::make("PadStride", { PadStrideInfo(1, 1, 0, 0), PadStrideInfo(2, 1, 0, 0), PadStrideInfo(1, 2, 1, 1), PadStrideInfo(2, 2, 1, 0) })), + framework::dataset::make("ExcludePadding", { /* true, */ false })); + +// FIXME(APPBROWSER-304): Add exclude padding support for OpenGL ES implementation +// constexpr AbsoluteTolerance tolerance_f32(0.001f); /**< Tolerance value for comparing reference's output against implementation's output for float types */ +// constexpr AbsoluteTolerance 
tolerance_f16(0.01f); /**< Tolerance value for comparing reference's output against implementation's output for float types */ + +} // namespace + +TEST_SUITE(GC) +TEST_SUITE(PoolingLayer) + +template +using GCPoolingLayerFixture = PoolingLayerValidationFixture; + +TEST_SUITE(Float) +TEST_SUITE(FP32) +FIXTURE_DATA_TEST_CASE(RunSmall, GCPoolingLayerFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), combine(PoolingLayerDatasetFP, framework::dataset::make("DataType", + DataType::F32)))) +{ + // FIXME(APPBROWSER-304): Add exclude padding support for OpenGL ES implementation + // Validate output + // validate(GCAccessor(_target), _reference, tolerance_f32); +} +FIXTURE_DATA_TEST_CASE(RunLarge, GCPoolingLayerFixture, framework::DatasetMode::NIGHTLY, combine(datasets::LargeShapes(), combine(PoolingLayerDatasetFP, framework::dataset::make("DataType", + DataType::F32)))) +{ + // FIXME(APPBROWSER-304): Add exclude padding support for OpenGL ES implementation + // Validate output + // validate(GCAccessor(_target), _reference, tolerance_f32); +} +TEST_SUITE_END() + +TEST_SUITE(FP16) +FIXTURE_DATA_TEST_CASE(RunSmall, GCPoolingLayerFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), combine(PoolingLayerDatasetFP, + framework::dataset::make("DataType", DataType::F16)))) +{ + // FIXME(APPBROWSER-304): Add exclude padding support for OpenGL ES implementation + // Validate output + // validate(GCAccessor(_target), _reference, tolerance_f16); +} +FIXTURE_DATA_TEST_CASE(RunLarge, GCPoolingLayerFixture, framework::DatasetMode::NIGHTLY, combine(datasets::LargeShapes(), combine(PoolingLayerDatasetFP, + framework::dataset::make("DataType", DataType::F16)))) +{ + // FIXME(APPBROWSER-304): Add exclude padding support for OpenGL ES implementation + // Validate output + // validate(GCAccessor(_target), _reference, tolerance_f16); +} +TEST_SUITE_END() +TEST_SUITE_END() + +TEST_SUITE_END() +TEST_SUITE_END() +} // namespace validation +} // namespace test 
+} // namespace arm_compute diff --git a/tests/validation/GLES_COMPUTE/SoftmaxLayer.cpp b/tests/validation/GLES_COMPUTE/SoftmaxLayer.cpp new file mode 100644 index 0000000000..888f87e9ef --- /dev/null +++ b/tests/validation/GLES_COMPUTE/SoftmaxLayer.cpp @@ -0,0 +1,122 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h" +#include "arm_compute/runtime/GLES_COMPUTE/GCTensorAllocator.h" +#include "arm_compute/runtime/GLES_COMPUTE/functions/GCSoftmaxLayer.h" +#include "tests/GLES_COMPUTE/GCAccessor.h" +#include "tests/PaddingCalculator.h" +#include "tests/datasets/ShapeDatasets.h" +#include "tests/framework/Asserts.h" +#include "tests/framework/Macros.h" +#include "tests/framework/datasets/Datasets.h" +#include "tests/validation/Validation.h" +#include "tests/validation/fixtures/SoftmaxLayerFixture.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace +{ +/** Tolerance for float operations */ +RelativeTolerance tolerance_f16(half(0.2)); +RelativeTolerance tolerance_f32(0.001f); + +/** CNN data types */ +const auto CNNDataTypes = framework::dataset::make("DataType", +{ + DataType::F16, + DataType::F32, +}); +} // namespace + +TEST_SUITE(GC) +TEST_SUITE(SoftmaxLayer) + +DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(concat(datasets::SmallShapes(), datasets::LargeShapes()), CNNDataTypes), shape, data_type) +{ + // Set fixed point position data type allowed + const int fixed_point_position = is_data_type_fixed_point(data_type) ? 
3 : 0; + + // Create tensors + GCTensor src = create_tensor(shape, data_type, 1, fixed_point_position); + GCTensor dst = create_tensor(shape, data_type, 1, fixed_point_position); + + ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); + + // Create and configure function + GCSoftmaxLayer smx_layer; + smx_layer.configure(&src, &dst); + + // Validate valid region + const ValidRegion valid_region = shape_to_valid_region(shape); + validate(src.info()->valid_region(), valid_region); + validate(dst.info()->valid_region(), valid_region); + + // Validate padding + const PaddingSize padding = PaddingCalculator(shape.x(), 4).required_padding(); + validate(src.info()->padding(), padding); + validate(dst.info()->padding(), padding); +} + +template +using GCSoftmaxLayerFixture = SoftmaxValidationFixture; + +TEST_SUITE(Float) +TEST_SUITE(FP16) +FIXTURE_DATA_TEST_CASE(RunSmall, GCSoftmaxLayerFixture, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::F16))) +{ + // Validate output + validate(GCAccessor(_target), _reference, tolerance_f16); +} +FIXTURE_DATA_TEST_CASE(RunLarge, GCSoftmaxLayerFixture, framework::DatasetMode::NIGHTLY, combine(datasets::LargeShapes(), framework::dataset::make("DataType", DataType::F16))) +{ + // Validate output + validate(GCAccessor(_target), _reference, tolerance_f16); +} +TEST_SUITE_END() + +TEST_SUITE(FP32) +FIXTURE_DATA_TEST_CASE(RunSmall, GCSoftmaxLayerFixture, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::F32))) +{ + // Validate output + validate(GCAccessor(_target), _reference, tolerance_f32); +} +FIXTURE_DATA_TEST_CASE(RunLarge, GCSoftmaxLayerFixture, framework::DatasetMode::NIGHTLY, combine(datasets::LargeShapes(), framework::dataset::make("DataType", DataType::F32))) +{ + // Validate output + 
validate(GCAccessor(_target), _reference, tolerance_f32); +} +TEST_SUITE_END() +TEST_SUITE_END() + +TEST_SUITE_END() +TEST_SUITE_END() +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/fixtures/DropoutLayerFixture.h b/tests/validation/fixtures/DropoutLayerFixture.h new file mode 100644 index 0000000000..3a077dbbea --- /dev/null +++ b/tests/validation/fixtures/DropoutLayerFixture.h @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef ARM_COMPUTE_TEST_DROPOUT_LAYER_FIXTURE +#define ARM_COMPUTE_TEST_DROPOUT_LAYER_FIXTURE + +#include "arm_compute/core/TensorShape.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/Tensor.h" +#include "tests/AssetsLibrary.h" +#include "tests/Globals.h" +#include "tests/IAccessor.h" +#include "tests/framework/Asserts.h" +#include "tests/framework/Fixture.h" + +#include + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +template +class DropoutLayerValidationFixture : public framework::Fixture +{ +public: + template + void setup(TensorShape shape, float ratio, bool forward, DataType data_type) + { + _target = compute_target(shape, ratio, forward, data_type); + } + +protected: + template + void fill(U &&tensor) + { + library->fill_tensor_uniform(tensor, 0); + } + + TensorType compute_target(const TensorShape &shape, float ratio, bool forward, DataType data_type) + { + // Create tensors + TensorType src = create_tensor(shape, data_type, 1); + TensorType mask = create_tensor(shape, data_type, 1); + TensorType dst = create_tensor(shape, data_type, 1); + + // Create and configure function + FunctionType dropout_layer; + dropout_layer.configure(&src, &mask, &dst, ratio, forward); + + ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); + + // Allocate tensors + src.allocator()->allocate(); + mask.allocator()->allocate(); + dst.allocator()->allocate(); + + ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(!mask.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS); + + // Fill tensors + fill(AccessorType(src)); + + // Compute function + dropout_layer.run(); + + return dst; + } + + SimpleTensor compute_reference(const TensorShape &shape, DataType data_type) + { + } + + TensorType _target{}; + 
SimpleTensor _reference{}; + int _fractional_bits{}; +}; +} // namespace validation +} // namespace test +} // namespace arm_compute + +#endif /* ARM_COMPUTE_TEST_DROPOUT_LAYER_FIXTURE */ diff --git a/utils/Utils.h b/utils/Utils.h index 28382f47e4..76329671af 100644 --- a/utils/Utils.h +++ b/utils/Utils.h @@ -37,6 +37,9 @@ #include "arm_compute/core/CL/OpenCL.h" #include "arm_compute/runtime/CL/CLTensor.h" #endif /* ARM_COMPUTE_CL */ +#ifdef ARM_COMPUTE_GC +#include "arm_compute/runtime/GLES_COMPUTE/GCTensor.h" +#endif /* ARM_COMPUTE_GC */ #include #include @@ -188,6 +191,27 @@ inline void unmap(CLTensor &tensor) } #endif /* ARM_COMPUTE_CL */ +#ifdef ARM_COMPUTE_GC +/** Maps a tensor if needed + * + * @param[in] tensor Tensor to be mapped + * @param[in] blocking Specified if map is blocking or not + */ +inline void map(GCTensor &tensor, bool blocking) +{ + tensor.map(blocking); +} + +/** Unmaps a tensor if needed + * + * @param tensor Tensor to be unmapped + */ +inline void unmap(GCTensor &tensor) +{ + tensor.unmap(); +} +#endif /* ARM_COMPUTE_GC */ + /** Class to load the content of a PPM file into an Image */ class PPMLoader @@ -256,7 +280,7 @@ public: ARM_COMPUTE_ERROR_ON_FORMAT_NOT_IN(&image, arm_compute::Format::U8, arm_compute::Format::RGB888); try { - // Map buffer if creating a CLTensor + // Map buffer if creating a CLTensor/GCTensor map(image, true); // Check if the file is large enough to fill the image @@ -319,7 +343,7 @@ public: ARM_COMPUTE_ERROR("Unsupported format"); } - // Unmap buffer if creating a CLTensor + // Unmap buffer if creating a CLTensor/GCTensor unmap(image); } catch(const std::ifstream::failure &e) @@ -610,7 +634,7 @@ void save_to_ppm(T &tensor, const std::string &ppm_filename) fs << "P6\n" << width << " " << height << " 255\n"; - // Map buffer if creating a CLTensor + // Map buffer if creating a CLTensor/GCTensor map(tensor, true); switch(tensor.info()->format()) @@ -653,7 +677,7 @@ void save_to_ppm(T &tensor, const std::string 
&ppm_filename) ARM_COMPUTE_ERROR("Unsupported format"); } - // Unmap buffer if creating a CLTensor + // Unmap buffer if creating a CLTensor/GCTensor unmap(tensor); } catch(const std::ofstream::failure &e) @@ -762,7 +786,7 @@ void load_trained_data(T &tensor, const std::string &filename) throw std::runtime_error("Could not load binary data: " + filename); } - // Map buffer if creating a CLTensor + // Map buffer if creating a CLTensor/GCTensor map(tensor, true); Window window; @@ -782,10 +806,8 @@ void load_trained_data(T &tensor, const std::string &filename) }, in); -#ifdef ARM_COMPUTE_CL - // Unmap buffer if creating a CLTensor + // Unmap buffer if creating a CLTensor/GCTensor unmap(tensor); -#endif /* ARM_COMPUTE_CL */ } catch(const std::ofstream::failure &e) { -- cgit v1.2.1