From 7068f9900d136312318ff430aef588b14e0c87ad Mon Sep 17 00:00:00 2001 From: Anthony Barbier Date: Thu, 26 Oct 2017 15:23:08 +0100 Subject: COMPMID-631: Merge branches/gles_compute branch Last commit: commit b25c5f68042b0c81bf611d59a1bb8535e1c42497 Author: Xinghang Zhou Date: Wed Oct 25 18:48:10 2017 +0800 Synced validation's tolerances of GCSoftmax from cl side Change-Id: Ibe72054205c1c8721845d679a31af7ed0a7c5cf6 Reviewed-on: http://mpd-gerrit.cambridge.arm.com/93283 Reviewed-by: Anthony Barbier Tested-by: Kaizen --- arm_compute/core/GLES_COMPUTE/GCHelpers.h | 64 +++++ arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h | 306 +++++++++++++++++++++ arm_compute/core/GLES_COMPUTE/GCKernels.h | 48 ++++ arm_compute/core/GLES_COMPUTE/IGCKernel.h | 179 ++++++++++++ arm_compute/core/GLES_COMPUTE/IGCSimple2DKernel.h | 41 +++ arm_compute/core/GLES_COMPUTE/IGCSimple3DKernel.h | 43 +++ arm_compute/core/GLES_COMPUTE/IGCSimpleKernel.h | 66 +++++ arm_compute/core/GLES_COMPUTE/IGCTensor.h | 99 +++++++ arm_compute/core/GLES_COMPUTE/OpenGLES.h | 165 +++++++++++ .../kernels/GCAbsoluteDifferenceKernel.h | 71 +++++ .../GLES_COMPUTE/kernels/GCActivationLayerKernel.h | 68 +++++ .../kernels/GCBatchNormalizationLayerKernel.h | 77 ++++++ .../core/GLES_COMPUTE/kernels/GCCol2ImKernel.h | 92 +++++++ .../kernels/GCDepthConcatenateKernel.h | 76 +++++ .../kernels/GCDirectConvolutionLayerKernel.h | 87 ++++++ .../core/GLES_COMPUTE/kernels/GCDropoutKernel.h | 79 ++++++ .../core/GLES_COMPUTE/kernels/GCFillBorderKernel.h | 77 ++++++ .../kernels/GCGEMMInterleave4x4Kernel.h | 80 ++++++ .../kernels/GCGEMMMatrixAccumulateBiasesKernel.h | 63 +++++ .../kernels/GCGEMMMatrixAdditionKernel.h | 70 +++++ .../kernels/GCGEMMMatrixMultiplyKernel.h | 79 ++++++ .../kernels/GCGEMMTranspose1xWKernel.h | 67 +++++ .../core/GLES_COMPUTE/kernels/GCIm2ColKernel.h | 109 ++++++++ .../kernels/GCNormalizationLayerKernel.h | 72 +++++ .../kernels/GCPixelWiseMultiplicationKernel.h | 70 +++++ 
.../GLES_COMPUTE/kernels/GCPoolingLayerKernel.h | 70 +++++ .../GLES_COMPUTE/kernels/GCSoftmaxLayerKernel.h | 112 ++++++++ .../core/GLES_COMPUTE/kernels/GCTransposeKernel.h | 52 ++++ 28 files changed, 2482 insertions(+) create mode 100644 arm_compute/core/GLES_COMPUTE/GCHelpers.h create mode 100644 arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h create mode 100644 arm_compute/core/GLES_COMPUTE/GCKernels.h create mode 100644 arm_compute/core/GLES_COMPUTE/IGCKernel.h create mode 100644 arm_compute/core/GLES_COMPUTE/IGCSimple2DKernel.h create mode 100644 arm_compute/core/GLES_COMPUTE/IGCSimple3DKernel.h create mode 100644 arm_compute/core/GLES_COMPUTE/IGCSimpleKernel.h create mode 100644 arm_compute/core/GLES_COMPUTE/IGCTensor.h create mode 100644 arm_compute/core/GLES_COMPUTE/OpenGLES.h create mode 100644 arm_compute/core/GLES_COMPUTE/kernels/GCAbsoluteDifferenceKernel.h create mode 100644 arm_compute/core/GLES_COMPUTE/kernels/GCActivationLayerKernel.h create mode 100644 arm_compute/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.h create mode 100644 arm_compute/core/GLES_COMPUTE/kernels/GCCol2ImKernel.h create mode 100644 arm_compute/core/GLES_COMPUTE/kernels/GCDepthConcatenateKernel.h create mode 100644 arm_compute/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.h create mode 100644 arm_compute/core/GLES_COMPUTE/kernels/GCDropoutKernel.h create mode 100644 arm_compute/core/GLES_COMPUTE/kernels/GCFillBorderKernel.h create mode 100644 arm_compute/core/GLES_COMPUTE/kernels/GCGEMMInterleave4x4Kernel.h create mode 100644 arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixAccumulateBiasesKernel.h create mode 100644 arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixAdditionKernel.h create mode 100644 arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixMultiplyKernel.h create mode 100644 arm_compute/core/GLES_COMPUTE/kernels/GCGEMMTranspose1xWKernel.h create mode 100644 arm_compute/core/GLES_COMPUTE/kernels/GCIm2ColKernel.h create mode 100644 
arm_compute/core/GLES_COMPUTE/kernels/GCNormalizationLayerKernel.h create mode 100644 arm_compute/core/GLES_COMPUTE/kernels/GCPixelWiseMultiplicationKernel.h create mode 100644 arm_compute/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.h create mode 100644 arm_compute/core/GLES_COMPUTE/kernels/GCSoftmaxLayerKernel.h create mode 100644 arm_compute/core/GLES_COMPUTE/kernels/GCTransposeKernel.h (limited to 'arm_compute/core/GLES_COMPUTE') diff --git a/arm_compute/core/GLES_COMPUTE/GCHelpers.h b/arm_compute/core/GLES_COMPUTE/GCHelpers.h new file mode 100644 index 0000000000..475554f2be --- /dev/null +++ b/arm_compute/core/GLES_COMPUTE/GCHelpers.h @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */
+#ifndef __ARM_COMPUTE_GCHELPERS_H__
+#define __ARM_COMPUTE_GCHELPERS_H__
+
+#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h"
+#include "arm_compute/core/Helpers.h"
+#include "support/ToolchainSupport.h"
+
+#include <memory>
+
+namespace arm_compute
+{
+/** Helper function to create and return a unique_ptr pointed to a GLES kernel object of type Kernel
+ * It also calls the kernel's configuration.
+ *
+ * @param[in] args All the arguments that need to be passed to the kernel's configure() method (perfectly forwarded).
+ *
+ * @return A unique pointer pointed to a GLES kernel object
+ */
+template <typename Kernel, typename... T>
+std::unique_ptr<Kernel> create_configure_kernel(T &&... args)
+{
+    std::unique_ptr<Kernel> k = arm_compute::support::cpp14::make_unique<Kernel>();
+    k->configure(std::forward<T>(args)...);
+    return k;
+}
+
+/** Helper function to create and return a unique_ptr pointed to a default-constructed GLES kernel object of type Kernel
+ *
+ * @return A unique pointer pointed to a GLES kernel object
+ */
+template <typename Kernel>
+std::unique_ptr<Kernel> create_kernel()
+{
+    std::unique_ptr<Kernel> k = arm_compute::support::cpp14::make_unique<Kernel>();
+    return k;
+}
+
+/** Max vector width of a GLES vector */
+static constexpr unsigned int max_gc_vector_width = 16;
+}
+#endif /* __ARM_COMPUTE_GCHELPERS_H__ */
diff --git a/arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h b/arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h
new file mode 100644
index 0000000000..e601b529ed
--- /dev/null
+++ b/arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h
@@ -0,0 +1,306 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_GCKERNELLIBRARY_H__
+#define __ARM_COMPUTE_GCKERNELLIBRARY_H__
+
+#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h"
+#include "arm_compute/core/Utils.h"
+
+#include <cstring>
+#include <map>
+#include <set>
+#include <string>
+#include <utility>
+#include <vector>
+
+namespace arm_compute
+{
+/** GCProgram class */
+class GCProgram
+{
+public:
+    /** Default constructor. */
+    GCProgram();
+    /** Construct program from source file.
+     *
+     * @param[in] name   Program name.
+     * @param[in] source Program source.
+     */
+    GCProgram(std::string name, std::string source);
+    /** Default Copy Constructor. */
+    GCProgram(const GCProgram &) = default;
+    /** Default Move Constructor. */
+    GCProgram(GCProgram &&) = default;
+    /** Default copy assignment operator. */
+    GCProgram &operator=(const GCProgram &) = default;
+    /** Default move assignment operator. */
+    GCProgram &operator=(GCProgram &&) = default;
+    /** Returns program name.
+     *
+     * @return Program's name.
+     */
+    std::string name() const
+    {
+        return _name;
+    }
+    /** Link program.
+     *
+     * @param[in] shader Shader used to link program.
+     *
+     * @return Linked program id.
+     */
+    GLuint link_program(GLuint shader);
+    /** Compile shader.
+     *
+     * @param[in] build_options Shader build options.
+     *
+     * @return GLES shader object.
+     */
+    GLuint compile_shader(const std::string &build_options);
+
+private:
+    std::string _name;   /**< Program name. */
+    std::string _source; /**< Source code for the program. */
+};
+
+/** GCKernel class */
+class GCKernel
+{
+public:
+    /** Default Constructor. */
+    GCKernel();
+    /** Default Copy Constructor. */
+    GCKernel(const GCKernel &) = default;
+    /** Default Move Constructor. */
+    GCKernel(GCKernel &&) = default;
+    /** Default copy assignment operator. */
+    GCKernel &operator=(const GCKernel &) = default;
+    /** Default move assignment operator. */
+    GCKernel &operator=(GCKernel &&) = default;
+    /** Constructor.
+     *
+     * @param[in] name    Kernel name.
+     * @param[in] program Built program.
+     */
+    GCKernel(std::string name, GLuint program);
+    /** Returns kernel name.
+     *
+     * @return Kernel's name.
+     */
+    std::string name() const
+    {
+        return _name;
+    }
+    /** Get program id.
+     *
+     * @return program id.
+     */
+    GLuint get_program() const
+    {
+        return _program;
+    }
+    /** Use current program.
+     */
+    void use();
+    /** Unuse current program.
+     */
+    void unuse();
+    /** Set value at uniform idx.
+     *
+     * The parameter store is grown (zero-filled) when idx lies beyond its current size.
+     *
+     * @param[in] idx   Index in vector.
+     * @param[in] value Set value. Its object representation is stored as an unsigned int.
+     */
+    template <class T>
+    void set_params(unsigned int idx, T value)
+    {
+        if(idx >= _params.size())
+        {
+            _params.resize(idx + 1, 0);
+        }
+
+        // Copy the bytes instead of dereferencing a reinterpret_cast'ed pointer: reading a float
+        // (or any other non-integer T) through an unsigned int * breaks the aliasing rules, and a
+        // T narrower than unsigned int would be read past its end.
+        unsigned int bits = 0;
+        std::memcpy(&bits, &value, sizeof(T) < sizeof(bits) ? sizeof(T) : sizeof(bits));
+        _params[idx] = bits;
+    }
+    /** Clear params.
+     *
+     */
+    void clear_params()
+    {
+        _params.clear();
+    }
+    /** Set shader params binding point.
+     *
+     * @param[in] binding Shader params binding point.
+     */
+    void set_shader_params_binding_point(unsigned int binding)
+    {
+        _shader_params_binding_point = binding;
+    }
+    /** Update shader params.
+     *
+     */
+    void update_shader_params();
+    /** Clean up program and ubo.
+     *
+     */
+    void cleanup();
+
+private:
+    std::string _name;                         /**< Kernel name */
+    GLuint _program;                           /**< Linked program id */
+    std::vector<unsigned int> _params;         /**< Store all the values of the shader parameters */
+    GLuint _shader_params;                     /**< Uniform buffer object name for shader parameters */
+    GLuint _shader_params_binding_point;       /**< The binding point of the uniform block for shader parameters */
+    GLuint _shader_params_index;               /**< The index of the uniform block */
+    GLint _shader_params_size;                 /**< The uniform block data size in the shader */
+    static constexpr const char *_shader_params_name = "shader_params"; /**< The uniform block name in the shader */
+};
+
+/** GCKernelLibrary class */
+class GCKernelLibrary
+{
+    using StringSet = std::set<std::string>;
+
+private:
+    /** Default Constructor. */
+    GCKernelLibrary();
+
+public:
+    /** Prevent instances of this class from being copied. */
+    GCKernelLibrary(const GCKernelLibrary &) = delete;
+    /** Prevent instances of this class from being copied. */
+    const GCKernelLibrary &operator=(const GCKernelLibrary &) = delete;
+    /** Default Destructor. */
+    ~GCKernelLibrary();
+
+    /** Access the kernel library instance (the constructor is private, so this is how callers obtain one).
+     *
+     * @return Reference to the kernel library.
+     */
+    static GCKernelLibrary &get();
+    /** Initialises the kernel library.
+     *
+     * When either the display or the context is left at its "none" default, a context is set up
+     * internally and marked as owned by the library.
+     *
+     * @param[in] shader_path (Optional) Path of the directory from which shader sources are loaded.
+     * @param[in] dpy         (Optional) EGLdisplay set by external application.
+     * @param[in] ctx         (Optional) EGLContext set by external application.
+     */
+    void init(std::string shader_path = "./", EGLDisplay dpy = EGL_NO_DISPLAY, EGLContext ctx = EGL_NO_CONTEXT)
+    {
+        //TODO: deal with old display and context.
+        _shader_path = std::move(shader_path);
+
+        _display = dpy;
+        _context = ctx;
+
+        if(_display == EGL_NO_DISPLAY || _context == EGL_NO_CONTEXT)
+        {
+            setup_context();
+
+            _own_context = true;
+        }
+
+        eglMakeCurrent(_display, EGL_NO_SURFACE, EGL_NO_SURFACE, _context);
+        setup_dummy_fbo();
+    }
+
+    /** Sets the path that the shaders reside in.
+     *
+     * @param[in] shader_path Path of the shader.
+     */
+    void set_shader_path(const std::string &shader_path)
+    {
+        _shader_path = shader_path;
+    }
+    /** Sets display and context to create kernel.
+     *
+     * @param[in] dpy EGLdisplay set by external application.
+     * @param[in] ctx EGLContext set by external application.
+     */
+    void set_context(EGLDisplay dpy, EGLContext ctx)
+    {
+        //TODO: deal with old display and context.
+        _display = dpy;
+        _context = ctx;
+
+        eglMakeCurrent(dpy, EGL_NO_SURFACE, EGL_NO_SURFACE, ctx);
+        setup_dummy_fbo();
+    }
+    /** Creates a kernel from the kernel library.
+     *
+     * @param[in] shader_name       Shader name.
+     * @param[in] build_options_set Shader build options as a set.
+     *
+     * @return The created kernel.
+     */
+    GCKernel create_kernel(const std::string &shader_name, const StringSet &build_options_set = {}) const;
+    /** Serializes and saves programs to a binary.
+     *
+     */
+    void save_binary();
+    /** Load serialized binary with all the programs.
+     *
+     */
+    void load_binary();
+    /** Setup a dummy fbo to workaround an issue on Galaxy S8.
+     *
+     */
+    void setup_dummy_fbo();
+
+private:
+    /** Preprocess GLES shader
+     *
+     * @param[in] shader_source Source code of the shader to preprocess.
+     *
+     * @return Preprocessed GLES shader object.
+     */
+    const std::string preprocess_shader(const std::string &shader_source) const;
+    /** Load program and its dependencies.
+     *
+     * @param[in] program_name Name of the program to load.
+     *
+     * @return Reference to the loaded program.
+     */
+    const GCProgram &load_program(const std::string &program_name) const;
+    /** Concatenates contents of a set into a single string.
+     *
+     * @param[in] s Input set to concatenate.
+     *
+     * @return Concatenated string.
+     */
+    std::string stringify_set(const StringSet &s) const;
+    /** Set up EGL context.
+     */
+    void setup_context();
+
+    EGLDisplay  _display;     /**< Underlying EGL Display. */
+    EGLContext  _context;     /**< Underlying EGL Context. */
+    GLuint      _frame_buffer; /**< Dummy fbo */
+    GLuint      _tex_rt;      /**< Dummy texture for render target */
+    bool        _own_context; /**< Self created context or not. */
+    std::string _shader_path; /**< Path to the shaders folder. */
+    // NOTE(review): the template arguments of the four maps below were lost from this listing and are
+    // reconstructed from the member descriptions and the signatures above - confirm against the .cpp.
+    mutable std::map<std::string, GCProgram> _programs_map;                 /**< Map with all already loaded program data. */
+    mutable std::map<std::string, GCKernel> _built_programs_map;            /**< Map with all already built program data. */
+    static const std::map<std::string, std::string> _shader_program_map;    /**< Map that associates kernel names with programs. */
+    static const std::map<std::string, std::string> _program_source_map;    /**< Contains sources for all programs.
+                                                                                 Used for compile-time shader inclusion. */
+};
+}
+#endif /* __ARM_COMPUTE_GCKERNELLIBRARY_H__ */
diff --git a/arm_compute/core/GLES_COMPUTE/GCKernels.h b/arm_compute/core/GLES_COMPUTE/GCKernels.h
new file mode 100644
index 0000000000..57d11d5f18
--- /dev/null
+++ b/arm_compute/core/GLES_COMPUTE/GCKernels.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_GCKERNELS_H__ +#define __ARM_COMPUTE_GCKERNELS_H__ + +/* Header regrouping all the GLES compute kernels */ +#include "arm_compute/core/GLES_COMPUTE/kernels/GCAbsoluteDifferenceKernel.h" +#include "arm_compute/core/GLES_COMPUTE/kernels/GCActivationLayerKernel.h" +#include "arm_compute/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.h" +#include "arm_compute/core/GLES_COMPUTE/kernels/GCCol2ImKernel.h" +#include "arm_compute/core/GLES_COMPUTE/kernels/GCDepthConcatenateKernel.h" +#include "arm_compute/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.h" +#include "arm_compute/core/GLES_COMPUTE/kernels/GCDropoutKernel.h" +#include "arm_compute/core/GLES_COMPUTE/kernels/GCFillBorderKernel.h" +#include "arm_compute/core/GLES_COMPUTE/kernels/GCGEMMInterleave4x4Kernel.h" +#include "arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixAccumulateBiasesKernel.h" +#include "arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixAdditionKernel.h" +#include "arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixMultiplyKernel.h" +#include "arm_compute/core/GLES_COMPUTE/kernels/GCGEMMTranspose1xWKernel.h" +#include "arm_compute/core/GLES_COMPUTE/kernels/GCIm2ColKernel.h" +#include "arm_compute/core/GLES_COMPUTE/kernels/GCNormalizationLayerKernel.h" +#include "arm_compute/core/GLES_COMPUTE/kernels/GCPixelWiseMultiplicationKernel.h" +#include "arm_compute/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.h" +#include 
"arm_compute/core/GLES_COMPUTE/kernels/GCSoftmaxLayerKernel.h" +#include "arm_compute/core/GLES_COMPUTE/kernels/GCTransposeKernel.h" + +#endif /* __ARM_COMPUTE_GCKERNELS_H__ */ diff --git a/arm_compute/core/GLES_COMPUTE/IGCKernel.h b/arm_compute/core/GLES_COMPUTE/IGCKernel.h new file mode 100644 index 0000000000..0d3bfb30fd --- /dev/null +++ b/arm_compute/core/GLES_COMPUTE/IGCKernel.h @@ -0,0 +1,179 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */
+#ifndef __ARM_COMPUTE_IGCKERNEL_H__
+#define __ARM_COMPUTE_IGCKERNEL_H__
+
+#include "arm_compute/core/GLES_COMPUTE/GCKernelLibrary.h"
+#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h"
+
+#include "arm_compute/core/IKernel.h"
+
+namespace arm_compute
+{
+class IGCTensor;
+class Window;
+
+/** Common interface for all the GLES kernels */
+class IGCKernel : public IKernel
+{
+public:
+    /** Constructor */
+    IGCKernel();
+    /** Returns a reference to the GLES kernel of this object.
+     *
+     * @return A reference to the GLES kernel of this object.
+     */
+    GCKernel &kernel();
+
+    /** Binding point and offset shift describing how a tensor's SSBO buffer is passed to a kernel */
+    class BufferParam
+    {
+    public:
+        /** Tensor's binding point in this kernel. */
+        unsigned int binding_point = 0;
+        /** The base 2 logarithm of SSBO buffer data type size (Number of bits to be shifted for offset calculation) */
+        unsigned int buffer_data_type_shift = 0;
+
+        /** Constructor
+         *
+         * @param[in] binding Tensor's binding point.
+         * @param[in] shift   Number of bits to be shifted for offset calculation
+         */
+        BufferParam(const unsigned int binding, const unsigned int shift)
+            : binding_point(binding), buffer_data_type_shift(shift)
+        {
+        }
+    };
+
+    /** Add the passed 1D tensor's parameters to the object's kernel's arguments starting from the index idx.
+     *
+     * @param[in] idx           Index at which to start adding the tensor's arguments. Input and output tensors will have separate indices: multiple indices start from 1, a single index has to be set to 0.
+     * @param[in] tensor        Tensor to set as an argument of the object's kernel.
+     * @param[in] binding_point Tensor's binding point in this kernel.
+     * @param[in] window        Window the kernel will be executed on.
+     */
+    void add_1D_tensor_argument(unsigned int &idx, const IGCTensor *tensor, const unsigned int binding_point, const Window &window);
+
+    /** Add the passed 1D tensor's parameters to the object's kernel's arguments starting from the index idx.
+     *
+     * @param[in] idx    Index at which to start adding the tensor's arguments. Input and output tensors will have separate indices: multiple indices start from 1, a single index has to be set to 0.
+     * @param[in] tensor Tensor to set as an argument of the object's kernel.
+     * @param[in] param  Additional parameter for GLES SSBO buffer.
+     * @param[in] window Window the kernel will be executed on.
+     */
+    void add_1D_tensor_argument(unsigned int &idx, const IGCTensor *tensor, const BufferParam &param, const Window &window);
+
+    /** Add the passed 2D tensor's parameters to the object's kernel's arguments starting from the index idx.
+     *
+     * @param[in] idx           Index at which to start adding the tensor's arguments. Input and output tensors will have separate indices: multiple indices start from 1, a single index has to be set to 0.
+     * @param[in] tensor        Tensor to set as an argument of the object's kernel.
+     * @param[in] binding_point Tensor's binding point in this kernel.
+     * @param[in] window        Window the kernel will be executed on.
+     */
+    void add_2D_tensor_argument(unsigned int &idx, const IGCTensor *tensor, const unsigned int binding_point, const Window &window);
+
+    /** Add the passed 2D tensor's parameters to the object's kernel's arguments starting from the index idx.
+     *
+     * @param[in] idx    Index at which to start adding the tensor's arguments. Input and output tensors will have separate indices: multiple indices start from 1, a single index has to be set to 0.
+     * @param[in] tensor Tensor to set as an argument of the object's kernel.
+     * @param[in] param  Additional parameter for GLES SSBO buffer.
+     * @param[in] window Window the kernel will be executed on.
+     */
+    void add_2D_tensor_argument(unsigned int &idx, const IGCTensor *tensor, const BufferParam &param, const Window &window);
+
+    /** Add the passed 3D tensor's parameters to the object's kernel's arguments starting from the index idx.
+     *
+     * @param[in] idx           Index at which to start adding the tensor's arguments. Input and output tensors will have separate indices: multiple indices start from 1, a single index has to be set to 0.
+     * @param[in] tensor        Tensor to set as an argument of the object's kernel.
+     * @param[in] binding_point Tensor's binding point in this kernel.
+     * @param[in] window        Window the kernel will be executed on.
+     */
+    void add_3D_tensor_argument(unsigned int &idx, const IGCTensor *tensor, const unsigned int binding_point, const Window &window);
+
+    /** Add the passed 3D tensor's parameters to the object's kernel's arguments starting from the index idx.
+     *
+     * @param[in] idx    Index at which to start adding the tensor's arguments. Input and output tensors will have separate indices: multiple indices start from 1, a single index has to be set to 0.
+     * @param[in] tensor Tensor to set as an argument of the object's kernel.
+     * @param[in] param  Additional parameter for GLES SSBO buffer.
+     * @param[in] window Window the kernel will be executed on.
+     */
+    void add_3D_tensor_argument(unsigned int &idx, const IGCTensor *tensor, const BufferParam &param, const Window &window);
+
+    /** Returns the number of arguments enqueued per 1D tensor object.
+     *
+     * @return The number of arguments enqueued per 1D tensor object.
+     */
+    unsigned int num_arguments_per_1D_tensor() const;
+    /** Returns the number of arguments enqueued per 2D tensor object.
+     *
+     * @return The number of arguments enqueued per 2D tensor object.
+     */
+    unsigned int num_arguments_per_2D_tensor() const;
+    /** Returns the number of arguments enqueued per 3D tensor object.
+     *
+     * @return The number of arguments enqueued per 3D tensor object.
+     */
+    unsigned int num_arguments_per_3D_tensor() const;
+    /** Enqueue the OpenGL ES shader to process the given window
+     *
+     * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()).
+     */
+    virtual void run(const Window &window) = 0;
+
+private:
+    /** Add the passed tensor's parameters to the object's kernel's arguments starting from the index idx.
+     *
+     * NOTE(review): the template parameter list was lost from this listing; a non-type dimension
+     * count is assumed from the 1D/2D/3D wrappers above - confirm against the implementation.
+     *
+     * @param[in] idx    Index at which to start adding the tensor's arguments. Input and output tensors will have separate indices: multiple indices start from 1, a single index has to be set to 0.
+     * @param[in] tensor Tensor to set as an argument of the object's kernel.
+     * @param[in] param  Additional parameter for GLES SSBO buffer.
+     * @param[in] window Window the kernel will be executed on.
+     */
+    template <unsigned int dimension_size>
+    void add_tensor_argument(unsigned int &idx, const IGCTensor *tensor, const BufferParam &param, const Window &window);
+
+    /** Returns the number of arguments enqueued per tensor object.
+     *
+     * @return The number of arguments enqueued per tensor object.
+     */
+    template <unsigned int dimension_size>
+    unsigned int num_arguments_per_tensor() const;
+
+protected:
+    GCKernel _kernel; /**< GLES kernel to run */
+};
+
+/** Add the kernel to the command queue with the given window.
+ *
+ * @note Depending on the size of the window, this might translate into several jobs being enqueued.
+ *
+ * @note If kernel->kernel() is empty then the function will return without adding anything to the queue.
+ *
+ * @param[in] kernel Kernel to enqueue
+ * @param[in] window Window the kernel has to process.
+ * @param[in] lws    Local workgroup size requested, by default (1, 1, 1)
+ *
+ * @note If any dimension of the lws is greater than the global workgroup size then no lws will be passed.
+ */
+void enqueue(IGCKernel &kernel, const Window &window, const gles::NDRange &lws = gles::NDRange(1U, 1U, 1U));
+}
+#endif /*__ARM_COMPUTE_IGCKERNEL_H__ */
diff --git a/arm_compute/core/GLES_COMPUTE/IGCSimple2DKernel.h b/arm_compute/core/GLES_COMPUTE/IGCSimple2DKernel.h
new file mode 100644
index 0000000000..413e86a2b7
--- /dev/null
+++ b/arm_compute/core/GLES_COMPUTE/IGCSimple2DKernel.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_IGCSIMPLE2DKERNEL_H__
+#define __ARM_COMPUTE_IGCSIMPLE2DKERNEL_H__
+
+#include "arm_compute/core/GLES_COMPUTE/IGCSimpleKernel.h"
+
+namespace arm_compute
+{
+class IGCTensor;
+
+/** Base for simple OpenGL ES kernels with one input tensor and one output tensor, intended for kernels whose work-items each process a 2D tile */
+class IGCSimple2DKernel : public IGCSimpleKernel
+{
+public:
+    // Overrides the inherited run():
+    void run(const Window &window) override;
+};
+}
+#endif /*__ARM_COMPUTE_IGCSIMPLE2DKERNEL_H__ */
diff --git a/arm_compute/core/GLES_COMPUTE/IGCSimple3DKernel.h b/arm_compute/core/GLES_COMPUTE/IGCSimple3DKernel.h
new file mode 100644
index 0000000000..622e53c38b
--- /dev/null
+++ b/arm_compute/core/GLES_COMPUTE/IGCSimple3DKernel.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2017 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_IGCSIMPLE3DKERNEL_H__
+#define __ARM_COMPUTE_IGCSIMPLE3DKERNEL_H__
+
+#include "arm_compute/core/GLES_COMPUTE/IGCSimple2DKernel.h"
+
+namespace arm_compute
+{
+class IGCTensor;
+
+/** Base for simple GLES kernels with one input tensor and one output tensor.
+ * The input tensor and the output tensor must both have at least 3 dimensions.
+ */
+class IGCSimple3DKernel : public IGCSimple2DKernel
+{
+public:
+    // Overrides the inherited run():
+    void run(const Window &window) override;
+};
+}
+#endif /*__ARM_COMPUTE_IGCSIMPLE3DKERNEL_H__ */
diff --git a/arm_compute/core/GLES_COMPUTE/IGCSimpleKernel.h b/arm_compute/core/GLES_COMPUTE/IGCSimpleKernel.h
new file mode 100644
index 0000000000..a23c4e774e
--- /dev/null
+++ b/arm_compute/core/GLES_COMPUTE/IGCSimpleKernel.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2016, 2017 ARM Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_IGCSIMPLEKERNEL_H__ +#define __ARM_COMPUTE_IGCSIMPLEKERNEL_H__ + +#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h" +#include "arm_compute/core/GLES_COMPUTE/IGCTensor.h" +#include "arm_compute/core/Helpers.h" + +namespace arm_compute +{ +/** Interface for simple OpenGL ES kernels having 1 tensor input and 1 tensor output. */ +class IGCSimpleKernel : public IGCKernel +{ +public: + /** Constructor. */ + IGCSimpleKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers). */ + IGCSimpleKernel(const IGCSimpleKernel &) = delete; + /** Prevent instances of this class from being copy assigned (As this class contains pointers). */ + IGCSimpleKernel &operator=(const IGCSimpleKernel &) = delete; + /** Allow instances of this class to be moved.
*/ + IGCSimpleKernel(IGCSimpleKernel &&) = default; + /** Allow instances of this class to be moved. */ + IGCSimpleKernel &operator=(IGCSimpleKernel &&) = default; + /** Default destructor */ + ~IGCSimpleKernel() = default; + + /** Configure the kernel. + * + * @param[in] input Source tensor. + * @param[out] output Destination tensor. + * @param[in] num_elems_processed_per_iteration Number of processed elements per iteration. + * @param[in] border_undefined (Optional) True if the border mode is undefined. False if it's replicate or constant. + * @param[in] border_size (Optional) Size of the border. + */ + void configure(const IGCTensor *input, IGCTensor *output, unsigned int num_elems_processed_per_iteration, bool border_undefined = false, const BorderSize &border_size = BorderSize()); + +protected: + const IGCTensor *_input; + IGCTensor *_output; +}; +} // namespace arm_compute + +#endif /*__ARM_COMPUTE_IGCSIMPLEKERNEL_H__ */ diff --git a/arm_compute/core/GLES_COMPUTE/IGCTensor.h b/arm_compute/core/GLES_COMPUTE/IGCTensor.h new file mode 100644 index 0000000000..ab4e57e0ce --- /dev/null +++ b/arm_compute/core/GLES_COMPUTE/IGCTensor.h @@ -0,0 +1,99 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_IGCTENSOR_H__ +#define __ARM_COMPUTE_IGCTENSOR_H__ + +#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h" +#include "arm_compute/core/ITensor.h" + +#include <cstdint> + +namespace arm_compute +{ +/** Interface for GLES Compute tensor */ +class IGCTensor : public ITensor +{ +public: + /** Default constructor. */ + IGCTensor(); + + /** Prevent instances of this class from being copied (As this class contains pointers). */ + IGCTensor(const IGCTensor &) = delete; + + /** Prevent instances of this class from being copy assigned (As this class contains pointers). */ + IGCTensor &operator=(const IGCTensor &) = delete; + + /** Allow instances of this class to be moved */ + IGCTensor(IGCTensor &&) = default; + + /** Allow instances of this class to be moved */ + IGCTensor &operator=(IGCTensor &&) = default; + + /** Virtual destructor */ + virtual ~IGCTensor() = default; + + /** Map on an allocated buffer. + * + * @param[in] blocking (Optional) If true, then the mapping will be ready to use by the time + * this method returns, else it is the caller's responsibility + * to flush the queue and wait for the mapping operation to have completed before using the mapped buffer. + */ + void map(bool blocking = true); + /** Unmap an allocated and mapped buffer. + */ + void unmap(); + /** Clear the contents of the tensor synchronously.
+ */ + void clear(); + + // Inherited methods overridden: + uint8_t *buffer() const override; + /** Interface to be implemented by the child class to return the tensor's gles compute buffer id. + * + * @return An SSBO buffer id. + */ + virtual GLuint gc_buffer() const = 0; + +protected: + /** Method to be implemented by the child class to map the SSBO. + * + * @param[in] blocking If true, then the mapping will be ready to use by the time + * this method returns, else it is the caller's responsibility + * to flush the queue and wait for the mapping operation to have completed before using the returned mapping pointer. + */ + virtual uint8_t *do_map(bool blocking) = 0; + /** Method to be implemented by the child class to unmap the SSBO. + * + * @note This method simply enqueues the unmap operation, it is the caller's responsibility to flush the queue and make sure the unmap is finished before + * the memory is accessed by the device. + */ + virtual void do_unmap() = 0; + +private: + uint8_t *_mapping; +}; + +using IGCImage = IGCTensor; +} // namespace arm_compute +#endif /*__ARM_COMPUTE_IGCTENSOR_H__ */ diff --git a/arm_compute/core/GLES_COMPUTE/OpenGLES.h b/arm_compute/core/GLES_COMPUTE/OpenGLES.h new file mode 100644 index 0000000000..e123982945 --- /dev/null +++ b/arm_compute/core/GLES_COMPUTE/OpenGLES.h @@ -0,0 +1,165 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_OPENGLES_H__ +#define __ARM_COMPUTE_OPENGLES_H__ + +#include "arm_compute/core/Log.h" + +#include <EGL/egl.h> +#include <EGL/eglext.h> +#include <EGL/eglplatform.h> +#include <GLES3/gl31.h> +#include <GLES3/gl3ext.h> +#include <cstddef> + +#ifdef ARM_COMPUTE_DEBUG_ENABLED +#define ARM_COMPUTE_GL_CHECK(x) \ + x; \ + { \ + GLenum error = glGetError(); \ + if(error != GL_NO_ERROR) \ + ARM_COMPUTE_LOG_INFO_MSG_WITH_FORMAT_CORE("glGetError() = %i (0x%.8x)\n", error, error); \ + } +#else /* ARM_COMPUTE_DEBUG_ENABLED */ +#define ARM_COMPUTE_GL_CHECK(x) x +#endif /* ARM_COMPUTE_DEBUG_ENABLED */ + +namespace arm_compute +{ +namespace gles +{ +/** Class interface for specifying NDRange values. */ +class NDRange +{ +private: + size_t _sizes[3]; + size_t _dimensions; + +public: + /** Default constructor - resulting range has zero dimensions. */ + NDRange() + : _dimensions(0) + { + _sizes[0] = 0; + _sizes[1] = 0; + _sizes[2] = 0; + } + + /** Constructs one-dimensional range. + * + * @param[in] size0 Size of the first dimension. + */ + NDRange(size_t size0) + : _dimensions(1) + { + _sizes[0] = size0; + _sizes[1] = 1; + _sizes[2] = 1; + } + + /** Constructs two-dimensional range. + * + * @param[in] size0 Size of the first dimension. + * @param[in] size1 Size of the second dimension. + */ + NDRange(size_t size0, size_t size1) + : _dimensions(2) + { + _sizes[0] = size0; + _sizes[1] = size1; + _sizes[2] = 1; + } + + /** Constructs three-dimensional range. + * + * @param[in] size0 Size of the first dimension. + * @param[in] size1 Size of the second dimension.
+ * @param[in] size2 Size of the third dimension. + */ + NDRange(size_t size0, size_t size1, size_t size2) + : _dimensions(3) + { + _sizes[0] = size0; + _sizes[1] = size1; + _sizes[2] = size2; + } + + /** Conversion operator to const size_t *. + * + * @returns A pointer to the size of the first dimension. + */ + operator const size_t *() const + { + return _sizes; + } + + /** Queries the number of dimensions in the range. + * + * @returns The number of dimensions. + */ + size_t dimensions() const + { + return _dimensions; + } + + /** Returns the number of bytes taken by the sizes of the dimensions in use. + * + * @returns dimensions() * sizeof(size_t). + */ + size_t size() const + { + return _dimensions * sizeof(size_t); + } + + /** Returns the array holding the size of each dimension. + * + * @returns Pointer to the first element of the sizes array. + */ + size_t *get() + { + return _sizes; + } + + /** Returns the array holding the size of each dimension. + * + * @returns Pointer to the first element of the (read-only) sizes array. + */ + const size_t *get() const + { + return _sizes; + } +}; + +static const NDRange NullRange; +static const NDRange Range_128_1 = NDRange(128, 1); +} // namespace gles + +/** Check if the OpenGL ES 3.1 API is available at runtime. + * + * @returns true if the OpenGL ES 3.1 API is available. + */ +bool opengles31_is_available(); +} // namespace arm_compute + +#endif /* __ARM_COMPUTE_OPENGLES_H__ */ diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCAbsoluteDifferenceKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCAbsoluteDifferenceKernel.h new file mode 100644 index 0000000000..71f7b37700 --- /dev/null +++ b/arm_compute/core/GLES_COMPUTE/kernels/GCAbsoluteDifferenceKernel.h @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2017 ARM Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_GCABSOLUTEDIFFERENCEKERNEL_H__ +#define __ARM_COMPUTE_GCABSOLUTEDIFFERENCEKERNEL_H__ + +#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h" + +namespace arm_compute +{ +class IGCTensor; + +/** Interface for the absolute difference kernel. + * + * Absolute difference is computed by: + * @f[ output(x,y) = | input1(x,y) - input2(x,y) | @f] + */ +class GCAbsoluteDifferenceKernel : public IGCKernel +{ +public: + /** Default constructor. */ + GCAbsoluteDifferenceKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers). */ + GCAbsoluteDifferenceKernel(const GCAbsoluteDifferenceKernel &) = delete; + /** Prevent instances of this class from being copy assigned (As this class contains pointers).
*/ + GCAbsoluteDifferenceKernel &operator=(const GCAbsoluteDifferenceKernel &) = delete; + /** Allow instances of this class to be moved. */ + GCAbsoluteDifferenceKernel(GCAbsoluteDifferenceKernel &&) = default; + /** Allow instances of this class to be moved. */ + GCAbsoluteDifferenceKernel &operator=(GCAbsoluteDifferenceKernel &&) = default; + /** Default destructor */ + ~GCAbsoluteDifferenceKernel() = default; + + /** Set the input and output tensors. + * + * @param[in] input1 Source tensor. Data types supported: U8 + * @param[in] input2 Source tensor. Data types supported: U8 + * @param[out] output Destination tensor. Data types supported: U8 + */ + void configure(const IGCTensor *input1, const IGCTensor *input2, IGCTensor *output); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + const IGCTensor *_input1; /**< Source tensor 1. */ + const IGCTensor *_input2; /**< Source tensor 2. */ + IGCTensor *_output; /**< Destination tensor. */ +}; +} // namespace arm_compute +#endif /* __ARM_COMPUTE_GCABSOLUTEDIFFERENCEKERNEL_H__ */ diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCActivationLayerKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCActivationLayerKernel.h new file mode 100644 index 0000000000..fc1d52f455 --- /dev/null +++ b/arm_compute/core/GLES_COMPUTE/kernels/GCActivationLayerKernel.h @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2017 ARM Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_GCACTIVATIONLAYERKERNEL_H__ +#define __ARM_COMPUTE_GCACTIVATIONLAYERKERNEL_H__ + +#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h" + +namespace arm_compute +{ +class IGCTensor; + +/** Interface for the activation layer kernel.
*/ +class GCActivationLayerKernel : public IGCKernel +{ +public: + /** Default constructor */ + GCActivationLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GCActivationLayerKernel(const GCActivationLayerKernel &) = delete; + /** Prevent instances of this class from being copy assigned (As this class contains pointers) */ + GCActivationLayerKernel &operator=(const GCActivationLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + GCActivationLayerKernel(GCActivationLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + GCActivationLayerKernel &operator=(GCActivationLayerKernel &&) = default; + /** Default destructor */ + ~GCActivationLayerKernel() = default; + /** Set the input and output tensors. + * + * @note If the output tensor is a nullptr, the activation function will be performed in-place + * + * @param[in,out] input Source tensor. In case of @p output tensor = nullptr, this tensor will store the result + * of the activation function. Data types supported: F16/F32. + * @param[out] output Destination tensor. Data type should match the input data type. + * @param[in] act_info Activation layer information. + */ + void configure(IGCTensor *input, IGCTensor *output, ActivationLayerInfo act_info); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + IGCTensor *_input; + IGCTensor *_output; +}; +} // namespace arm_compute +#endif /*__ARM_COMPUTE_GCACTIVATIONLAYERKERNEL_H__ */ diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.h new file mode 100644 index 0000000000..2bbd6a83fe --- /dev/null +++ b/arm_compute/core/GLES_COMPUTE/kernels/GCBatchNormalizationLayerKernel.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2017 ARM Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_GCBATCHNORMALIZATIONLAYERKERNEL_H__ +#define __ARM_COMPUTE_GCBATCHNORMALIZATIONLAYERKERNEL_H__ + +#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h" + +namespace arm_compute +{ +class IGCTensor; + +/** Interface for the BatchNormalization layer kernel. + */ +class GCBatchNormalizationLayerKernel : public IGCKernel +{ +public: + /** Constructor */ + GCBatchNormalizationLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GCBatchNormalizationLayerKernel(const GCBatchNormalizationLayerKernel &) = delete; + /** Prevent instances of this class from being copy assigned (As this class contains pointers) */ + GCBatchNormalizationLayerKernel &operator=(const GCBatchNormalizationLayerKernel &) = delete; + /** Default Move Constructor.
*/ + GCBatchNormalizationLayerKernel(GCBatchNormalizationLayerKernel &&) = default; + /** Default move assignment operator. */ + GCBatchNormalizationLayerKernel &operator=(GCBatchNormalizationLayerKernel &&) = default; + /** Default destructor */ + ~GCBatchNormalizationLayerKernel() = default; + + /** Set the input and output tensors. + * + * @param[in] input Source tensor. 3 lower dimensions represent a single input with dimensions [width, height, FM]. + * The rest are optional and used for representing batches. Data types supported: F16/F32. + * @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data type supported: same as @p input + * @param[in] mean Mean values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] var Variance values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] beta Beta values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] gamma Gamma values tensor. 1 dimension with size equal to the feature maps [FM]. Data types supported: Same as @p input + * @param[in] epsilon Small value to avoid division by zero.
+ */ + void configure(const IGCTensor *input, IGCTensor *output, const IGCTensor *mean, const IGCTensor *var, const IGCTensor *beta, const IGCTensor *gamma, float epsilon); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + const IGCTensor *_input; + IGCTensor *_output; + const IGCTensor *_mean; + const IGCTensor *_var; + const IGCTensor *_beta; + const IGCTensor *_gamma; + float _epsilon; +}; +} // namespace arm_compute +#endif /*__ARM_COMPUTE_GCBATCHNORMALIZATIONLAYERKERNEL_H__ */ diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCCol2ImKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCCol2ImKernel.h new file mode 100644 index 0000000000..257ab0eca0 --- /dev/null +++ b/arm_compute/core/GLES_COMPUTE/kernels/GCCol2ImKernel.h @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE.
+ */ + +#ifndef __ARM_COMPUTE_GCCOL2IMKERNEL_H__ +#define __ARM_COMPUTE_GCCOL2IMKERNEL_H__ + +#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h" + +namespace arm_compute +{ +class IGCTensor; + +/** Interface for the col2im reshaping kernel. + * + * Rearranges each matrix column into image blocks. It's the inverse operation of @ref GCIm2ColKernel. + * + * For example, a vector of 9 elements can be reshaped to a block(image) of 3x3: + * + * @f[ + * \left( \begin{array}{ccccccccc} + * a0 & a1 & a2 & a3 & a4 & a5 & a6 & a7 & a8 \\ + * \end{array} \right) + * \rightarrow + * \left( \begin{array}{ccc} + * a0 & a1 & a2 \\ + * a3 & a4 & a5 \\ + * a6 & a7 & a8 \\ + * \end{array} \right) + * @f] + */ +class GCCol2ImKernel : public IGCKernel +{ +public: + /** Default constructor */ + GCCol2ImKernel(); + + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GCCol2ImKernel(const GCCol2ImKernel &) = delete; + + /** Prevent instances of this class from being copy assigned (As this class contains pointers) */ + GCCol2ImKernel &operator=(const GCCol2ImKernel &) = delete; + + /** Allow instances of this class to be moved */ + GCCol2ImKernel(GCCol2ImKernel &&) = default; + + /** Allow instances of this class to be moved */ + GCCol2ImKernel &operator=(GCCol2ImKernel &&) = default; + + /** Default destructor */ + ~GCCol2ImKernel() = default; + + /** Set the input and output of the kernel. + * + * @param[in] input The input tensor to convert. Data types supported: F32 + * @param[out] output The output tensor. 3 lower dimensions represent a single output [width, height, OFM], + * while the rest represent batch of outputs. Data types supported: Same as @p input + * @param[in] convolved_dims Output convolved dimensions.
+ */ + void configure(const IGCTensor *input, IGCTensor *output, std::pair<unsigned int, unsigned int> convolved_dims); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + const IGCTensor *_input; + IGCTensor *_output; + std::pair<unsigned int, unsigned int> _convolved_dims; +}; +} // namespace arm_compute + +#endif /*__ARM_COMPUTE_GCCOL2IMKERNEL_H__ */ diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCDepthConcatenateKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCDepthConcatenateKernel.h new file mode 100644 index 0000000000..9a34a9a9c5 --- /dev/null +++ b/arm_compute/core/GLES_COMPUTE/kernels/GCDepthConcatenateKernel.h @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE.
+ */ + +#ifndef __ARM_COMPUTE_GCDEPTHCONCATENATEKERNEL_H__ +#define __ARM_COMPUTE_GCDEPTHCONCATENATEKERNEL_H__ + +#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h" +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +class IGCTensor; + +/** Interface for the depth concatenate kernel. + * The input tensor will be concatenated into the output tensor. + */ +class GCDepthConcatenateKernel : public IGCKernel +{ +public: + /** Default constructor */ + GCDepthConcatenateKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GCDepthConcatenateKernel(const GCDepthConcatenateKernel &) = delete; + /** Prevent instances of this class from being copy assigned (As this class contains pointers) */ + GCDepthConcatenateKernel &operator=(const GCDepthConcatenateKernel &) = delete; + /** Allow instances of this class to be moved */ + GCDepthConcatenateKernel(GCDepthConcatenateKernel &&) = default; + /** Allow instances of this class to be moved */ + GCDepthConcatenateKernel &operator=(GCDepthConcatenateKernel &&) = default; + /** Default destructor */ + ~GCDepthConcatenateKernel() = default; + /** Initialise the kernel's inputs and output + * + * @param[in] input Input tensor. Data types supported: F16/F32. + * @param[in] depth_offset The offset on the Z axis. + * @param[in,out] output Output tensor. Data types supported: Same as @p input. + * + * @note The two lowest dimensions of the output tensor can't be smaller than those of the input tensor. + * @note The gaps between the two lowest dimensions of input and output need to be divisible by 2.
+ * + */ + void configure(const IGCTensor *input, unsigned int depth_offset, IGCTensor *output); + + // Inherited methods overridden: + void run(const Window &window) override; + BorderSize border_size() const override; + +private: + const IGCTensor *_input; + IGCTensor *_output; + int _top_bottom; + int _left_right; +}; +} // namespace arm_compute +#endif /* __ARM_COMPUTE_GCDEPTHCONCATENATEKERNEL_H__ */ diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.h new file mode 100644 index 0000000000..415b781bc6 --- /dev/null +++ b/arm_compute/core/GLES_COMPUTE/kernels/GCDirectConvolutionLayerKernel.h @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE.
+ */ +#ifndef __ARM_COMPUTE_GCDIRECTCONVOLUTIONLAYERKERNEL_H__ +#define __ARM_COMPUTE_GCDIRECTCONVOLUTIONLAYERKERNEL_H__ + +#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h" +#include "arm_compute/core/GLES_COMPUTE/OpenGLES.h" + +namespace arm_compute +{ +class IGCTensor; + +/** Interface for the direct convolution kernel; the template argument is the kernel size (1, 3 and 5 are used by the aliases declared after the class). + */ +template <unsigned int kernel_size> +class GCDirectConvolutionLayerKernel : public IGCKernel +{ +public: + /** Default constructor */ + GCDirectConvolutionLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GCDirectConvolutionLayerKernel(const GCDirectConvolutionLayerKernel &) = delete; + /** Prevent instances of this class from being copy assigned (As this class contains pointers) */ + GCDirectConvolutionLayerKernel &operator=(const GCDirectConvolutionLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + GCDirectConvolutionLayerKernel(GCDirectConvolutionLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + GCDirectConvolutionLayerKernel &operator=(GCDirectConvolutionLayerKernel &&) = default; + /** Default destructor */ + ~GCDirectConvolutionLayerKernel() = default; + /** Set the input and output of the kernel. + * + * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM], + * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: F16/F32 + * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: Same as @p input. + * @param[in] bias Biases tensor. Shared bias supported. Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p input. + * @param[out] output The output tensor. First 2 lower dimensions represent a transform of each 3D input, + * while every dimension above represents a batch.
Data types supported: Same as @p input + * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. + */ + void configure(const IGCTensor *input, const IGCTensor *weights, const IGCTensor *bias, IGCTensor *output, const PadStrideInfo &conv_info); + + // Inherited methods overridden: + BorderSize border_size() const override; + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + const IGCTensor *_input; + const IGCTensor *_bias; + const IGCTensor *_weights; + IGCTensor *_output; + BorderSize _border_size; + int _conv_stride_x; + int _conv_stride_y; + int _conv_pad_x; + int _conv_pad_y; + gles::NDRange _lws; +}; + +using GCDirectConvolutionLayer1x1Kernel = GCDirectConvolutionLayerKernel<1>; +using GCDirectConvolutionLayer3x3Kernel = GCDirectConvolutionLayerKernel<3>; +using GCDirectConvolutionLayer5x5Kernel = GCDirectConvolutionLayerKernel<5>; +} // namespace arm_compute +#endif /*__ARM_COMPUTE_GCDIRECTCONVOLUTIONLAYERKERNEL_H__ */ diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCDropoutKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCDropoutKernel.h new file mode 100644 index 0000000000..6159a7af26 --- /dev/null +++ b/arm_compute/core/GLES_COMPUTE/kernels/GCDropoutKernel.h @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __ARM_COMPUTE_GCDROPOUTKERNEL_H__ +#define __ARM_COMPUTE_GCDROPOUTKERNEL_H__ + +#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h" + +namespace arm_compute +{ +class IGCTensor; + +/** Interface for the dropout kernel. + * + * Dropout is used to reduce over-fitting in neural networks. + * + */ +class GCDropoutKernel : public IGCKernel +{ +public: + /** Default constructor */ + GCDropoutKernel(); + + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GCDropoutKernel(const GCDropoutKernel &) = delete; + + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GCDropoutKernel &operator=(const GCDropoutKernel &) = delete; + + /** Allow instances of this class to be moved */ + GCDropoutKernel(GCDropoutKernel &&) = default; + + /** Allow instances of this class to be moved */ + GCDropoutKernel &operator=(GCDropoutKernel &&) = default; + + /** Set the input and output of the kernel. + * + * @param[in] input The input tensor for this op. Data types supported: F16/F32 + * @param[out] mask The mask tensor. Data types supported: Same as @p input + * @param[out] output The output tensor.
Data types supported: Same as @p input + * @param[in] ratio Dropout ratio + * @param[in] forward Forward or backward propagation + * + */ + void configure(const IGCTensor *input, IGCTensor *mask, IGCTensor *output, float ratio, bool forward); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + const IGCTensor *_input; + IGCTensor *_mask; + IGCTensor *_output; + unsigned int _num_elems_processed_per_iteration; +}; +} + +#endif /*__ARM_COMPUTE_GCDROPOUTKERNEL_H__ */ diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCFillBorderKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCFillBorderKernel.h new file mode 100644 index 0000000000..acb8aa67d3 --- /dev/null +++ b/arm_compute/core/GLES_COMPUTE/kernels/GCFillBorderKernel.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE.
+ */ +#ifndef __ARM_COMPUTE_GCFILLBORDERKERNEL_H__ +#define __ARM_COMPUTE_GCFILLBORDERKERNEL_H__ + +#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h" +#include "arm_compute/core/PixelValue.h" +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +class IGCTensor; + +/** Interface for filling the border of a tensor */ +class GCFillBorderKernel : public IGCKernel +{ +public: + /** Default constructor */ + GCFillBorderKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GCFillBorderKernel(const GCFillBorderKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GCFillBorderKernel &operator=(const GCFillBorderKernel &) = delete; + /** Allow instances of this class to be moved */ + GCFillBorderKernel(GCFillBorderKernel &&) = default; + /** Allow instances of this class to be moved */ + GCFillBorderKernel &operator=(GCFillBorderKernel &&) = default; + /** Default destructor */ + ~GCFillBorderKernel() = default; + + /** Initialise the kernel's tensor, border size and border mode. + * + * @param[in,out] tensor Tensor to process. Data types supported: F16/F32. + * @param[in] border_size Size of the border to fill in elements. + * @param[in] border_mode Border mode to use for the convolution. + * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. + */ + void configure(const IGCTensor *tensor, BorderSize border_size, BorderMode border_mode, const PixelValue &constant_border_value = PixelValue()); + + /** Function to set the constant value on fill border kernel depending on type. + * + * @param[in] idx Index of the kernel argument to set. + * @param[in] constant_border_value Constant value to use for borders if border_mode is set to CONSTANT.
+ */ + template <class T> + void set_constant_border(unsigned int idx, const PixelValue &constant_border_value); + + // Inherited methods overridden: + void run(const Window &window) override; + bool is_parallelisable() const override; + +private: + const IGCTensor *_tensor; +}; +} +#endif /*__ARM_COMPUTE_GCFILLBORDERKERNEL_H__ */ diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMInterleave4x4Kernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMInterleave4x4Kernel.h new file mode 100644 index 0000000000..b2369a6ad1 --- /dev/null +++ b/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMInterleave4x4Kernel.h @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE.
+ */ +#ifndef __ARM_COMPUTE_GCGEMMINTERLEAVE4X4KERNEL_H__ +#define __ARM_COMPUTE_GCGEMMINTERLEAVE4X4KERNEL_H__ + +#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h" + +namespace arm_compute +{ +class IGCTensor; + +/** OpenGL ES kernel which interleaves the elements of a matrix A in chunks of 4x4 + * + * This function puts the values in a 4x4 block of Matrix A on the same row (Interleaved values) + * + * @f[ + * \left( \begin{array}{cccc} + * a00 & a01 & a02 & a03 \\ + * a10 & a11 & a12 & a13 \\ + * a20 & a21 & a22 & a23 \\ + * a30 & a31 & a32 & a33 \\ + * \end{array} \right) + * \rightarrow + * \left( \begin{array}{ccccccccccccccccc} + * a00 & a10 & a20 & a30 & a01 & a11 & a21 & a31 & a02 & a12 & a22 & a32 & a03 & a13 & a23 & a33 \\ + * \end{array} \right) + * @f] + * + * After this operation, the output matrix will have the following shape: [ height * 4, ceil(width / 4.0f) ] + */ +class GCGEMMInterleave4x4Kernel : public IGCKernel +{ +public: + /** Default constructor */ + GCGEMMInterleave4x4Kernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GCGEMMInterleave4x4Kernel(const GCGEMMInterleave4x4Kernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GCGEMMInterleave4x4Kernel &operator=(const GCGEMMInterleave4x4Kernel &) = delete; + /** Allow instances of this class to be moved */ + GCGEMMInterleave4x4Kernel(GCGEMMInterleave4x4Kernel &&) = default; + /** Allow instances of this class to be moved */ + GCGEMMInterleave4x4Kernel &operator=(GCGEMMInterleave4x4Kernel &&) = default; + /** Initialise the kernel's input and output. + * + * @param[in] input Input tensor. Data types supported: F32 + * @param[out] output Output tensor.
Data type supported: same as @p input + */ + void configure(const IGCTensor *input, IGCTensor *output); + + // Inherited methods overridden + void run(const Window &window) override; + +private: + const IGCTensor *_input; + IGCTensor *_output; +}; +} +#endif /* __ARM_COMPUTE_GCGEMMINTERLEAVE4X4KERNEL_H__ */ diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixAccumulateBiasesKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixAccumulateBiasesKernel.h new file mode 100644 index 0000000000..20f28cbb65 --- /dev/null +++ b/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixAccumulateBiasesKernel.h @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE.
+ */ +#ifndef __ARM_COMPUTE_GCGEMMMATRIXACCUMULATEBIASESKERNEL_H__ +#define __ARM_COMPUTE_GCGEMMMATRIXACCUMULATEBIASESKERNEL_H__ + +#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h" + +namespace arm_compute +{ +/** Interface to add a bias to each row of the input tensor + * + */ +class GCGEMMMatrixAccumulateBiasesKernel : public IGCKernel +{ +public: + /** Default constructor */ + GCGEMMMatrixAccumulateBiasesKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GCGEMMMatrixAccumulateBiasesKernel(const GCGEMMMatrixAccumulateBiasesKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GCGEMMMatrixAccumulateBiasesKernel &operator=(const GCGEMMMatrixAccumulateBiasesKernel &) = delete; + /** Allow instances of this class to be moved */ + GCGEMMMatrixAccumulateBiasesKernel(GCGEMMMatrixAccumulateBiasesKernel &&) = default; + /** Allow instances of this class to be moved */ + GCGEMMMatrixAccumulateBiasesKernel &operator=(GCGEMMMatrixAccumulateBiasesKernel &&) = default; + /** Set the accumulate buffer and the biases of the kernel. + * + * @param[in, out] accum The accumulate tensor to convert. Data types supported: F16/F32 + * @param[in] biases The shared biases tensor to append. It must be a 1D tensor. Data types supported: Same as @p accum + */ + void configure(IGCTensor *accum, const IGCTensor *biases); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + IGCTensor *_accum; + const IGCTensor *_biases; +}; +} + +#endif /*__ARM_COMPUTE_GCGEMMMATRIXACCUMULATEBIASESKERNEL_H__ */ diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixAdditionKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixAdditionKernel.h new file mode 100644 index 0000000000..02abb8da76 --- /dev/null +++ b/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixAdditionKernel.h @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2017 ARM Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_GCGEMMMATRIXADDITIONKERNEL_H__ +#define __ARM_COMPUTE_GCGEMMMATRIXADDITIONKERNEL_H__ + +#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h" + +namespace arm_compute +{ +class IGCTensor; + +/** OpenGL ES kernel to perform the in-place matrix addition between 2 matrices, taking into account that the second matrix might be weighted by a scalar value beta. + * The matrices must have the same dimensions. + * + * @note This kernel is computed if and only if beta != 0.0.
+ */ +class GCGEMMMatrixAdditionKernel : public IGCKernel +{ +public: + /** Default constructor */ + GCGEMMMatrixAdditionKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GCGEMMMatrixAdditionKernel(const GCGEMMMatrixAdditionKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GCGEMMMatrixAdditionKernel &operator=(const GCGEMMMatrixAdditionKernel &) = delete; + /** Allow instances of this class to be moved */ + GCGEMMMatrixAdditionKernel(GCGEMMMatrixAdditionKernel &&) = default; + /** Allow instances of this class to be moved */ + GCGEMMMatrixAdditionKernel &operator=(GCGEMMMatrixAdditionKernel &&) = default; + /** Initialise the kernel's input, output and beta value. + * + * @note The input and output tensors must have the same dimensions. + * + * @param[in] input Input tensor (Matrix C). Data types supported: F32 + * @param[in, out] output Output tensor. If this kernel is used to finalize the GEMM result (alpha * AB + beta * C), output must contain the result obtained by @ref GCGEMMMatrixMultiplyKernel. Data type supported: same as @p input + * @param[in] beta Weight of matrix C + */ + void configure(const IGCTensor *input, IGCTensor *output, float beta); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + const IGCTensor *_input; + IGCTensor *_output; +}; +} + +#endif /* __ARM_COMPUTE_GCGEMMMATRIXADDITIONKERNEL_H__ */ diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixMultiplyKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixMultiplyKernel.h new file mode 100644 index 0000000000..3a0b22f148 --- /dev/null +++ b/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMMatrixMultiplyKernel.h @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2017 ARM Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_GCGEMMMATRIXMULTIPLYKERNEL_H__ +#define __ARM_COMPUTE_GCGEMMMATRIXMULTIPLYKERNEL_H__ + +#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h" + +namespace arm_compute +{ +class IGCTensor; + +/** GLES Compute kernel to multiply two input matrices "A" and "B" or to multiply a vector "A" by a matrix "B". All elements of the output matrix/vector will be multiplied by alpha + * + * @note If the output tensor is a matrix, the implementation assumes that the input tensors @p input0 and @p input1 are both matrices and reshaped respectively with @ref GCGEMMInterleave4x4Kernel and @ref GCGEMMTranspose1xWKernel + * @note If the output tensor is a vector and the data type is F32, the implementation assumes that the first input tensor @p input0 is a vector and the second input tensor @p input1 a matrix.
The implementation also assumes that both tensors have not been reshaped + * + * @attention The second input tensor must have at least 2 dimensions (matrix) + * + */ +class GCGEMMMatrixMultiplyKernel : public IGCKernel +{ +public: + /** Default constructor */ + GCGEMMMatrixMultiplyKernel(); + + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GCGEMMMatrixMultiplyKernel(const GCGEMMMatrixMultiplyKernel &) = delete; + + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GCGEMMMatrixMultiplyKernel &operator=(const GCGEMMMatrixMultiplyKernel &) = delete; + + /** Allow instances of this class to be moved */ + GCGEMMMatrixMultiplyKernel(GCGEMMMatrixMultiplyKernel &&) = default; + + /** Allow instances of this class to be moved */ + GCGEMMMatrixMultiplyKernel &operator=(GCGEMMMatrixMultiplyKernel &&) = default; + + /** Initialise the kernel's input, output and alpha + * + * @param[in] input0 Input tensor containing the interleaved Matrix A or the vector A. Data types supported: F16/F32 + * @param[in] input1 Input tensor containing the transposed Matrix B if the first input tensor A is not a vector. + * If the output tensor is a vector, input1 must contain the matrix B not reshaped. Data type supported: same as @p input0 + * @param[out] output Output tensor to store the result of matrix multiplication.
Data type supported: same as @p input0 + * @param[in] alpha Weight of the matrix product + * @param[in] is_interleaved_transposed (Optional) True if input0 and input1 have been reshaped respectively using @ref GCGEMMInterleave4x4Kernel and @ref GCGEMMTranspose1xWKernel + */ + void configure(const IGCTensor *input0, const IGCTensor *input1, IGCTensor *output, float alpha, bool is_interleaved_transposed = true); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + const IGCTensor *_input0; + const IGCTensor *_input1; + IGCTensor *_output; +}; +} +#endif /* __ARM_COMPUTE_GCGEMMMATRIXMULTIPLYKERNEL_H__ */ diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMTranspose1xWKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMTranspose1xWKernel.h new file mode 100644 index 0000000000..4223556ac4 --- /dev/null +++ b/arm_compute/core/GLES_COMPUTE/kernels/GCGEMMTranspose1xWKernel.h @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_GCGEMMTRANSPOSE1XWKERNEL_H__ +#define __ARM_COMPUTE_GCGEMMTRANSPOSE1XWKERNEL_H__ + +#include "arm_compute/core/GLES_COMPUTE/IGCSimple2DKernel.h" + +namespace arm_compute +{ +class IGCTensor; + +/** OpenGLES kernel which transposes the elements of a matrix in chunks of 1xW, where W is equal to (16 / element size of the tensor) + * + * The following is an example of how the 1xW transposition works when the input data type is F32 + * + * @f[ + * \left( \begin{array}{cccc} + * a00 & a01 & a02 & a03 \\ + * a10 & a11 & a12 & a13 \\ + * a20 & a21 & a22 & a23 \\ + * a30 & a31 & a32 & a33 \\ + * \end{array} \right) + * \rightarrow + * \left( \begin{array}{ccccccccccccccccc} + * a00 & a01 & a02 & a03 & a10 & a11 & a12 & a13 & a20 & a21 & a22 & a23 & a30 & a31 & a32 & a33 \\ + * \end{array} \right) + * @f] + * + * @note The output matrix will have the following shape: [ height * W, ceil(width / W) ], where W = (16 / element size of the tensor) + * + */ +class GCGEMMTranspose1xWKernel : public IGCSimple2DKernel +{ +public: + /** Initialise the kernel's input and output. + * + * @param[in] input Input tensor. Data types supported: F32 + * @param[out] output Output tensor.
Data type supported: same as @p input + */ + void configure(const IGCTensor *input, IGCTensor *output); + + // Inherited methods overridden: + void run(const Window &window) override; +}; +} +#endif /* __ARM_COMPUTE_GCGEMMTRANSPOSE1XWKERNEL_H__ */ diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCIm2ColKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCIm2ColKernel.h new file mode 100644 index 0000000000..e1b35607ff --- /dev/null +++ b/arm_compute/core/GLES_COMPUTE/kernels/GCIm2ColKernel.h @@ -0,0 +1,109 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_GCIM2COLKERNEL_H__ +#define __ARM_COMPUTE_GCIM2COLKERNEL_H__ + +#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h" + +namespace arm_compute +{ +class IGCTensor; + +/** Interface for the im2col reshape kernel. + * + * Rearranges image blocks into columns.
It is used to strip out each convolution block to a single column. + * It is used to transform a convolution to a plain matrix multiplication. + * + * For example taking into account the image below and assuming 3x3 image blocks with stride of 1 we have: + * @f[ + * \left( \begin{array}{cccc} + * a00 & a01 & a02 & a03 \\ + * a10 & a11 & a12 & a13 \\ + * a20 & a21 & a22 & a23 \\ + * a30 & a31 & a32 & a33 \\ + * \end{array} \right) + * = + * \left( \begin{array}{ccccccccc} + * a00 & a01 & a02 & a10 & a11 & a12 & a20 & a21 & a22 \\ + * a01 & a02 & a03 & a11 & a12 & a13 & a21 & a22 & a23 \\ + * a10 & a11 & a12 & a20 & a21 & a22 & a30 & a31 & a32 \\ + * a11 & a12 & a13 & a21 & a22 & a23 & a31 & a32 & a33 \\ + * \end{array} \right) + * @f] + */ +class GCIm2ColKernel : public IGCKernel +{ +public: + /** Default constructor */ + GCIm2ColKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GCIm2ColKernel(const GCIm2ColKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GCIm2ColKernel &operator=(const GCIm2ColKernel &) = delete; + /** Allow instances of this class to be moved */ + GCIm2ColKernel(GCIm2ColKernel &&) = default; + /** Allow instances of this class to be moved */ + GCIm2ColKernel &operator=(GCIm2ColKernel &&) = default; + /** Set the input and output of the kernel. + * + * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM], + * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: F32 + * @param[out] output The output tensor. First 2 lower dimensions represent a transform of each 3D input, + * while every dimension above represents a batch. Data types supported: Same as @p input + * @param[in] kernel_dims The kernel dimensions (width and height). + * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
+ * @param[in] has_bias In case biases are provided expands the matrix with 1. + */ + void configure(const IGCTensor *input, IGCTensor *output, std::pair<unsigned int, unsigned int> kernel_dims, const PadStrideInfo &conv_info, bool has_bias); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + /** Run the reshape kernel optimised for the special case (stride is 1, padding is 0 and kernel's low 3 dimensions are same as input) + * + * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). + * + */ + void run_reduced(const Window &window); + /** Run the generic convolution layer input reshape kernel + * + * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). + * + */ + void run_generic(const Window &window); + + /** Common signature for the kernel to run */ + using Im2ColFunction = void (GCIm2ColKernel::*)(const Window &); + +private: + const IGCTensor *_input; + IGCTensor *_output; + std::pair<unsigned int, unsigned int> _convolved_dims; + unsigned int _num_elems_processed_per_iteration; + Im2ColFunction _run_func; +}; +} + +#endif /*__ARM_COMPUTE_GCIM2COLKERNEL_H__ */ diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCNormalizationLayerKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCNormalizationLayerKernel.h new file mode 100644 index 0000000000..e8bc7ad2b2 --- /dev/null +++ b/arm_compute/core/GLES_COMPUTE/kernels/GCNormalizationLayerKernel.h @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2017 ARM Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_GCNORMALIZATIONLAYERKERNEL_H__ +#define __ARM_COMPUTE_GCNORMALIZATIONLAYERKERNEL_H__ + +#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h" + +namespace arm_compute +{ +class IGCTensor; + +/** Interface for the normalization layer kernel. + */ +class GCNormalizationLayerKernel : public IGCKernel +{ +public: + /** Constructor */ + GCNormalizationLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GCNormalizationLayerKernel(const GCNormalizationLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GCNormalizationLayerKernel &operator=(const GCNormalizationLayerKernel &) = delete; + /** Default Move Constructor.
*/ + GCNormalizationLayerKernel(GCNormalizationLayerKernel &&) = default; + /** Default move assignment operator. */ + GCNormalizationLayerKernel &operator=(GCNormalizationLayerKernel &&) = default; + /** Default destructor */ + ~GCNormalizationLayerKernel() = default; + /** Set the input and output tensors. + * + * @param[in] input Source tensor. 3 lower dims represent a single input with dimensions [width, height, IFM], + * and an optional 4th dimension for batch of inputs. Data types supported: F32. + * @param[in] squared_input Source tensor in which each element has been squared. 3 lower dims represent a single input with dimensions [width, height, IFM], + * Data types should match the input type. + * @param[out] output Destination tensor. Output will have the same number of dimensions as input. Data types should match the input type. + * @param[in] norm_info Normalization layer information like the normalization type, normalization size and other parameters. + */ + void configure(const IGCTensor *input, const IGCTensor *squared_input, IGCTensor *output, NormalizationLayerInfo norm_info); + + // Inherited methods overridden: + void run(const Window &window) override; + BorderSize border_size() const override; + +private: + const IGCTensor *_input; + const IGCTensor *_squared_input; + IGCTensor *_output; + BorderSize _border_size; +}; +} +#endif /*__ARM_COMPUTE_GCNORMALIZATIONLAYERKERNEL_H__ */ diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCPixelWiseMultiplicationKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCPixelWiseMultiplicationKernel.h new file mode 100644 index 0000000000..3b01b4ad4d --- /dev/null +++ b/arm_compute/core/GLES_COMPUTE/kernels/GCPixelWiseMultiplicationKernel.h @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2017 ARM Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_GCPIXELWISEMULTIPLICATIONKERNEL_H__ +#define __ARM_COMPUTE_GCPIXELWISEMULTIPLICATIONKERNEL_H__ + +#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h" +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +class IGCTensor; + +/** Interface for the pixelwise multiplication kernel. + * + */ +class GCPixelWiseMultiplicationKernel : public IGCKernel +{ +public: + /** Default constructor. */ + GCPixelWiseMultiplicationKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers). */ + GCPixelWiseMultiplicationKernel(const GCPixelWiseMultiplicationKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers).
*/ + GCPixelWiseMultiplicationKernel &operator=(const GCPixelWiseMultiplicationKernel &) = delete; + /** Allow instances of this class to be moved */ + GCPixelWiseMultiplicationKernel(GCPixelWiseMultiplicationKernel &&) = default; + /** Allow instances of this class to be moved */ + GCPixelWiseMultiplicationKernel &operator=(GCPixelWiseMultiplicationKernel &&) = default; + /** Initialise the kernel's inputs, output and scale. + * + * @param[in] input1 An input tensor. Data types supported: F32. + * @param[in] input2 An input tensor. Data types supported: same as @p input1. + * @param[out] output The output tensor. Data types supported: same as @p input1. + * @param[in] scale Scale to apply after multiplication. + * Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15. + */ + void configure(const IGCTensor *input1, const IGCTensor *input2, IGCTensor *output, float scale); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + const IGCTensor *_input1; + const IGCTensor *_input2; + IGCTensor *_output; +}; +} + +#endif /*__ARM_COMPUTE_GCPIXELWISEMULTIPLICATIONKERNEL_H__ */ diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.h new file mode 100644 index 0000000000..d4921c2092 --- /dev/null +++ b/arm_compute/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.h @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2017 ARM Limited.
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef __ARM_COMPUTE_GCPOOLINGLAYERKERNEL_H__ +#define __ARM_COMPUTE_GCPOOLINGLAYERKERNEL_H__ + +#include "arm_compute/core/GLES_COMPUTE/IGCKernel.h" + +namespace arm_compute +{ +class IGCTensor; + +/** Interface for the pooling layer kernel */ +class GCPoolingLayerKernel : public IGCKernel +{ +public: + /** Default constructor */ + GCPoolingLayerKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GCPoolingLayerKernel(const GCPoolingLayerKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GCPoolingLayerKernel &operator=(const GCPoolingLayerKernel &) = delete; + /** Allow instances of this class to be moved */ + GCPoolingLayerKernel(GCPoolingLayerKernel &&) = default; + /** Allow instances of this class to be moved */ + GCPoolingLayerKernel &operator=(GCPoolingLayerKernel &&) = default; + /** Default destructor */ + ~GCPoolingLayerKernel() = default; + + /** Set the input and output tensors and the pooling information. + * + * @param[in] input Source tensor. Data types supported: F16/F32. + * @param[out] output Destination tensor. Data types supported: same as @p input. + * @param[in] pool_info Pooling operation information, as described in @ref PoolingLayerInfo.
+ */ + void configure(const IGCTensor *input, IGCTensor *output, const PoolingLayerInfo &pool_info); + + // Inherited methods overridden: + void run(const Window &window) override; + BorderSize border_size() const override; + +private: + const IGCTensor *_input; + IGCTensor *_output; + PoolingLayerInfo _pool_info; + BorderSize _border_size; + unsigned int _num_elems_processed_per_iteration; +}; +} // namespace arm_compute +#endif /*__ARM_COMPUTE_GCPOOLINGLAYERKERNEL_H__ */ diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCSoftmaxLayerKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCSoftmaxLayerKernel.h new file mode 100644 index 0000000000..b9eb305bab --- /dev/null +++ b/arm_compute/core/GLES_COMPUTE/kernels/GCSoftmaxLayerKernel.h @@ -0,0 +1,112 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE.
+ */ +#ifndef __ARM_COMPUTE_GCSOFTMAXLAYERKERNEL_H__ +#define __ARM_COMPUTE_GCSOFTMAXLAYERKERNEL_H__ + +#include "arm_compute/core/GLES_COMPUTE/IGCSimple3DKernel.h" + +namespace arm_compute +{ +class IGCTensor; + +/** Interface for identifying the max value of 1D Logits */ +class GCLogits1DMaxKernel : public IGCSimple3DKernel +{ +public: + /** Set the input and output tensors. + * + * @param[in] input Source tensor. Data types supported: F16/F32 + * @param[out] output Destination tensor. Data types supported: same as @p input + */ + void configure(const IGCTensor *input, IGCTensor *output); + + // Inherited methods overridden: + void run(const Window &window) override; +}; + +/** Interface for shifting the logits values around the max value and exponentiating the result */ +class GCLogits1DShiftExpSumKernel : public IGCKernel +{ +public: + /** Default constructor */ + GCLogits1DShiftExpSumKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GCLogits1DShiftExpSumKernel(const GCLogits1DShiftExpSumKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GCLogits1DShiftExpSumKernel &operator=(const GCLogits1DShiftExpSumKernel &) = delete; + /** Allow instances of this class to be moved */ + GCLogits1DShiftExpSumKernel(GCLogits1DShiftExpSumKernel &&) = default; + /** Allow instances of this class to be moved */ + GCLogits1DShiftExpSumKernel &operator=(GCLogits1DShiftExpSumKernel &&) = default; + /** Set the input, max, output and sum tensors. + * + * @param[in] input Source tensor. Data types supported: F16/F32 + * @param[in] max Max values tensor. Data types supported: same as @p input + * @param[out] output Destination tensor. Data types supported: same as @p input + * @param[out] sum Sum of 1D logits tensor.
Data types supported: same as @p input + */ + void configure(const IGCTensor *input, const IGCTensor *max, IGCTensor *output, IGCTensor *sum); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + const IGCTensor *_input; + const IGCTensor *_max; + IGCTensor *_output; + IGCTensor *_sum; +}; + +/** Interface for calculating the final step of the Softmax Layer where each logit value is multiplied by the inverse of the sum of the logits. */ +class GCLogits1DNormKernel : public IGCKernel +{ +public: + /** Default constructor */ + GCLogits1DNormKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GCLogits1DNormKernel(const GCLogits1DNormKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GCLogits1DNormKernel &operator=(const GCLogits1DNormKernel &) = delete; + /** Allow instances of this class to be moved */ + GCLogits1DNormKernel(GCLogits1DNormKernel &&) = default; + /** Allow instances of this class to be moved */ + GCLogits1DNormKernel &operator=(GCLogits1DNormKernel &&) = default; + /** Set the input, sum and output tensors. + * + * @param[in] input Source tensor. Data types supported: F16/F32 + * @param[in] sum Sum tensor. Dimensions should be dim(input)-1. Data types supported: same as @p input + * @param[out] output Destination tensor.
Data types supported: same as @p input + */ + void configure(const IGCTensor *input, const IGCTensor *sum, IGCTensor *output); + + // Inherited methods overridden: + void run(const Window &window) override; + +private: + const IGCTensor *_input; + const IGCTensor *_sum; + IGCTensor *_output; +}; +} // namespace arm_compute +#endif /*__ARM_COMPUTE_GCSOFTMAXLAYERKERNEL_H__ */ diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCTransposeKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCTransposeKernel.h new file mode 100644 index 0000000000..c628a00585 --- /dev/null +++ b/arm_compute/core/GLES_COMPUTE/kernels/GCTransposeKernel.h @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE.
+ */ +#ifndef __ARM_COMPUTE_GCTRANSPOSEKERNEL_H__ +#define __ARM_COMPUTE_GCTRANSPOSEKERNEL_H__ + +#include "arm_compute/core/GLES_COMPUTE/IGCSimple2DKernel.h" + +namespace arm_compute +{ +class IGCTensor; + +/** OpenGL ES kernel which transposes the elements of a matrix. + * + * [width, height, batch] -> [height, width, batch] + * + */ +class GCTransposeKernel : public IGCSimple2DKernel +{ +public: + /** Initialise the kernel's input and output. + * + * @param[in] input Input tensor. Data types supported: F16/F32 + * @param[out] output Output tensor. Data types supported: same as @p input + */ + void configure(const IGCTensor *input, IGCTensor *output); + + // Inherited methods overridden: + void run(const Window &window) override; +}; +} +#endif /* __ARM_COMPUTE_GCTRANSPOSEKERNEL_H__ */ -- cgit v1.2.1