From 9fe414430c3c989b1cdc79d41e031495aed2cb7c Mon Sep 17 00:00:00 2001 From: Giorgio Arena Date: Wed, 23 Aug 2017 16:36:24 +0100 Subject: COMPMID-452 CL Generic Depthwise Convolution implementation. Change-Id: I115e48fe6ce5e281f3791aa5d80fdc754cdd2b5e Reviewed-on: http://mpd-gerrit.cambridge.arm.com/85082 Tested-by: Kaizen Reviewed-by: Gian Marco Iodice --- arm_compute/core/CL/CLKernels.h | 5 +- arm_compute/core/CL/OpenCL.h | 2 + .../CL/kernels/CLDepthwiseConvolution3x3Kernel.h | 72 ++++++++++++++++++++++ .../core/CL/kernels/CLDepthwiseConvolutionKernel.h | 72 ---------------------- .../core/CL/kernels/CLDepthwiseIm2ColKernel.h | 70 +++++++++++++++++++++ .../CL/kernels/CLDepthwiseVectorToTensorKernel.h | 70 +++++++++++++++++++++ .../CL/kernels/CLDepthwiseWeightsReshapeKernel.h | 66 ++++++++++++++++++++ .../CL/kernels/CLGEMMMatrixVectorMultiplyKernel.h | 67 ++++++++++++++++++++ .../runtime/CL/functions/CLDepthwiseConvolution.h | 72 +++++++++++++++++----- 9 files changed, 406 insertions(+), 90 deletions(-) create mode 100644 arm_compute/core/CL/kernels/CLDepthwiseConvolution3x3Kernel.h delete mode 100644 arm_compute/core/CL/kernels/CLDepthwiseConvolutionKernel.h create mode 100644 arm_compute/core/CL/kernels/CLDepthwiseIm2ColKernel.h create mode 100644 arm_compute/core/CL/kernels/CLDepthwiseVectorToTensorKernel.h create mode 100644 arm_compute/core/CL/kernels/CLDepthwiseWeightsReshapeKernel.h create mode 100644 arm_compute/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.h (limited to 'arm_compute') diff --git a/arm_compute/core/CL/CLKernels.h b/arm_compute/core/CL/CLKernels.h index f8aa5f8968..de40b85080 100644 --- a/arm_compute/core/CL/CLKernels.h +++ b/arm_compute/core/CL/CLKernels.h @@ -44,7 +44,9 @@ #include "arm_compute/core/CL/kernels/CLConvolutionKernel.h" #include "arm_compute/core/CL/kernels/CLDepthConcatenateKernel.h" #include "arm_compute/core/CL/kernels/CLDepthConvertKernel.h" -#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionKernel.h" +#include "arm_compute/core/CL/kernels/CLDepthwiseConvolution3x3Kernel.h" +#include "arm_compute/core/CL/kernels/CLDepthwiseIm2ColKernel.h" +#include "arm_compute/core/CL/kernels/CLDepthwiseVectorToTensorKernel.h" #include "arm_compute/core/CL/kernels/CLDerivativeKernel.h" #include "arm_compute/core/CL/kernels/CLDilateKernel.h" #include "arm_compute/core/CL/kernels/CLDirectConvolutionLayerKernel.h" @@ -57,6 +59,7 @@ #include "arm_compute/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.h" #include "arm_compute/core/CL/kernels/CLGEMMMatrixAdditionKernel.h" #include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h" +#include "arm_compute/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.h" #include "arm_compute/core/CL/kernels/CLGEMMTranspose1xWKernel.h" #include "arm_compute/core/CL/kernels/CLGaussian3x3Kernel.h" #include "arm_compute/core/CL/kernels/CLGaussian5x5Kernel.h" diff --git a/arm_compute/core/CL/OpenCL.h b/arm_compute/core/CL/OpenCL.h index 562f30bf14..897e9368f1 100644 --- a/arm_compute/core/CL/OpenCL.h +++ b/arm_compute/core/CL/OpenCL.h @@ -82,6 +82,7 @@ public: using clReleaseKernel_func = cl_int (*)(cl_kernel kernel); using clGetDeviceInfo_func = cl_int (*)(cl_device_id, cl_device_info, size_t, void *, size_t *); using clGetDeviceIDs_func = cl_int (*)(cl_platform_id, cl_device_type, cl_uint, cl_device_id *, cl_uint *); + using clRetainEvent_func = cl_int (*)(cl_event); clBuildProgram_func clBuildProgram = nullptr; clEnqueueNDRangeKernel_func clEnqueueNDRangeKernel = nullptr; @@ -111,6 +112,7 @@ public: clReleaseMemObject_func clReleaseMemObject = nullptr; clGetDeviceInfo_func clGetDeviceInfo = nullptr; clGetDeviceIDs_func clGetDeviceIDs = nullptr; + clRetainEvent_func clRetainEvent = nullptr; private: std::pair _loaded{ false, false }; diff --git a/arm_compute/core/CL/kernels/CLDepthwiseConvolution3x3Kernel.h b/arm_compute/core/CL/kernels/CLDepthwiseConvolution3x3Kernel.h new file mode 100644 index 0000000000..4e69f551b8 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLDepthwiseConvolution3x3Kernel.h @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLDEPTHWISECONVOLUTIONKERNEL3x3_H__ +#define __ARM_COMPUTE_CLDEPTHWISECONVOLUTIONKERNEL3x3_H__ + +#include "arm_compute/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the kernel to run a 3x3 depthwise convolution on a tensor. + */ +class CLDepthwiseConvolution3x3Kernel : public ICLKernel +{ +public: + /** Default constructor */ + CLDepthwiseConvolution3x3Kernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLDepthwiseConvolution3x3Kernel(const CLDepthwiseConvolution3x3Kernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLDepthwiseConvolution3x3Kernel &operator=(const CLDepthwiseConvolution3x3Kernel &) = delete; + /** Default Move Constructor. */ + CLDepthwiseConvolution3x3Kernel(CLDepthwiseConvolution3x3Kernel &&) = default; + /** Default move assignment operator. */ + CLDepthwiseConvolution3x3Kernel &operator=(CLDepthwiseConvolution3x3Kernel &&) = default; + /** Initialize the function's source, destination, conv and border_size. + * + * @param[in] input Source tensor. DataType supported: F32. + * @param[out] output Destination tensor. Data type supported: Same as @p input. + * @param[in] weights Weights tensor. These are 3D tensors with dimensions [3, 3, IFM]. Data type supported: Same as @p input. + * @param[in] conv_info Padding and stride information to use for the convolution. + */ + void configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *weights, const PadStrideInfo &conv_info); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + BorderSize border_size() const override; + +private: + BorderSize _border_size; + const ICLTensor *_input; + ICLTensor *_output; + const ICLTensor *_weights; + unsigned int _conv_stride_x; + unsigned int _conv_stride_y; + unsigned int _conv_pad_x; + unsigned int _conv_pad_y; +}; +} // namespace arm_compute +#endif /*__ARM_COMPUTE_CLDEPTHWISECONVOLUTIONKERNEL3x3_H__ */ diff --git a/arm_compute/core/CL/kernels/CLDepthwiseConvolutionKernel.h b/arm_compute/core/CL/kernels/CLDepthwiseConvolutionKernel.h deleted file mode 100644 index 0631f07667..0000000000 --- a/arm_compute/core/CL/kernels/CLDepthwiseConvolutionKernel.h +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2017 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef __ARM_COMPUTE_CLDEPTHWISECONVOLUTIONKERNEL_H__ -#define __ARM_COMPUTE_CLDEPTHWISECONVOLUTIONKERNEL_H__ - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the kernel to run a 3x3 depthwise convolution on a tensor. - */ -class CLDepthwiseConvolutionKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLDepthwiseConvolutionKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLDepthwiseConvolutionKernel(const CLDepthwiseConvolutionKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLDepthwiseConvolutionKernel &operator=(const CLDepthwiseConvolutionKernel &) = delete; - /** Default Move Constructor. */ - CLDepthwiseConvolutionKernel(CLDepthwiseConvolutionKernel &&) = default; - /** Default move assignment operator. */ - CLDepthwiseConvolutionKernel &operator=(CLDepthwiseConvolutionKernel &&) = default; - /** Initialize the function's source, destination, conv and border_size. - * - * @param[in] input Source tensor. DataType supported: F32. - * @param[out] output Destination tensor. DataType supported: F32. - * @param[in] weights Weights tensor. These are 3D tensors with dimensions [3, 3, IFM]. Data type supported: Same as @p input. - * @param[in] conv_info Padding and stride information to use for the convolution. DataType supported: F32. - */ - void configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *weights, const PadStrideInfo &conv_info); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - BorderSize border_size() const override; - -private: - BorderSize _border_size; - const ICLTensor *_input; - ICLTensor *_output; - const ICLTensor *_weights; - unsigned int _conv_stride_x; - unsigned int _conv_stride_y; - unsigned int _conv_pad_x; - unsigned int _conv_pad_y; -}; -} // namespace arm_compute -#endif /*__ARM_COMPUTE_CLDEPTHWISECONVOLUTIONKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLDepthwiseIm2ColKernel.h b/arm_compute/core/CL/kernels/CLDepthwiseIm2ColKernel.h new file mode 100644 index 0000000000..ae56adfa30 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLDepthwiseIm2ColKernel.h @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLDEPTHWISEIM2COLKERNEL_H__ +#define __ARM_COMPUTE_CLDEPTHWISEIM2COLKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" +#include "arm_compute/core/Size2D.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the depthwise im2col reshape kernel. + * This kernel reshape the input low 3 dimensions to a new 3D shape where the output's first dimension is + * the linear patch size (FILTER_WIDTH * FILTER_HEIGHT) and second dimension is number of patches in per image and third dimension unchanged . + **/ +class CLDepthwiseIm2ColKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLDepthwiseIm2ColKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLDepthwiseIm2ColKernel(const CLDepthwiseIm2ColKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLDepthwiseIm2ColKernel &operator=(const CLDepthwiseIm2ColKernel &) = delete; + /** Allow instances of this class to be moved */ + CLDepthwiseIm2ColKernel(CLDepthwiseIm2ColKernel &&) = default; + /** Allow instances of this class to be moved */ + CLDepthwiseIm2ColKernel &operator=(CLDepthwiseIm2ColKernel &&) = default; + /** Set the input and output of the kernel. + * + * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM], + * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: F32 + * @param[out] output The output tensor. First 3 lower dimensions represent a transform of each 3D input, + * while every dimension above 3 represents a batch. Data types supported: Same as @p input + * @param[in] kernel_dims The kernel dimensions (width and height). + * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. + */ + void configure(const ICLTensor *input, ICLTensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + ICLTensor *_output; +}; +} // arm_compute +#endif /*__ARM_COMPUTE_CLDEPTHWISEIM2COLKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLDepthwiseVectorToTensorKernel.h b/arm_compute/core/CL/kernels/CLDepthwiseVectorToTensorKernel.h new file mode 100644 index 0000000000..1dae9b2b5f --- /dev/null +++ b/arm_compute/core/CL/kernels/CLDepthwiseVectorToTensorKernel.h @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLDEPTHWISEVECTORTOTENSORKERNEL_H__ +#define __ARM_COMPUTE_CLDEPTHWISEVECTORTOTENSORKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the depthwise vector to tensor kernel. + * + * This kernel takes the 1D tensor that's been produced by the MatrixVectorMultiply + * kernel and reshapes it to given width and height (previously calculated, based + * on input/weights dimensions and convolution strides and padding). + * + **/ +class CLDepthwiseVectorToTensorKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLDepthwiseVectorToTensorKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLDepthwiseVectorToTensorKernel(const CLDepthwiseVectorToTensorKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLDepthwiseVectorToTensorKernel &operator=(const CLDepthwiseVectorToTensorKernel &) = delete; + /** Allow instances of this class to be moved */ + CLDepthwiseVectorToTensorKernel(CLDepthwiseVectorToTensorKernel &&) = default; + /** Allow instances of this class to be moved */ + CLDepthwiseVectorToTensorKernel &operator=(CLDepthwiseVectorToTensorKernel &&) = default; + /** Set the input and output of the kernel. + * + * @param[in] input The input vector to convert. Data type supported: F32. + * @param[out] output The output tensor. 3 lower dimensions represent a single input [width, height, IFM]. Data type supported: same as @p input. + * @param[in] conv_w The converted tensor's width. + * @param[in] conv_h The converted tensor's height. + */ + void configure(const ICLTensor *input, ICLTensor *output, size_t conv_w, size_t conv_h); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + ICLTensor *_output; +}; +} // arm_compute +#endif /*__ARM_COMPUTE_CLDEPTHWISEVECTORTOTENSORKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLDepthwiseWeightsReshapeKernel.h b/arm_compute/core/CL/kernels/CLDepthwiseWeightsReshapeKernel.h new file mode 100644 index 0000000000..d493d9f052 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLDepthwiseWeightsReshapeKernel.h @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLDEPTHWISEWEIGHTSRESHAPEKERNEL_H__ +#define __ARM_COMPUTE_CLDEPTHWISEWEIGHTSRESHAPEKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the depthwise weights reshape kernel. + * This kernel reshape original weights' low 2D dimensions into a single row and + * have the second dimension as the original depth size. + * + **/ +class CLDepthwiseWeightsReshapeKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLDepthwiseWeightsReshapeKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLDepthwiseWeightsReshapeKernel(const CLDepthwiseWeightsReshapeKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLDepthwiseWeightsReshapeKernel &operator=(const CLDepthwiseWeightsReshapeKernel &) = delete; + /** Allow instances of this class to be moved */ + CLDepthwiseWeightsReshapeKernel(CLDepthwiseWeightsReshapeKernel &&) = default; + /** Allow instances of this class to be moved */ + CLDepthwiseWeightsReshapeKernel &operator=(CLDepthwiseWeightsReshapeKernel &&) = default; + /** Set the input and output of the kernel. + * + * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM]. Data type supported: F32. + * @param[out] output The output tensor. Data type supported: same as @p input. + */ + void configure(const ICLTensor *input, ICLTensor *output); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + ICLTensor *_output; +}; +} // arm_compute +#endif /*__ARM_COMPUTE_CLDEPTHWISEWEIGHTSRESHAPEKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.h b/arm_compute/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.h new file mode 100644 index 0000000000..580322fb51 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.h @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLGEMMMATRIXVECTORMULTIPLYKERNEL_H__ +#define __ARM_COMPUTE_CLGEMMMATRIXVECTORMULTIPLYKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the GEMM matrix vector multiply kernel. **/ +class CLGEMMMatrixVectorMultiplyKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLGEMMMatrixVectorMultiplyKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLGEMMMatrixVectorMultiplyKernel(const CLGEMMMatrixVectorMultiplyKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLGEMMMatrixVectorMultiplyKernel &operator=(const CLGEMMMatrixVectorMultiplyKernel &) = delete; + /** Allow instances of this class to be moved */ + CLGEMMMatrixVectorMultiplyKernel(CLGEMMMatrixVectorMultiplyKernel &&) = default; + /** Allow instances of this class to be moved */ + CLGEMMMatrixVectorMultiplyKernel &operator=(CLGEMMMatrixVectorMultiplyKernel &&) = default; + /** Set the input and output of the kernel. + * + * @param[in] input0 The reshaped input tensor. Data types supported: F16/F32 + * @param[in] input1 The 2D reshaped weights tensor. Data type supported: Same as @p input. + * @param[out] output The output 2D tensor. Data types supported: Same as @p input + */ + void configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + BorderSize border_size() const override; + +private: + const ICLTensor *_input0; + const ICLTensor *_input1; + ICLTensor *_output; + int _num_rows_read_per_iteration; + BorderSize _border_size; +}; +} // arm_compute +#endif /*__ARM_COMPUTE_CLGEMMMATRIXVECTORMULTIPLYKERNEL_H__ */ diff --git a/arm_compute/runtime/CL/functions/CLDepthwiseConvolution.h b/arm_compute/runtime/CL/functions/CLDepthwiseConvolution.h index cc11f9cc5a..53bc079cb2 100644 --- a/arm_compute/runtime/CL/functions/CLDepthwiseConvolution.h +++ b/arm_compute/runtime/CL/functions/CLDepthwiseConvolution.h @@ -21,26 +21,57 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef __ARM_COMPUTE_CL_DEPTHWISE_CONVOLUTION_H__ -#define __ARM_COMPUTE_CL_DEPTHWISE_CONVOLUTION_H__ +#ifndef __ARM_COMPUTE_CLDEPTHWISECONVOLUTION_H__ +#define __ARM_COMPUTE_CLDEPTHWISECONVOLUTION_H__ -#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionKernel.h" +#include "arm_compute/core/CL/kernels/CLDepthwiseConvolution3x3Kernel.h" +#include "arm_compute/core/CL/kernels/CLDepthwiseIm2ColKernel.h" +#include "arm_compute/core/CL/kernels/CLDepthwiseVectorToTensorKernel.h" +#include "arm_compute/core/CL/kernels/CLDepthwiseWeightsReshapeKernel.h" #include "arm_compute/core/CL/kernels/CLFillBorderKernel.h" +#include "arm_compute/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/CL/CLTensor.h" -#include "arm_compute/runtime/CL/ICLSimpleFunction.h" #include "arm_compute/runtime/IFunction.h" -#include - namespace arm_compute { class ICLTensor; -/** Basic function to execute depthwise convolution. This function calls the following OpenCL kernels: +/** Basic function to execute a depthwise convolution for kernel size 3x3xC. This function calls the following OpenCL kernels: * - * -# @ref CLFillBorderKernel (executed if border_mode == CONSTANT or border_mode == REPLICATE) - * -# @ref CLDepthwiseConvolutionKernel + * -# @ref CLDepthwiseConvolution3x3Kernel + * -# @ref CLFillBorderKernel (if pad_x or pad_y > 0) + * + */ +class CLDepthwiseConvolution3x3 : public IFunction +{ +public: + /** Default constructor */ + CLDepthwiseConvolution3x3(); + /** Initialize the function's source, destination, conv and border_size. + * + * @param[in, out] input Source tensor. Data type supported: F32. (Written to only for border filling). + * @param[out] output Destination tensor. Data type supported: same as @p input. + * @param[in] weights Weights tensor. These are 3D tensors with shape [3, 3, IFM]. Data type supported: Same as @p input. + * @param[in] conv_info Padding and stride information to use for the convolution. + */ + void configure(ICLTensor *input, ICLTensor *output, const ICLTensor *weights, const PadStrideInfo &conv_info); + + // Inherited methods overriden: + void run() override; + +private: + CLDepthwiseConvolution3x3Kernel _kernel; + CLFillBorderKernel _border_handler; +}; + +/** Basic function to execute a generic depthwise convolution. This function calls the following OpenCL kernels: + * + * -# @ref CLDepthwiseIm2ColKernel + * -# @ref CLGEMMMatrixVectorMultiplyKernel + * -# @ref CLDepthwiseWeightsReshapeKernel + * -# @ref CLFillBorderKernel (if pad_x or pad_y > 0) * */ class CLDepthwiseConvolution : public IFunction @@ -48,12 +79,12 @@ class CLDepthwiseConvolution : public IFunction public: /** Default constructor */ CLDepthwiseConvolution(); - /** Initialize the function's source, destination, conv and border_size. + /** Initialize the function's source, destination, weights and convolution information. * - * @param[in] input Source tensor. DataType supported: F32. (Written to only for border filling). - * @param[out] output Destination tensor. DataType supported: F32. - * @param[in] weights Weights tensor. These are 3D tensors with dimensions [3, 3, IFM]. Data type supported: Same as @p input. - * @param[in] conv_info Padding and stride information to use for the convolution. DataType supported: F32. + * @param[in, out] input Source tensor. Data type supported: F32. (Written to only for border filling). + * @param[out] output Destination tensor. Data type supported: same as @p input. + * @param[in] weights Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input. + * @param[in] conv_info Padding and stride information to use for the convolution. */ void configure(ICLTensor *input, ICLTensor *output, const ICLTensor *weights, const PadStrideInfo &conv_info); @@ -61,8 +92,15 @@ public: void run() override; private: - CLDepthwiseConvolutionKernel _kernel; - CLFillBorderKernel _border_handler; + CLDepthwiseIm2ColKernel _im2col_kernel; + CLDepthwiseWeightsReshapeKernel _weights_reshape_kernel; + CLGEMMMatrixVectorMultiplyKernel _v2mm_kernel; + CLDepthwiseVectorToTensorKernel _vector_to_tensor_kernel; + CLFillBorderKernel _v2mm_input_fill_border; + CLFillBorderKernel _v2mm_weights_fill_border; + CLTensor _input_reshaped; + CLTensor _weights_reshaped; + CLTensor _v2mm_output; }; } -#endif /*__ARM_COMPUTE_CL_DEPTHWISE_CONVOLUTION_H__ */ +#endif /*__ARM_COMPUTE_CLDEPTHWISECONVOLUTION_H__ */ -- cgit v1.2.1