diff options
Diffstat (limited to 'arm_compute')
-rw-r--r-- | arm_compute/core/CL/CLKernels.h | 4 | ||||
-rw-r--r-- | arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h | 61 | ||||
-rw-r--r-- | arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h | 75 | ||||
-rw-r--r-- | arm_compute/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h (renamed from arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3Kernel.h) | 32 | ||||
-rw-r--r-- | arm_compute/core/utils/misc/ShapeCalculator.h | 12 | ||||
-rw-r--r-- | arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h | 13 |
6 files changed, 170 insertions, 27 deletions
diff --git a/arm_compute/core/CL/CLKernels.h b/arm_compute/core/CL/CLKernels.h index 9481e144df..67b7a8bb8f 100644 --- a/arm_compute/core/CL/CLKernels.h +++ b/arm_compute/core/CL/CLKernels.h @@ -46,7 +46,8 @@ #include "arm_compute/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.h" #include "arm_compute/core/CL/kernels/CLDepthConcatenateLayerKernel.h" #include "arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h" -#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3Kernel.h" +#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h" +#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h" #include "arm_compute/core/CL/kernels/CLDepthwiseIm2ColKernel.h" #include "arm_compute/core/CL/kernels/CLDepthwiseVectorToTensorKernel.h" #include "arm_compute/core/CL/kernels/CLDepthwiseWeightsReshapeKernel.h" @@ -113,5 +114,6 @@ #include "arm_compute/core/CL/kernels/CLWinogradFilterTransformKernel.h" #include "arm_compute/core/CL/kernels/CLWinogradInputTransformKernel.h" #include "arm_compute/core/CL/kernels/CLWinogradOutputTransformKernel.h" +#include "arm_compute/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h" #endif /* __ARM_COMPUTE_CLKERNELS_H__ */ diff --git a/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h b/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h new file mode 100644 index 0000000000..0f3f4bfc76 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2018 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLDEPTHWISECONVOLUTIONNCHWKERNEL3x3_H__ +#define __ARM_COMPUTE_CLDEPTHWISECONVOLUTIONNCHWKERNEL3x3_H__ + +#include "arm_compute/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the kernel to run a 3x3 depthwise convolution on a tensor when the data layout is NCHW. + */ +class CLDepthwiseConvolutionLayer3x3NCHWKernel : public ICLDepthwiseConvolutionLayer3x3Kernel +{ +public: + /** Default constructor */ + CLDepthwiseConvolutionLayer3x3NCHWKernel(); + /** Initialize the function's source, destination, conv and border_size. + * + * @param[in] input Source tensor. DataType supported: QASYMM8/F16/F32. + * @param[in] weights Weights tensor. A 3D tensor with dimensions [3, 3, IFM]. Data type supported: Same as @p input. + * @param[in] biases (Optional) Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed. + * Data type supported: Same as @p input. + * @param[out] output Destination tensor. Data type supported: Same as @p input. + * @param[in] conv_info Padding and stride information to use for the convolution. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for QASYMM8 supported. + */ + void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, + ActivationLayerInfo act_info) override; + + void run(const Window &window, cl::CommandQueue &queue) override; + BorderSize border_size() const override; + +private: + unsigned int _conv_stride_x; + unsigned int _conv_pad_top; +}; +} // namespace arm_compute +#endif /*__ARM_COMPUTE_CLDEPTHWISECONVOLUTIONNCHWKERNEL3x3_H__ */ diff --git a/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h b/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h new file mode 100644 index 0000000000..4ecc07af6a --- /dev/null +++ b/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2018 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLDEPTHWISECONVOLUTIONNHWCKERNEL3x3_H__ +#define __ARM_COMPUTE_CLDEPTHWISECONVOLUTIONNHWCKERNEL3x3_H__ + +#include "arm_compute/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the kernel to run a 3x3 depthwise convolution on a tensor when the data layout is NHWC. + */ +class CLDepthwiseConvolutionLayer3x3NHWCKernel : public ICLDepthwiseConvolutionLayer3x3Kernel +{ +public: + /** Default constructor */ + CLDepthwiseConvolutionLayer3x3NHWCKernel(); + /** Default move assignment operator. */ + /** Initialize the function's source, destination, conv and border_size. + * + * @param[in] input Source tensor. DataType supported: QASYMM8. + * @param[in] weights Weights tensor. A 3D tensor with dimensions [IFM, 3, 3]. Data type supported: Same as @p input. + * @param[in] biases (Optional) Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed. + * Data type supported: Same as @p input. + * @param[out] output Destination tensor. Data type supported: Same as @p input. + * @param[in] conv_info Padding and stride information to use for the convolution. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU are supported. + */ + void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, + ActivationLayerInfo act_info) override; + /** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayer3x3NHWCKernel + * + * @param[in] input Source tensor. DataType supported: QASYMM8. + * @param[in] weights Weights tensor. A 3D tensor with dimensions [IFM, 3, 3]. Data type supported: Same as @p input. + * @param[in] biases (Optional) Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed. + * Data type supported: Same as @p input. + * @param[in] output Destination tensor. Data type supported: Same as @p input. + * @param[in] conv_info Padding and stride information to use for the convolution. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU are supported. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, + ActivationLayerInfo act_info = ActivationLayerInfo()); + + void run(const Window &window, cl::CommandQueue &queue) override; + BorderSize border_size() const override; + +private: + unsigned int _num_rows_processed_per_iteration; +}; +} // namespace arm_compute +#endif /*__ARM_COMPUTE_CLDEPTHWISECONVOLUTIONNHWCKERNEL3x3_H__ */ diff --git a/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3Kernel.h b/arm_compute/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h index 5f72cf70ed..f02ba331b5 100644 --- a/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3Kernel.h +++ b/arm_compute/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef __ARM_COMPUTE_CLDEPTHWISECONVOLUTIONKERNEL3x3_H__ -#define __ARM_COMPUTE_CLDEPTHWISECONVOLUTIONKERNEL3x3_H__ +#ifndef __ARM_COMPUTE_ICLDEPTHWISECONVOLUTIONKERNEL3x3_H__ +#define __ARM_COMPUTE_ICLDEPTHWISECONVOLUTIONKERNEL3x3_H__ #include "arm_compute/core/CL/ICLKernel.h" @@ -32,19 +32,22 @@ class ICLTensor; /** Interface for the kernel to run a 3x3 depthwise convolution on a tensor. */ -class CLDepthwiseConvolutionLayer3x3Kernel : public ICLKernel +class ICLDepthwiseConvolutionLayer3x3Kernel : public ICLKernel { public: /** Default constructor */ - CLDepthwiseConvolutionLayer3x3Kernel(); + ICLDepthwiseConvolutionLayer3x3Kernel() + : _border_size(0), _input(), _output(), _weights(), _biases(), _conv_stride_y(1), _conv_pad_left(0) + { + } /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLDepthwiseConvolutionLayer3x3Kernel(const CLDepthwiseConvolutionLayer3x3Kernel &) = delete; + ICLDepthwiseConvolutionLayer3x3Kernel(const ICLDepthwiseConvolutionLayer3x3Kernel &) = delete; /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLDepthwiseConvolutionLayer3x3Kernel &operator=(const CLDepthwiseConvolutionLayer3x3Kernel &) = delete; + ICLDepthwiseConvolutionLayer3x3Kernel &operator=(const ICLDepthwiseConvolutionLayer3x3Kernel &) = delete; /** Default Move Constructor. */ - CLDepthwiseConvolutionLayer3x3Kernel(CLDepthwiseConvolutionLayer3x3Kernel &&) = default; + ICLDepthwiseConvolutionLayer3x3Kernel(ICLDepthwiseConvolutionLayer3x3Kernel &&) = default; /** Default move assignment operator */ - CLDepthwiseConvolutionLayer3x3Kernel &operator=(CLDepthwiseConvolutionLayer3x3Kernel &&) = default; + ICLDepthwiseConvolutionLayer3x3Kernel &operator=(ICLDepthwiseConvolutionLayer3x3Kernel &&) = default; /** Initialize the function's source, destination, conv and border_size. * * @param[in] input Source tensor. DataType supported: QASYMM8/F16/F32. @@ -55,22 +58,17 @@ public: * @param[in] conv_info Padding and stride information to use for the convolution. * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for QASYMM8 supported. */ - void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, ActivationLayerInfo act_info = ActivationLayerInfo()); + virtual void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, + ActivationLayerInfo act_info = ActivationLayerInfo()) = 0; - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - BorderSize border_size() const override; - -private: +protected: BorderSize _border_size; const ICLTensor *_input; ICLTensor *_output; const ICLTensor *_weights; const ICLTensor *_biases; - unsigned int _conv_stride_x; unsigned int _conv_stride_y; unsigned int _conv_pad_left; - unsigned int _conv_pad_top; }; } // namespace arm_compute -#endif /*__ARM_COMPUTE_CLDEPTHWISECONVOLUTIONKERNEL3x3_H__ */ +#endif /*__ARM_COMPUTE_ICLDEPTHWISECONVOLUTIONKERNEL3x3_H__ */ diff --git a/arm_compute/core/utils/misc/ShapeCalculator.h b/arm_compute/core/utils/misc/ShapeCalculator.h index e174227302..b91e52a657 100644 --- a/arm_compute/core/utils/misc/ShapeCalculator.h +++ b/arm_compute/core/utils/misc/ShapeCalculator.h @@ -130,15 +130,19 @@ inline TensorShape compute_depthwise_convolution_shape(const ITensorInfo &input, const TensorShape input_shape{ input.tensor_shape() }; const TensorShape weights_shape{ weights.tensor_shape() }; + const DataLayout data_layout = input.data_layout(); + const int width_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); + const int height_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); + unsigned int output_width = 0; unsigned int output_height = 0; - std::tie(output_width, output_height) = scaled_dimensions(input_shape.x(), input_shape.y(), - weights_shape.x(), weights_shape.y(), + std::tie(output_width, output_height) = scaled_dimensions(input_shape[width_idx], input_shape[height_idx], + weights_shape[width_idx], weights_shape[height_idx], conv_info); TensorShape output_shape{ input_shape }; - output_shape.set(0, output_width); - output_shape.set(1, output_height); + output_shape.set(width_idx, output_width); + output_shape.set(height_idx, output_height); return output_shape; } diff --git a/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h index d6fc8f0fcc..82947bc7e6 100644 --- a/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h +++ b/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h @@ -24,13 +24,15 @@ #ifndef __ARM_COMPUTE_CLDEPTHWISECONVOLUTION_H__ #define __ARM_COMPUTE_CLDEPTHWISECONVOLUTION_H__ -#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3Kernel.h" +#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h" +#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h" #include "arm_compute/core/CL/kernels/CLDepthwiseIm2ColKernel.h" #include "arm_compute/core/CL/kernels/CLDepthwiseVectorToTensorKernel.h" #include "arm_compute/core/CL/kernels/CLDepthwiseWeightsReshapeKernel.h" #include "arm_compute/core/CL/kernels/CLDirectConvolutionLayerOutputStageKernel.h" #include "arm_compute/core/CL/kernels/CLFillBorderKernel.h" #include "arm_compute/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.h" +#include "arm_compute/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/CL/CLTensor.h" #include "arm_compute/runtime/IFunction.h" @@ -39,9 +41,10 @@ namespace arm_compute { class ICLTensor; -/** Basic function to execute a depthwise convolution for kernel size 3x3xC. This function calls the following OpenCL kernels: +/** Basic function to execute a depthwise convolution for kernel size 3x3xC (when data layout NCHW) or Cx3x3 (when data layout NHWC). This function calls the following OpenCL kernels: * - * -# @ref CLDepthwiseConvolutionLayer3x3Kernel + * -# @ref CLDepthwiseConvolutionLayer3x3NCHWKernel (if data_layout == NCHW) + * -# @ref CLDepthwiseConvolutionLayer3x3NHWCKernel (if data_layout == NHWC) * -# @ref CLFillBorderKernel (if pad_x or pad_y > 0) * */ @@ -66,8 +69,8 @@ public: void run() override; private: - CLDepthwiseConvolutionLayer3x3Kernel _kernel; - CLFillBorderKernel _border_handler; + std::unique_ptr<ICLDepthwiseConvolutionLayer3x3Kernel> _kernel; + CLFillBorderKernel _border_handler; }; /** Basic function to execute a generic depthwise convolution. This function calls the following OpenCL kernels: |