diff options
author | Giorgio Arena <giorgio.arena@arm.com> | 2018-04-04 17:44:26 +0100 |
---|---|---|
committer | Anthony Barbier <anthony.barbier@arm.com> | 2018-11-02 16:50:48 +0000 |
commit | 7657224de2b697a8a92cccf26d98e53ccd7c1a03 (patch) | |
tree | 1dcfa4541dbaf753854a628c93991652158d373e /arm_compute | |
parent | e74b201ca1abca040ca9f30837fdf19aa610e7c4 (diff) | |
download | ComputeLibrary-7657224de2b697a8a92cccf26d98e53ccd7c1a03.tar.gz |
COMPMID-926 Add depth multiplier support to NEON/CL/GLES depthwise convolution
Change-Id: I03f32c62350e5ea43e77bb15fc5a832d83719e3b
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/126657
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Michele DiGiorgio <michele.digiorgio@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Diffstat (limited to 'arm_compute')
11 files changed, 123 insertions, 104 deletions
diff --git a/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h b/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h index 0f3f4bfc76..f80985a936 100644 --- a/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h +++ b/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h @@ -39,15 +39,16 @@ public: CLDepthwiseConvolutionLayer3x3NCHWKernel(); /** Initialize the function's source, destination, conv and border_size. * - * @param[in] input Source tensor. DataType supported: QASYMM8/F16/F32. - * @param[in] weights Weights tensor. A 3D tensor with dimensions [3, 3, IFM]. Data type supported: Same as @p input. - * @param[in] biases (Optional) Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed. - * Data type supported: Same as @p input. - * @param[out] output Destination tensor. Data type supported: Same as @p input. - * @param[in] conv_info Padding and stride information to use for the convolution. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for QASYMM8 supported. + * @param[in] input Source tensor. DataType supported: QASYMM8/F16/F32. + * @param[in] weights Weights tensor. A 3D tensor with dimensions [3, 3, IFM]. Data type supported: Same as @p input. + * @param[in] biases (Optional) Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed. + * Data type supported: Same as @p input. + * @param[out] output Destination tensor. Data type supported: Same as @p input. + * @param[in] conv_info Padding and stride information to use for the convolution. + * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for QASYMM8 supported. */ - void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, + void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier, ActivationLayerInfo act_info) override; void run(const Window &window, cl::CommandQueue &queue) override; diff --git a/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h b/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h index 4ecc07af6a..62b9a8682e 100644 --- a/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h +++ b/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h @@ -40,29 +40,31 @@ public: /** Default move assignment operator. */ /** Initialize the function's source, destination, conv and border_size. * - * @param[in] input Source tensor. DataType supported: QASYMM8. - * @param[in] weights Weights tensor. A 3D tensor with dimensions [IFM, 3, 3]. Data type supported: Same as @p input. - * @param[in] biases (Optional) Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed. - * Data type supported: Same as @p input. - * @param[out] output Destination tensor. Data type supported: Same as @p input. - * @param[in] conv_info Padding and stride information to use for the convolution. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU are supported. + * @param[in] input Source tensor. DataType supported: QASYMM8. + * @param[in] weights Weights tensor. A 3D tensor with dimensions [IFM, 3, 3]. Data type supported: Same as @p input. + * @param[in] biases (Optional) Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed. + * Data type supported: Same as @p input. + * @param[out] output Destination tensor. Data type supported: Same as @p input. + * @param[in] conv_info Padding and stride information to use for the convolution. + * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU are supported. */ - void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, + void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier, ActivationLayerInfo act_info) override; /** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayer3x3NHWCKernel * - * @param[in] input Source tensor. DataType supported: QASYMM8. - * @param[in] weights Weights tensor. A 3D tensor with dimensions [IFM, 3, 3]. Data type supported: Same as @p input. - * @param[in] biases (Optional) Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed. - * Data type supported: Same as @p input. - * @param[in] output Destination tensor. Data type supported: Same as @p input. - * @param[in] conv_info Padding and stride information to use for the convolution. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU are supported. + * @param[in] input Source tensor. DataType supported: QASYMM8. + * @param[in] weights Weights tensor. A 3D tensor with dimensions [IFM, 3, 3]. Data type supported: Same as @p input. + * @param[in] biases (Optional) Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed. + * Data type supported: Same as @p input. + * @param[in] output Destination tensor. Data type supported: Same as @p input. + * @param[in] conv_info Padding and stride information to use for the convolution. + * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU are supported. * * @return a status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, + static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier, ActivationLayerInfo act_info = ActivationLayerInfo()); void run(const Window &window, cl::CommandQueue &queue) override; diff --git a/arm_compute/core/CL/kernels/CLDepthwiseIm2ColKernel.h b/arm_compute/core/CL/kernels/CLDepthwiseIm2ColKernel.h index 86b8b90e94..b8343da50a 100644 --- a/arm_compute/core/CL/kernels/CLDepthwiseIm2ColKernel.h +++ b/arm_compute/core/CL/kernels/CLDepthwiseIm2ColKernel.h @@ -33,7 +33,7 @@ class ICLTensor; /** Interface for the depthwise im2col reshape kernel. * This kernel reshape the input low 3 dimensions to a new 3D shape where the output's first dimension is - * the linear patch size (FILTER_WIDTH * FILTER_HEIGHT) and second dimension is number of patches in per image and third dimension unchanged . + * the linear patch size (FILTER_WIDTH * FILTER_HEIGHT) and second dimension is number of patches per image and third dimension unchanged . **/ class CLDepthwiseIm2ColKernel : public ICLKernel { @@ -50,15 +50,16 @@ public: CLDepthwiseIm2ColKernel &operator=(CLDepthwiseIm2ColKernel &&) = default; /** Set the input and output of the kernel. * - * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM], - * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8/F32 - * @param[out] output The output tensor. First 3 lower dimensions represent a transform of each 3D input, - * while every dimension above 3 represents a batch. Data types supported: Same as @p input - * @param[in] kernel_dims The kernel dimensions (width and height). - * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. - * @param[in] has_bias Boolean that specifies if the depthwise convolution has bias. + * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM], + * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8/F32 + * @param[out] output The output tensor. First 3 lower dimensions represent a transform of each 3D input, + * while every dimension above 3 represents a batch. Data types supported: Same as @p input + * @param[in] kernel_dims The kernel dimensions (width and height). + * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. + * @param[in] has_bias Boolean that specifies if the depthwise convolution has bias. + * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. */ - void configure(const ICLTensor *input, ICLTensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias = false); + void configure(const ICLTensor *input, ICLTensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias = false, unsigned int depth_multiplier = 1); // Inherited methods overridden: void run(const Window &window, cl::CommandQueue &queue) override; diff --git a/arm_compute/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h b/arm_compute/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h index f02ba331b5..15233c5c32 100644 --- a/arm_compute/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h +++ b/arm_compute/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h @@ -50,15 +50,16 @@ public: ICLDepthwiseConvolutionLayer3x3Kernel &operator=(ICLDepthwiseConvolutionLayer3x3Kernel &&) = default; /** Initialize the function's source, destination, conv and border_size. * - * @param[in] input Source tensor. DataType supported: QASYMM8/F16/F32. - * @param[in] weights Weights tensor. A 3D tensor with dimensions [3, 3, IFM]. Data type supported: Same as @p input. - * @param[in] biases (Optional) Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed. - * Data type supported: Same as @p input. - * @param[out] output Destination tensor. Data type supported: Same as @p input. - * @param[in] conv_info Padding and stride information to use for the convolution. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for QASYMM8 supported. + * @param[in] input Source tensor. DataType supported: QASYMM8/F16/F32. + * @param[in] weights Weights tensor. A 3D tensor with dimensions [3, 3, IFM]. Data type supported: Same as @p input. + * @param[in] biases (Optional) Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed. + * Data type supported: Same as @p input. + * @param[out] output Destination tensor. Data type supported: Same as @p input. + * @param[in] conv_info Padding and stride information to use for the convolution. + * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for QASYMM8 supported. */ - virtual void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, + virtual void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo()) = 0; protected: diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCDepthwiseConvolutionLayer3x3Kernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCDepthwiseConvolutionLayer3x3Kernel.h index 46b9e897bb..7d0d80649e 100644 --- a/arm_compute/core/GLES_COMPUTE/kernels/GCDepthwiseConvolutionLayer3x3Kernel.h +++ b/arm_compute/core/GLES_COMPUTE/kernels/GCDepthwiseConvolutionLayer3x3Kernel.h @@ -47,14 +47,15 @@ public: GCDepthwiseConvolutionLayer3x3Kernel &operator=(GCDepthwiseConvolutionLayer3x3Kernel &&) = default; /** Initialize the function's source, destination, conv and border_size. * - * @param[in] input Source tensor. DataType supported: F16. - * @param[in] weights Weights tensor. A 3D tensor with dimensions [3, 3, IFM]. Data type supported: Same as @p input. - * @param[in] biases (Optional) Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed. - * Data type supported: Same as @p input. - * @param[out] output Destination tensor. Data type supported: Same as @p input. - * @param[in] conv_info Padding and stride information to use for the convolution. + * @param[in] input Source tensor. DataType supported: F16. + * @param[in] weights Weights tensor. A 3D tensor with dimensions [3, 3, IFM]. Data type supported: Same as @p input. + * @param[in] biases (Optional) Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed. + * Data type supported: Same as @p input. + * @param[out] output Destination tensor. Data type supported: Same as @p input. + * @param[in] conv_info Padding and stride information to use for the convolution. + * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. */ - void configure(const IGCTensor *input, const IGCTensor *weights, const IGCTensor *biases, IGCTensor *output, const PadStrideInfo &conv_info); + void configure(const IGCTensor *input, const IGCTensor *weights, const IGCTensor *biases, IGCTensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1); // Inherited methods overridden: void run(const Window &window) override; diff --git a/arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.h b/arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.h index 0c2f30a98c..bd9e7eb781 100644 --- a/arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.h +++ b/arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.h @@ -53,23 +53,25 @@ public: NEDepthwiseConvolutionLayer3x3Kernel &operator=(NEDepthwiseConvolutionLayer3x3Kernel &&) = default; /** Initialize the function's source, destination, conv and border_size. * - * @param[in] input Source tensor. DataType supported: QASYMM8, F32. - * @param[in] weights Weights tensor. This is a 3D tensor with dimensions [3, 3, IFM]. Data type supported: Same as @p input. - * @param[out] output Destination tensor. Data type supported: Same as @p input. - * @param[in] conv_info Padding and stride information to use for the convolution. - * @param[in] data_layout (Optional) Data layout of the input and weights tensor + * @param[in] input Source tensor. DataType supported: QASYMM8, F32. + * @param[in] weights Weights tensor. This is a 3D tensor with dimensions [3, 3, IFM]. Data type supported: Same as @p input. + * @param[out] output Destination tensor. Data type supported: Same as @p input. + * @param[in] conv_info Padding and stride information to use for the convolution. + * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. + * @param[in] data_layout (Optional) Data layout of the input and weights tensor */ - void configure(const ITensor *input, const ITensor *weights, ITensor *output, const PadStrideInfo &conv_info, DataLayout data_layout = DataLayout::NCHW); + void configure(const ITensor *input, const ITensor *weights, ITensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1, DataLayout data_layout = DataLayout::NCHW); /** Static method that checks if optimized execution is supported for the given parameters * - * @param[in] input_shape Input shape - * @param[in] conv_info Padding and stride information to use for the convolution. - * @param[in] dt Data type of the input and weights - * @param[in] data_layout (Optional) Data layout of the input and weights tensor + * @param[in] input_shape Input shape + * @param[in] conv_info Padding and stride information to use for the convolution. + * @param[in] dt Data type of the input and weights + * @param[in] data_layout (Optional) Data layout of the input and weights tensor + * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. * * @return True if the optimized kernels can be executed else false */ - static bool is_optimized_execution_possible(TensorShape input_shape, PadStrideInfo conv_info, DataType dt, DataLayout data_layout = DataLayout::NCHW); + static bool is_optimized_execution_possible(TensorShape input_shape, PadStrideInfo conv_info, DataType dt, unsigned int depth_multiplier = 1, DataLayout data_layout = DataLayout::NCHW); /** Generates the convolver object */ void generate_convolver(); @@ -110,6 +112,7 @@ private: std::unique_ptr<depthwise::IDepthwiseConvolution> _convolver; unsigned int _num_elems_written_per_iteration; bool _run_optimized; + unsigned int _depth_multiplier; }; } // namespace arm_compute #endif /* __ARM_COMPUTE_NEDEPTHWISECONVOLUTIONKERNEL3x3_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEDepthwiseIm2ColKernel.h b/arm_compute/core/NEON/kernels/NEDepthwiseIm2ColKernel.h index ca10bfaab2..9c11cfa425 100644 --- a/arm_compute/core/NEON/kernels/NEDepthwiseIm2ColKernel.h +++ b/arm_compute/core/NEON/kernels/NEDepthwiseIm2ColKernel.h @@ -54,15 +54,16 @@ public: NEDepthwiseIm2ColKernel &operator=(NEDepthwiseIm2ColKernel &&) = default; /** Set the input and output of the kernel. * - * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM], - * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8, F32 - * @param[out] output The output tensor. First 3 lower dimensions represent a transform of each 3D input, - * while every dimension above 3 represents a batch. Data types supported: Same as @p input - * @param[in] kernel_dims The kernel dimensions (width and height). - * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. - * @param[in] has_bias Boolean that specifies if the depthwise convolution has bias. + * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM], + * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8, F32 + * @param[out] output The output tensor. First 3 lower dimensions represent a transform of each 3D input, + * while every dimension above 3 represents a batch. Data types supported: Same as @p input + * @param[in] kernel_dims The kernel dimensions (width and height). + * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. + * @param[in] has_bias Boolean that specifies if the depthwise convolution has bias. + * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. */ - void configure(const ITensor *input, ITensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias = false); + void configure(const ITensor *input, ITensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias = false, unsigned int depth_multiplier = 1); // Inherited methods overridden: void run(const Window &window, const ThreadInfo &info) override; @@ -87,6 +88,7 @@ private: Size2D _kernel_dims; PadStrideInfo _conv_info; bool _has_bias; + unsigned int _depth_multiplier; }; } // arm_compute #endif /*__ARM_COMPUTE_NEDEPTHWISEIM2COLKERNEL_H__ */ diff --git a/arm_compute/core/utils/misc/ShapeCalculator.h b/arm_compute/core/utils/misc/ShapeCalculator.h index 7d07d4619b..a0bc4eab54 100644 --- a/arm_compute/core/utils/misc/ShapeCalculator.h +++ b/arm_compute/core/utils/misc/ShapeCalculator.h @@ -125,7 +125,7 @@ inline TensorShape compute_transposed_shape(const ITensorInfo &input) return shape_transposed; } -inline TensorShape compute_depthwise_convolution_shape(const ITensorInfo &input, const ITensorInfo &weights, PadStrideInfo conv_info) +inline TensorShape compute_depthwise_convolution_shape(const ITensorInfo &input, const ITensorInfo &weights, PadStrideInfo conv_info, unsigned int depth_multiplier) { const TensorShape input_shape{ input.tensor_shape() }; const TensorShape weights_shape{ weights.tensor_shape() }; @@ -133,6 +133,7 @@ inline TensorShape compute_depthwise_convolution_shape(const ITensorInfo &input, const DataLayout data_layout = input.data_layout(); const int width_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); const int height_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); + const int channel_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL); unsigned int output_width = 0; unsigned int output_height = 0; @@ -143,6 +144,7 @@ inline TensorShape compute_depthwise_convolution_shape(const ITensorInfo &input, TensorShape output_shape{ input_shape }; output_shape.set(width_idx, output_width); output_shape.set(height_idx, output_height); + output_shape.set(channel_idx, input_shape[channel_idx] * depth_multiplier); return output_shape; } diff --git a/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h index 82947bc7e6..6e5ce4cd48 100644 --- a/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h +++ b/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h @@ -55,15 +55,17 @@ public: CLDepthwiseConvolutionLayer3x3(); /** Initialize the function's source, destination, conv and border_size. * - * @param[in, out] input Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling). - * @param[in] weights Weights tensor. A 3D tensor with shape [3, 3, IFM]. Data type supported: Same as @p input. - * @param[in] biases (Optional) Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed. - * Data type supported: Same as @p input. - * @param[out] output Destination tensor. Data type supported: same as @p input. - * @param[in] conv_info Padding and stride information to use for the convolution. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for 3x3 QASYMM8 supported. + * @param[in, out] input Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling). + * @param[in] weights Weights tensor. A 3D tensor with shape [3, 3, IFM]. Data type supported: Same as @p input. + * @param[in] biases (Optional) Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed. + * Data type supported: Same as @p input. + * @param[out] output Destination tensor. Data type supported: same as @p input. + * @param[in] conv_info Padding and stride information to use for the convolution. + * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for 3x3 QASYMM8 supported. */ - void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, ActivationLayerInfo act_info = ActivationLayerInfo()); + void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1, + ActivationLayerInfo act_info = ActivationLayerInfo()); // Inherited methods overriden: void run() override; @@ -96,14 +98,15 @@ public: CLDepthwiseConvolutionLayer &operator=(CLDepthwiseConvolutionLayer &&) = default; /** Initialize the function's source, destination, weights and convolution information. * - * @param[in, out] input Source tensor. Data type supported: QASYMM8/F32. (Written to only for border filling). - * @param[in] weights Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input. - * @param[in] biases (Optional) Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed. - * Data type supported: Same as @p input, S32 when input is QASYMM8. - * @param[out] output Destination tensor. Data type supported: same as @p input. - * @param[in] conv_info Padding and stride information to use for the convolution. + * @param[in, out] input Source tensor. Data type supported: QASYMM8/F32. (Written to only for border filling). + * @param[in] weights Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input. + * @param[in] biases (Optional) Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed. + * Data type supported: Same as @p input, S32 when input is QASYMM8. + * @param[out] output Destination tensor. Data type supported: same as @p input. + * @param[in] conv_info Padding and stride information to use for the convolution. + * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. */ - void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info); + void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1); // Inherited methods overriden: void run() override; diff --git a/arm_compute/runtime/GLES_COMPUTE/functions/GCDepthwiseConvolutionLayer.h b/arm_compute/runtime/GLES_COMPUTE/functions/GCDepthwiseConvolutionLayer.h index e523356fca..c99485634c 100644 --- a/arm_compute/runtime/GLES_COMPUTE/functions/GCDepthwiseConvolutionLayer.h +++ b/arm_compute/runtime/GLES_COMPUTE/functions/GCDepthwiseConvolutionLayer.h @@ -47,14 +47,15 @@ public: GCDepthwiseConvolutionLayer3x3(); /** Initialize the function's source, destination, conv and border_size. * - * @param[in, out] input Source tensor. Data type supported: F16. (Written to only for border filling). - * @param[in] weights Weights tensor. A 3D tensor with shape [3, 3, IFM]. Data type supported: Same as @p input. - * @param[in] biases (Optional) Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed. - * Data type supported: Same as @p input. - * @param[out] output Destination tensor. Data type supported: same as @p input. - * @param[in] conv_info Padding and stride information to use for the convolution. + * @param[in, out] input Source tensor. Data type supported: F16. (Written to only for border filling). + * @param[in] weights Weights tensor. A 3D tensor with shape [3, 3, IFM]. Data type supported: Same as @p input. + * @param[in] biases (Optional) Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed. + * Data type supported: Same as @p input. + * @param[out] output Destination tensor. Data type supported: same as @p input. + * @param[in] conv_info Padding and stride information to use for the convolution. + * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. */ - void configure(IGCTensor *input, const IGCTensor *weights, const IGCTensor *biases, IGCTensor *output, const PadStrideInfo &conv_info); + void configure(IGCTensor *input, const IGCTensor *weights, const IGCTensor *biases, IGCTensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1); // Inherited methods overridden: void run() override final; diff --git a/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h index 84d3594426..b80fb7f2c8 100644 --- a/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h +++ b/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h @@ -55,14 +55,15 @@ public: NEDepthwiseConvolutionLayer3x3(); /** Initialize the function's source, destination, kernels and border_size. * - * @param[in, out] input Source tensor. Data type supported: QASYMM8/F32. (Written to only for border filling). - * @param[in] weights Weights tensor. These are 3D tensors with shape [3, 3, IFM]. Data type supported: Same as @p input. - * @param[in] biases (Optional) Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed. - * Data type supported: Same as @p input. - * @param[out] output Destination tensor. Data type supported: same as @p input. - * @param[in] conv_info Padding and stride information to use for the convolution. + * @param[in, out] input Source tensor. Data type supported: QASYMM8/F32. (Written to only for border filling). + * @param[in] weights Weights tensor. These are 3D tensors with shape [3, 3, IFM]. Data type supported: Same as @p input. + * @param[in] biases (Optional) Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed. + * Data type supported: Same as @p input. + * @param[out] output Destination tensor. Data type supported: same as @p input. + * @param[in] conv_info Padding and stride information to use for the convolution. + * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. */ - void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info); + void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1); // Inherited methods overriden: void run() override; @@ -109,14 +110,15 @@ public: NEDepthwiseConvolutionLayer &operator=(NEDepthwiseConvolutionLayer &&) = default; /** Initialize the function's source, destination, weights and convolution information. * - * @param[in, out] input Source tensor. Data type supported: QASYMM8/F32. (Written to only for border filling). - * @param[out] output Destination tensor. Data type supported: same as @p input. - * @param[in] weights Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input. - * @param[in] biases (Optional) Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed. - * Data type supported: Same as @p input, S32 when input is QASYMM8. - * @param[in] conv_info Padding and stride information to use for the convolution. + * @param[in, out] input Source tensor. Data type supported: QASYMM8/F32. (Written to only for border filling). + * @param[out] output Destination tensor. Data type supported: same as @p input. + * @param[in] weights Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input. + * @param[in] biases (Optional) Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed. + * Data type supported: Same as @p input, S32 when input is QASYMM8. + * @param[in] conv_info Padding and stride information to use for the convolution. + * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. */ - void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info); + void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1); // Inherited methods overriden: void run() override; |