From 916d1bcee42051721a82cfb46b52855c2fe56646 Mon Sep 17 00:00:00 2001 From: Gian Marco Iodice Date: Mon, 13 Aug 2018 11:20:41 +0100 Subject: COMPMID-1498 - Enable grouping in CLGEMMConvolutionLayer Change-Id: I15c7df21773145b03f42b6f78bd7ad2e5b8a5219 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/144126 Tested-by: Jenkins Reviewed-by: Giorgio Arena Reviewed-by: Georgios Pinitas --- arm_compute/core/CL/kernels/CLCol2ImKernel.h | 5 +- arm_compute/core/CL/kernels/CLIm2ColKernel.h | 4 +- .../core/CL/kernels/CLWeightsReshapeKernel.h | 8 +- arm_compute/core/NEON/kernels/NEIm2ColKernel.h | 4 +- arm_compute/core/utils/misc/ShapeCalculator.h | 9 +- .../runtime/CL/functions/CLConvolutionLayer.h | 9 +- .../runtime/CL/functions/CLGEMMConvolutionLayer.h | 28 ++--- .../GLES_COMPUTE/functions/GCConvolutionLayer.h | 23 +--- .../runtime/NEON/functions/NEConvolutionLayer.h | 9 +- .../NEON/functions/NEGEMMConvolutionLayer.h | 6 +- src/core/CL/kernels/CLCol2ImKernel.cpp | 3 +- src/core/CL/kernels/CLIm2ColKernel.cpp | 2 + src/core/CL/kernels/CLWeightsReshapeKernel.cpp | 25 +++-- src/runtime/CL/functions/CLConvolutionLayer.cpp | 15 ++- .../CL/functions/CLGEMMConvolutionLayer.cpp | 43 ++++---- .../GLES_COMPUTE/functions/GCConvolutionLayer.cpp | 4 +- src/runtime/NEON/functions/NEConvolutionLayer.cpp | 7 +- .../NEON/functions/NEGEMMConvolutionLayer.cpp | 10 +- tests/datasets/LargeConvolutionLayerDataset.h | 38 +++++-- tests/datasets/SmallConvolutionLayerDataset.h | 35 ++++++ tests/validation/CL/ConvolutionLayer.cpp | 117 +++++++++++++++++++-- tests/validation/CL/WeightsReshape.cpp | 4 +- .../validation/fixtures/ConvolutionLayerFixture.h | 17 ++- tests/validation/reference/ConvolutionLayer.cpp | 49 +++++---- tests/validation/reference/ConvolutionLayer.h | 2 +- 25 files changed, 331 insertions(+), 145 deletions(-) diff --git a/arm_compute/core/CL/kernels/CLCol2ImKernel.h b/arm_compute/core/CL/kernels/CLCol2ImKernel.h index 5c047ca091..2a18ae08c8 100644 --- a/arm_compute/core/CL/kernels/CLCol2ImKernel.h +++ b/arm_compute/core/CL/kernels/CLCol2ImKernel.h @@ -63,12 +63,11 @@ public: CLCol2ImKernel &operator=(CLCol2ImKernel &&) = default; /** Default destructor */ ~CLCol2ImKernel() = default; - /** Set the input and output of the kernel. * * @param[in] input The input tensor to convert. Data types supported: QASYMM8/F16/F32 * @param[out] output The output tensor. 3 lower dimensions represent a single output [width, height, OFM], - * while the rest represent batch of outputs. Data types supported: Same as @p input + * while the rest represent batch of outputs. Data types supported: Same as @p input. Data layout: NCHW * @param[in] convolved_dims Output convolved dimensions. * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution */ @@ -77,7 +76,7 @@ public: * * @param[in] input The input tensor to convert. Data types supported: QASYMM8/F16/F32 * @param[in] output The output tensor. 3 lower dimensions represent a single output [width, height, OFM], - * while the rest represent batch of outputs. Data types supported: Same as @p input + * while the rest represent batch of outputs. Data types supported: Same as @p input. Data layout: NCHW * @param[in] convolved_dims Output convolved dimensions. 
* @param[in] num_groups (Optional) Number of groups when performing a grouped convolution * diff --git a/arm_compute/core/CL/kernels/CLIm2ColKernel.h b/arm_compute/core/CL/kernels/CLIm2ColKernel.h index c678f277cb..0647f5dcec 100644 --- a/arm_compute/core/CL/kernels/CLIm2ColKernel.h +++ b/arm_compute/core/CL/kernels/CLIm2ColKernel.h @@ -76,7 +76,7 @@ public: * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. * @param[in] has_bias In case biases are provided expands the matrix with 1. * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). - * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution + * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is only supported for NCHW data layout */ void configure(const ICLTensor *input, ICLTensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation = Size2D(1U, 1U), unsigned int num_groups = 1); @@ -90,7 +90,7 @@ public: * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. * @param[in] has_bias In case biases are provided expands the matrix with 1. * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). - * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution + * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is only supported for NCHW data layout * * @return a status */ diff --git a/arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h b/arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h index d3bbbaf470..bdc5792641 100644 --- a/arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h +++ b/arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h @@ -75,10 +75,10 @@ public: * @warning Appending biases to weights reshaped matrix is not supported for quantized asymmetric types. * @param[out] output The output tensor. Should be a 2D Tensor if there are no groups and the weights are not shared; a 3D Tensor otherwise. * Data types supported: Same as @p input - * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. + * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is only supported for NCHW data layout * Number of groups greater than one are only supported for NCHW data layout, and the number of weights must be a multiple of it. */ - void configure(const ICLTensor *input, const ICLTensor *biases, ICLTensor *output, const unsigned int num_groups = 1); + void configure(const ICLTensor *input, const ICLTensor *biases, ICLTensor *output, unsigned int num_groups = 1); /** Static function to check if given info will lead to a valid configuration of @ref CLWeightsReshapeKernel * * @param[in] input The input tensor to convert. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] if shared, @@ -88,12 +88,12 @@ public: * @warning Appending biases to weights reshaped matrix is not supported for quantized asymmetric types. * @param[in] output The output tensor. Should be a 2D Tensor if there are no groups and the weights are not shared; a 3D Tensor otherwise. * Data types supported: Same as @p input - * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. 
+ * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is only supported for NCHW data layout * Number of groups greater than one are only supported for NCHW data layout, and the number of weights must be a multiple of it. * * @return a status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *biases, const ITensorInfo *output, const unsigned int num_groups = 1); + static Status validate(const ITensorInfo *input, const ITensorInfo *biases, const ITensorInfo *output, unsigned int num_groups = 1); // Inherited methods overridden: void run(const Window &window, cl::CommandQueue &queue) override; diff --git a/arm_compute/core/NEON/kernels/NEIm2ColKernel.h b/arm_compute/core/NEON/kernels/NEIm2ColKernel.h index 37145a38c1..38bdff0e98 100644 --- a/arm_compute/core/NEON/kernels/NEIm2ColKernel.h +++ b/arm_compute/core/NEON/kernels/NEIm2ColKernel.h @@ -84,7 +84,7 @@ public: * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. * @param[in] has_bias In case biases are provided expands the matrix with 1. * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). - * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution + * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is not supported * @param[in] is_fully_connected (Optional) Determines whether this kernel will be called by @ref NEFullyConnectedLayer in order to validate the arguments * @param[in] is_flatten (Optional) Determines whether this kernel will be called by @ref NEFlattenLayer in order to validate the arguments */ @@ -100,7 +100,7 @@ public: * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. * @param[in] has_bias In case biases are provided expands the matrix with 1. * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). - * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution + * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is not supported * @param[in] is_fully_connected (Optional)Determines whether this kernel will be called by @ref NEFullyConnectedLayer in order to validate the arguments * @param[in] is_flatten (Optional) Determines whether this kernel will be called by @ref NEFlattenLayer in order to validate the arguments * diff --git a/arm_compute/core/utils/misc/ShapeCalculator.h b/arm_compute/core/utils/misc/ShapeCalculator.h index 8a00c22306..c40e7119b2 100644 --- a/arm_compute/core/utils/misc/ShapeCalculator.h +++ b/arm_compute/core/utils/misc/ShapeCalculator.h @@ -55,19 +55,20 @@ inline TensorShape compute_permutation_output_shape(const ITensorInfo &input, co permute(output_shape, perm); return output_shape; } -inline TensorShape compute_weights_reshaped_shape(const ITensorInfo &weights, bool has_bias = false, const unsigned int num_groups = 1) +inline TensorShape compute_weights_reshaped_shape(const ITensorInfo &weights, bool has_bias = false, unsigned int num_groups = 1) { // Number of groups greater than one are only supported for NCHW data layout, and the number of weights must be a multiple of it. 
- ARM_COMPUTE_ERROR_ON(num_groups == 0); - ARM_COMPUTE_ERROR_ON((weights.dimension(3) % num_groups) != 0); ARM_COMPUTE_ERROR_ON(weights.data_layout() == DataLayout::NHWC && num_groups > 1); + ARM_COMPUTE_ERROR_ON((weights.dimension(3) % num_groups) != 0); // Calculate output shape TensorShape weights_reshaped{ weights.tensor_shape() }; + weights_reshaped.set(3, weights_reshaped[3] / num_groups); + weights_reshaped.collapse(3); const size_t tmp_dim = weights_reshaped[0]; - weights_reshaped.set(0, weights_reshaped[1] / num_groups); + weights_reshaped.set(0, weights_reshaped[1]); weights_reshaped.set(1, tmp_dim + (has_bias ? 1 : 0)); if(weights.num_dimensions() < 5) { diff --git a/arm_compute/runtime/CL/functions/CLConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLConvolutionLayer.h index c9a74f2a4c..8270e9723e 100644 --- a/arm_compute/runtime/CL/functions/CLConvolutionLayer.h +++ b/arm_compute/runtime/CL/functions/CLConvolutionLayer.h @@ -60,10 +60,11 @@ public: * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). * @param[in] act_info (Optional) Activation layer information in case of a fused activation. * @param[in] enable_fast_math (Optional) Enable fast math computation. In case this flag were set, the function could dispatch the fastest implementation - * available which may introduce a drop of accuracy as well. Default is false + * available which may introduce a drop of accuracy as well. Default is false + * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is only supported for NCHW data layout */ void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info = WeightsInfo(), - const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), bool enable_fast_math = false); + const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), bool enable_fast_math = false, unsigned int num_groups = 1); /** Static function to check if given info will lead to a valid configuration of @ref CLConvolutionLayer * * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM], @@ -79,11 +80,13 @@ public: * @param[in] act_info (Optional) Activation layer information in case of a fused activation. * @param[in] enable_fast_math (Optional) Enable fast math computation. In case this flag were set, the function could dispatch the fastest implementation * available which may introduce a drop of accuracy as well. Default is false + * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is only supported for NCHW data layout * * @return a status */ static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, - const WeightsInfo &weights_info = WeightsInfo(), const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), bool enable_fast_math = false); + const WeightsInfo &weights_info = WeightsInfo(), const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), bool enable_fast_math = false, + unsigned int num_groups = 1); /** Static function to check if given info will return the convolution called by @ref CLConvolutionLayer * * @param[in] input Source tensor. 
3 lower dimensions represent a single input [width, height, IFM], diff --git a/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h index 8538d83c2b..c6ae535713 100644 --- a/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h +++ b/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h @@ -59,22 +59,24 @@ public: CLConvolutionLayerReshapeWeights(); /** Set the input and output tensors. * - * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. - * Data type supported: QASYMM8/F16/F32. - * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p weights. - * @param[out] output Destination tensor. Data types supported: Same as @p weights. + * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. + * Data type supported: QASYMM8/F16/F32. + * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p weights. + * @param[out] output Destination tensor. Data types supported: Same as @p weights. + * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is only supported for NCHW data layout */ - void configure(const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output); + void configure(const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, unsigned int num_groups = 1); /** Static function to check if given info will lead to a valid configuration of @ref CLConvolutionLayerReshapeWeights * - * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. - * Data type supported: QASYMM8/F16/F32. - * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p weights. - * @param[in] output Destination tensor. Data types supported: Same as @p weights. + * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. + * Data type supported: QASYMM8/F16/F32. + * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p weights. + * @param[in] output Destination tensor. Data types supported: Same as @p weights. + * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is only supported for NCHW data layout * * @return a status */ - static Status validate(const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output); + static Status validate(const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, unsigned int num_groups = 1); // Inherited methods overridden: void run() override; @@ -122,9 +124,10 @@ public: * tensor has also been transposed with CLGEMMTranspose1xWKernel. Data type supported: Same as @p input. * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). * @param[in] act_info (Optional) Activation layer information in case of a fused activation. + * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. 
num_groups != 1 is only supported for NCHW data layout */ void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info = WeightsInfo(), - const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo()); + const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), unsigned int num_groups = 1); /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMConvolutionLayer. * * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM], @@ -140,11 +143,12 @@ public: * tensor has also been transposed with CLGEMMTranspose1xWKernel. Data type supported: Same as @p input. * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). * @param[in] act_info (Optional) Activation layer information in case of a fused activation. + * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is only supported for NCHW data layout * * @return a status */ static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, - const WeightsInfo &weights_info = WeightsInfo(), const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo()); + const WeightsInfo &weights_info = WeightsInfo(), const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), unsigned int num_groups = 1); // Inherited methods overridden: void run() override; diff --git a/arm_compute/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.h b/arm_compute/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.h index 421150e18e..4fac95e72f 100644 --- a/arm_compute/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.h +++ b/arm_compute/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.h @@ -101,29 +101,10 @@ public: * tensor has also been transposed with GCGEMMTranspose1xWKernel. Data type supported: Same as @p input. * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). * @param[in] act_info (Optional) Activation layer information in case of a fused activation. + * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is not supported */ void configure(const IGCTensor *input, const IGCTensor *weights, const IGCTensor *biases, IGCTensor *output, const PadStrideInfo &conv_info, - const WeightsInfo &weights_info = WeightsInfo(), const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo()); - /** Static function to check if given info will lead to a valid configuration of @ref GCConvolutionLayer. - * - * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM], - * while every optional dimension from 4 and above represent a batch of inputs. - * Data types supported: QASYMM8/F16/F32. - * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: Same as @p input. - * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. - * Data type supported: Should match @p input data type, except for input of QASYMM8 type where biases should be of S32 type. 
- * @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs. - * Data types supported: Same as @p input. - * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. - * @param[in] weights_info Specifies if the weights tensor has been reshaped with GCWeightsReshapeKernel. If this is not part of the fully connected layer the weights - * tensor has also been transposed with GCGEMMTranspose1xWKernel. Data type supported: Same as @p input. - * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, - const WeightsInfo &weights_info = WeightsInfo(), const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo()); + const WeightsInfo &weights_info = WeightsInfo(), const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), unsigned int num_groups = 1); // Inherited methods overridden: void run() override; diff --git a/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h index c4226cbc5d..5b53aec9e6 100644 --- a/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h +++ b/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h @@ -64,10 +64,11 @@ public: * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported. * @param[in] enable_fast_math (Optional) Enable fast math computation. In case this flag were set, the function could dispatch the fastest implementation - * available which may introduce a drop of accuracy as well. Default is false + * available which may introduce a drop of accuracy as well. Default is false + * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is not supported */ void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info = WeightsInfo(), - const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), bool enable_fast_math = false); + const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), bool enable_fast_math = false, unsigned int num_groups = 1); /** Static function to check if given info will lead to a valid configuration of @ref NEConvolutionLayer * * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM], @@ -85,11 +86,13 @@ public: * @param[in] act_info (Optional) Activation layer information in case of a fused activation. * @param[in] enable_fast_math (Optional) Enable fast math computation. In case this flag were set, the function could dispatch the fastest implementation * available which may introduce a drop of accuracy as well. Default is false + * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. 
num_groups != 1 is not supported * * @return a status */ static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, - const WeightsInfo &weights_info = WeightsInfo(), const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), bool enable_fast_math = false); + const WeightsInfo &weights_info = WeightsInfo(), const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), bool enable_fast_math = false, + unsigned int num_groups = 1); /** Static function to check if given info will return the convolution called by @ref NEConvolutionLayer * * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM], diff --git a/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h index e587cb4e6f..dc00b17efc 100644 --- a/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h +++ b/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h @@ -116,9 +116,10 @@ public: * tensor has also been transposed with NEGEMMTranspose1xWKernel. Data type supported: Same as @p input. * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported. + * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is not supported */ void configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info = WeightsInfo(), - const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo()); + const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), unsigned int num_groups = 1); /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMConvolutionLayer * * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM], @@ -134,11 +135,12 @@ public: * tensor has also been transposed with NEGEMMTranspose1xWKernel. Data type supported: Same as @p input. * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported. + * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. 
num_groups != 1 is not supported * * @return a status */ static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, - const WeightsInfo &weights_info = WeightsInfo(), const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo()); + const WeightsInfo &weights_info = WeightsInfo(), const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), unsigned int num_groups = 1); // Inherited methods overridden: void run() override; diff --git a/src/core/CL/kernels/CLCol2ImKernel.cpp b/src/core/CL/kernels/CLCol2ImKernel.cpp index d7582dc943..40032f97c4 100644 --- a/src/core/CL/kernels/CLCol2ImKernel.cpp +++ b/src/core/CL/kernels/CLCol2ImKernel.cpp @@ -119,7 +119,8 @@ void CLCol2ImKernel::configure(const ICLTensor *input, ICLTensor *output, std::p _config_id = "col2im_"; _config_id += lower_string(string_from_data_type(input->info()->data_type())); _config_id += "_"; - _config_id += (num_groups > 1) ? "grouping_" : ""; + _config_id += support::cpp11::to_string(num_groups); + _config_id += "_"; _config_id += support::cpp11::to_string(input->info()->dimension(0)); _config_id += "_"; _config_id += support::cpp11::to_string(input->info()->dimension(1)); diff --git a/src/core/CL/kernels/CLIm2ColKernel.cpp b/src/core/CL/kernels/CLIm2ColKernel.cpp index 3d71567eee..0ba0d0e2f9 100644 --- a/src/core/CL/kernels/CLIm2ColKernel.cpp +++ b/src/core/CL/kernels/CLIm2ColKernel.cpp @@ -324,6 +324,8 @@ void CLIm2ColKernel::configure(const ICLTensor *input, ICLTensor *output, const _config_id += "_"; _config_id += lower_string(string_from_data_type(input->info()->data_type())); _config_id += "_"; + _config_id += support::cpp11::to_string(num_groups); + _config_id += "_"; _config_id += support::cpp11::to_string(output->info()->dimension(0)); _config_id += "_"; _config_id += support::cpp11::to_string(output->info()->dimension(1)); diff --git a/src/core/CL/kernels/CLWeightsReshapeKernel.cpp b/src/core/CL/kernels/CLWeightsReshapeKernel.cpp index 5ef0f5b152..7639a48a5f 100644 --- a/src/core/CL/kernels/CLWeightsReshapeKernel.cpp +++ b/src/core/CL/kernels/CLWeightsReshapeKernel.cpp @@ -38,7 +38,7 @@ using namespace arm_compute::misc::shape_calculator; namespace { -Status validate_arguments(const ITensorInfo *input, const ITensorInfo *biases, const ITensorInfo *output, const unsigned int num_groups) +Status validate_arguments(const ITensorInfo *input, const ITensorInfo *biases, const ITensorInfo *output, unsigned int num_groups) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input); @@ -75,12 +75,12 @@ CLWeightsReshapeKernel::CLWeightsReshapeKernel() { } -void CLWeightsReshapeKernel::configure(const ICLTensor *input, const ICLTensor *biases, ICLTensor *output, const unsigned int num_groups) +void CLWeightsReshapeKernel::configure(const ICLTensor *input, const ICLTensor *biases, ICLTensor *output, unsigned int num_groups) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); // Output tensor auto inizialitation if not yet initialized - auto_init_if_empty(*output->info(), input->info()->clone()->set_tensor_shape(compute_weights_reshaped_shape(*input->info(), (biases != nullptr)))); + auto_init_if_empty(*output->info(), input->info()->clone()->set_tensor_shape(compute_weights_reshaped_shape(*input->info(), (biases != nullptr), num_groups))); // Perform validation step 
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), @@ -102,15 +102,6 @@ void CLWeightsReshapeKernel::configure(const ICLTensor *input, const ICLTensor * // Create kernel _kernel = static_cast(CLKernelLibrary::get().create_kernel("reshape_to_columns", build_opts.options())); - // Set static arguments - unsigned int idx = num_arguments_per_3D_tensor() + num_arguments_per_2D_tensor(); - idx += (biases != nullptr) ? num_arguments_per_1D_tensor() : 0; - _kernel.setArg(idx++, _input->info()->dimension(0)); - _kernel.setArg(idx++, _input->info()->dimension(1)); - _kernel.setArg(idx++, _input->info()->dimension(2)); - _kernel.setArg(idx++, _input->info()->dimension(3)); - _kernel.setArg(idx++, _output->info()->strides_in_bytes().z()); - // Configure window Window win = calculate_max_window(*input->info(), Steps()); // The CLWeightsReshapeKernel doesn't need padding so update_window_and_padding() can be skipped @@ -118,7 +109,7 @@ void CLWeightsReshapeKernel::configure(const ICLTensor *input, const ICLTensor * ICLKernel::configure_internal(win); } -Status CLWeightsReshapeKernel::validate(const ITensorInfo *input, const ITensorInfo *biases, const ITensorInfo *output, const unsigned int num_groups) +Status CLWeightsReshapeKernel::validate(const ITensorInfo *input, const ITensorInfo *biases, const ITensorInfo *output, unsigned int num_groups) { ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, biases, output, num_groups)); return Status{}; @@ -138,6 +129,14 @@ void CLWeightsReshapeKernel::run(const Window &window, cl::CommandQueue &queue) Window biases_window; Window biases_slice; + unsigned int idx = num_arguments_per_3D_tensor() + num_arguments_per_2D_tensor(); + idx += (_biases != nullptr) ? num_arguments_per_1D_tensor() : 0; + _kernel.setArg(idx++, _input->info()->dimension(0)); + _kernel.setArg(idx++, _input->info()->dimension(1)); + _kernel.setArg(idx++, _input->info()->dimension(2)); + _kernel.setArg(idx++, _input->info()->dimension(3)); + _kernel.setArg(idx++, _output->info()->strides_in_bytes().z()); + if(_biases != nullptr) { biases_window.use_tensor_dimensions(_biases->info()->tensor_shape()); diff --git a/src/runtime/CL/functions/CLConvolutionLayer.cpp b/src/runtime/CL/functions/CLConvolutionLayer.cpp index d36cce6505..0014e71734 100644 --- a/src/runtime/CL/functions/CLConvolutionLayer.cpp +++ b/src/runtime/CL/functions/CLConvolutionLayer.cpp @@ -43,17 +43,18 @@ CLConvolutionLayer::CLConvolutionLayer(std::shared_ptr memory_ma } void CLConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info, - const Size2D &dilation, const ActivationLayerInfo &act_info, bool enable_fast_math) + const Size2D &dilation, const ActivationLayerInfo &act_info, bool enable_fast_math, unsigned int num_groups) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output); ARM_COMPUTE_ERROR_THROW_ON(CLConvolutionLayer::validate(input->info(), weights->info(), ((biases != nullptr) ? 
biases->info() : nullptr), output->info(), conv_info, weights_info, dilation, act_info, - enable_fast_math)); + enable_fast_math, num_groups)); switch(CLConvolutionLayer::get_convolution_method(input->info(), weights->info(), output->info(), conv_info, weights_info, act_info, CLScheduler::get().target(), dilation, enable_fast_math)) { case ConvolutionMethod::WINOGRAD: { + ARM_COMPUTE_ERROR_ON(num_groups != 1); auto f = arm_compute::support::cpp14::make_unique(_memory_manager); f->configure(input, weights, biases, output, conv_info, act_info, enable_fast_math); _function = std::move(f); @@ -61,6 +62,7 @@ void CLConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weights, c } case ConvolutionMethod::DIRECT: { + ARM_COMPUTE_ERROR_ON(num_groups != 1); auto f = arm_compute::support::cpp14::make_unique(); f->configure(input, weights, biases, output, conv_info, act_info); _function = std::move(f); @@ -69,7 +71,7 @@ void CLConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weights, c case ConvolutionMethod::GEMM: { auto f = arm_compute::support::cpp14::make_unique(_memory_manager); - f->configure(input, weights, biases, output, conv_info, weights_info, dilation, act_info); + f->configure(input, weights, biases, output, conv_info, weights_info, dilation, act_info, num_groups); _function = std::move(f); break; } @@ -80,9 +82,10 @@ void CLConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weights, c } Status CLConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, - const WeightsInfo &weights_info, const Size2D &dilation, const ActivationLayerInfo &act_info, bool enable_fast_math) + const WeightsInfo &weights_info, const Size2D &dilation, const ActivationLayerInfo &act_info, bool enable_fast_math, unsigned int num_groups) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output); + ARM_COMPUTE_RETURN_ERROR_ON_MSG((num_groups != 1) && (input->data_layout() != DataLayout::NCHW), "Grouping (num_groups != 1) with NHWC data layout is not supported"); const GPUTarget gpu_target = CLScheduler::get().target(); @@ -91,19 +94,21 @@ Status CLConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo case ConvolutionMethod::WINOGRAD: { //Validate Winograd + ARM_COMPUTE_RETURN_ERROR_ON_MSG(num_groups != 1, "Grouping (num_groups != 1) with CLWinogradConvolutionLayer is not supported"); ARM_COMPUTE_RETURN_ON_ERROR(CLWinogradConvolutionLayer::validate(input, weights, biases, output, conv_info, act_info, enable_fast_math)); break; } case ConvolutionMethod::DIRECT: { // Validate direct convolution layer + ARM_COMPUTE_RETURN_ERROR_ON_MSG(num_groups != 1, "Grouping (num_groups != 1) with CLDirectConvolutionLayer is not supported"); ARM_COMPUTE_RETURN_ON_ERROR(CLDirectConvolutionLayer::validate(input, weights, biases, output, conv_info, act_info)); break; } case ConvolutionMethod::GEMM: { // Validate gemm-based convolution layer - ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMConvolutionLayer::validate(input, weights, biases, output, conv_info, weights_info, dilation, act_info)); + ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMConvolutionLayer::validate(input, weights, biases, output, conv_info, weights_info, dilation, act_info, num_groups)); break; } default: diff --git a/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp b/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp index 1e639d9dff..782fe710e7 100644 --- a/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp +++ 
b/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp @@ -43,23 +43,24 @@ CLConvolutionLayerReshapeWeights::CLConvolutionLayerReshapeWeights() { } -void CLConvolutionLayerReshapeWeights::configure(const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output) +void CLConvolutionLayerReshapeWeights::configure(const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, unsigned int num_groups) { // Perform validation step ARM_COMPUTE_ERROR_ON_NULLPTR(weights, output); ARM_COMPUTE_ERROR_THROW_ON(CLConvolutionLayerReshapeWeights::validate(weights->info(), (biases != nullptr) ? biases->info() : nullptr, - output->info())); + output->info(), + num_groups)); const bool append_biases = (biases != nullptr) && !is_data_type_quantized_asymmetric(weights->info()->data_type()); const ICLTensor *biases_to_use = (append_biases) ? biases : nullptr; - _weights_reshape_kernel.configure(weights, biases_to_use, output); + _weights_reshape_kernel.configure(weights, biases_to_use, output, num_groups); output->info()->set_quantization_info(weights->info()->quantization_info()); } -Status CLConvolutionLayerReshapeWeights::validate(const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output) +Status CLConvolutionLayerReshapeWeights::validate(const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, unsigned int num_groups) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(weights); ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::QASYMM8, DataType::F16, DataType::F32); @@ -78,7 +79,7 @@ Status CLConvolutionLayerReshapeWeights::validate(const ITensorInfo *weights, co { ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(weights, output); - CLWeightsReshapeKernel::validate(weights, biases, output); + CLWeightsReshapeKernel::validate(weights, biases, output, num_groups); } return Status{}; @@ -153,7 +154,7 @@ Status CLGEMMConvolutionLayer::validate_mm(const ITensorInfo *input, const ITens } void CLGEMMConvolutionLayer::configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info, - const Size2D &dilation, const ActivationLayerInfo &act_info) + const Size2D &dilation, const ActivationLayerInfo &act_info, unsigned int num_groups) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output); @@ -164,7 +165,8 @@ void CLGEMMConvolutionLayer::configure(const ICLTensor *input, const ICLTensor * conv_info, weights_info, dilation, - act_info)); + act_info, + num_groups)); const DataType data_type = input->info()->data_type(); const DataLayout data_layout = input->info()->data_layout(); @@ -208,11 +210,11 @@ void CLGEMMConvolutionLayer::configure(const ICLTensor *input, const ICLTensor * conv_info, dilation); - unsigned int mat_weights_cols = weights->info()->dimension(idx_kernels); + unsigned int mat_weights_cols = weights->info()->dimension(idx_kernels) / num_groups; // _weights_reshaped will be auto configured in the kernel. // Just append biases and do not transpose 1xW as it will be reshaped in CLGEMM - _reshape_weights.configure(weights, biases_to_use, &_weights_reshaped); + _reshape_weights.configure(weights, biases_to_use, &_weights_reshaped, num_groups); // Create tensor to store im2col reshaped inputs if(!_skip_im2col) @@ -220,7 +222,7 @@ void CLGEMMConvolutionLayer::configure(const ICLTensor *input, const ICLTensor * _memory_group.manage(&_im2col_output); // Configure and tune im2col. 
im2col output shape is auto-initialized - _im2col_kernel.configure(input, &_im2col_output, Size2D(kernel_width, kernel_height), conv_info, _append_bias, dilation); + _im2col_kernel.configure(input, &_im2col_output, Size2D(kernel_width, kernel_height), conv_info, _append_bias, dilation, num_groups); // Set quantization info _im2col_output.info()->set_quantization_info(input->info()->quantization_info()); @@ -283,7 +285,7 @@ void CLGEMMConvolutionLayer::configure(const ICLTensor *input, const ICLTensor * if(input->info()->data_layout() == DataLayout::NCHW) { // Configure and tune Col2Im - _col2im_kernel.configure(_is_quantized ? gemm_output_staged_to_use : gemm_output_to_use, output, std::make_pair(conv_w, conv_h)); + _col2im_kernel.configure(_is_quantized ? gemm_output_staged_to_use : gemm_output_to_use, output, std::make_pair(conv_w, conv_h), num_groups); CLScheduler::get().tune_kernel_static(_col2im_kernel); } else @@ -314,13 +316,16 @@ void CLGEMMConvolutionLayer::configure(const ICLTensor *input, const ICLTensor * } Status CLGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, - const WeightsInfo &weights_info, const Size2D &dilation, const ActivationLayerInfo &act_info) + const WeightsInfo &weights_info, const Size2D &dilation, const ActivationLayerInfo &act_info, unsigned int num_groups) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output); ARM_COMPUTE_RETURN_ERROR_ON_MSG(weights_info.are_reshaped(), "Weights already reshaped are not supported!"); ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, weights); + ARM_COMPUTE_RETURN_ERROR_ON_MSG((num_groups != 1) && (input->data_layout() != DataLayout::NCHW), "Grouping (num_groups != 1) with NHWC data layout is not supported"); + ARM_COMPUTE_RETURN_ERROR_ON_MSG((num_groups != 1) && (input->data_type() == DataType::QASYMM8), "Grouping (num_groups != 1) is not supported with QASYMM8"); + ARM_COMPUTE_RETURN_ERROR_ON(((input->dimension(2) / weights->dimension(2)) != num_groups) && (input->data_layout() == DataLayout::NCHW)); const DataLayout data_layout = input->data_layout(); const DataType data_type = input->data_type(); @@ -343,7 +348,7 @@ Status CLGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI const bool skip_im2col = (data_layout == DataLayout::NHWC && kernel_width == 1 && kernel_height == 1 && conv_info.stride().first == 1 && conv_info.stride().second == 1) && !is_quantized; const bool append_bias = (biases != nullptr) && (!is_quantized); - ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_channel) != input->dimension(idx_channel)); + ARM_COMPUTE_RETURN_ERROR_ON((weights->dimension(idx_channel) * num_groups) != input->dimension(idx_channel)); ARM_COMPUTE_RETURN_ERROR_ON(weights->num_dimensions() > 4); // Validate biases @@ -377,11 +382,11 @@ Status CLGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI conv_info, dilation); - unsigned int mat_weights_cols = weights->dimension(idx_kernels); + unsigned int mat_weights_cols = weights->dimension(idx_kernels) / num_groups; // Output tensor auto inizialitation if not yet initialized - ARM_COMPUTE_RETURN_ON_ERROR(CLConvolutionLayerReshapeWeights::validate(weights, is_quantized ? 
nullptr : biases, nullptr)); - weights_reshaped_info = TensorInfo(compute_weights_reshaped_shape(*weights, (append_bias && !skip_im2col)), 1, data_type); + ARM_COMPUTE_RETURN_ON_ERROR(CLConvolutionLayerReshapeWeights::validate(weights, is_quantized ? nullptr : biases, nullptr, num_groups)); + weights_reshaped_info = TensorInfo(compute_weights_reshaped_shape(*weights, (append_bias && !skip_im2col), num_groups), 1, data_type); weights_to_use = &weights_reshaped_info; if(!skip_im2col) @@ -389,11 +394,11 @@ Status CLGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI const Size2D kernel_dims(kernel_width, kernel_height); // Output tensor auto initialization if not yet initialized - TensorShape expected_output_shape = compute_im2col_conv_shape(input, kernel_dims, conv_info, append_bias, dilation, true /* num_groups == 1, num_groups */); + TensorShape expected_output_shape = compute_im2col_conv_shape(input, kernel_dims, conv_info, append_bias, dilation, num_groups == 1, num_groups); auto_init_if_empty(im2col_reshaped_info, input->clone()->set_tensor_shape(expected_output_shape)); - ARM_COMPUTE_RETURN_ON_ERROR(CLIm2ColKernel::validate(input, &im2col_reshaped_info, kernel_dims, conv_info, append_bias, dilation)); + ARM_COMPUTE_RETURN_ON_ERROR(CLIm2ColKernel::validate(input, &im2col_reshaped_info, kernel_dims, conv_info, append_bias, dilation, num_groups)); gemm_input_to_use = &im2col_reshaped_info; } else if(append_bias) @@ -438,7 +443,7 @@ Status CLGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI { ARM_COMPUTE_RETURN_ON_ERROR(CLCol2ImKernel::validate(is_quantized ? gemm_output_staged_to_use : gemm_output_to_use, output, - std::make_pair(conv_w, conv_h))); + std::make_pair(conv_w, conv_h), num_groups)); } } diff --git a/src/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.cpp b/src/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.cpp index 5cfd72f724..c58d184afa 100644 --- a/src/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.cpp +++ b/src/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.cpp @@ -88,7 +88,7 @@ Status GCConvolutionLayer::validate_mm(const ITensorInfo *input, const ITensorIn } void GCConvolutionLayer::configure(const IGCTensor *input, const IGCTensor *weights, const IGCTensor *biases, IGCTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info, - const Size2D &dilation, const ActivationLayerInfo &act_info) + const Size2D &dilation, const ActivationLayerInfo &act_info, unsigned int num_groups) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights); ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); @@ -96,6 +96,8 @@ void GCConvolutionLayer::configure(const IGCTensor *input, const IGCTensor *weig ARM_COMPUTE_ERROR_ON_MSG(weights_info.are_reshaped(), "Weights already reshaped are not supported!"); ARM_COMPUTE_ERROR_ON(weights->info()->dimension(2) != input->info()->dimension(2)); ARM_COMPUTE_ERROR_ON(weights->info()->num_dimensions() > 4); + ARM_COMPUTE_ERROR_ON(num_groups > 1); + ARM_COMPUTE_UNUSED(num_groups); _is_prepared = false; _original_weights = weights; diff --git a/src/runtime/NEON/functions/NEConvolutionLayer.cpp b/src/runtime/NEON/functions/NEConvolutionLayer.cpp index d71fd5a715..931e5db65b 100644 --- a/src/runtime/NEON/functions/NEConvolutionLayer.cpp +++ b/src/runtime/NEON/functions/NEConvolutionLayer.cpp @@ -42,10 +42,11 @@ NEConvolutionLayer::NEConvolutionLayer(std::shared_ptr memory_ma } void NEConvolutionLayer::configure(ITensor *input, const ITensor *weights, const 
ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info, - const Size2D &dilation, const ActivationLayerInfo &act_info, bool enable_fast_math) + const Size2D &dilation, const ActivationLayerInfo &act_info, bool enable_fast_math, unsigned int num_groups) { // Perform validate step ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output); + ARM_COMPUTE_UNUSED(num_groups); ARM_COMPUTE_ERROR_THROW_ON(NEConvolutionLayer::validate(input->info(), weights->info(), ((biases != nullptr) ? biases->info() : nullptr), output->info(), conv_info, weights_info, dilation, act_info, enable_fast_math)); @@ -79,8 +80,10 @@ void NEConvolutionLayer::configure(ITensor *input, const ITensor *weights, const } Status NEConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, - const WeightsInfo &weights_info, const Size2D &dilation, const ActivationLayerInfo &act_info, bool enable_fast_math) + const WeightsInfo &weights_info, const Size2D &dilation, const ActivationLayerInfo &act_info, bool enable_fast_math, unsigned int num_groups) { + ARM_COMPUTE_RETURN_ERROR_ON_MSG((num_groups != 1), "Grouping (num_groups != 1) is not supported on NEON"); + switch(NEConvolutionLayer::get_convolution_method(input, weights, output, conv_info, weights_info, dilation, act_info)) { case ConvolutionMethod::WINOGRAD: diff --git a/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp b/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp index 52b461e255..b76cf6aa10 100644 --- a/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp +++ b/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp @@ -167,10 +167,10 @@ Status NEGEMMConvolutionLayer::validate_gemm3d(DataType data_type, int gemm_3d_d } void NEGEMMConvolutionLayer::configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info, - const Size2D &dilation, const ActivationLayerInfo &act_info) + const Size2D &dilation, const ActivationLayerInfo &act_info, unsigned int num_groups) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output); - + ARM_COMPUTE_UNUSED(num_groups); ARM_COMPUTE_ERROR_THROW_ON(NEGEMMConvolutionLayer::validate(input->info(), weights->info(), biases != nullptr ? 
biases->info() : nullptr, @@ -178,7 +178,8 @@ void NEGEMMConvolutionLayer::configure(const ITensor *input, const ITensor *weig conv_info, weights_info, dilation, - act_info)); + act_info, + num_groups)); const DataType data_type = input->info()->data_type(); const DataLayout data_layout = input->info()->data_layout(); @@ -346,13 +347,14 @@ void NEGEMMConvolutionLayer::configure(const ITensor *input, const ITensor *weig } Status NEGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, - const WeightsInfo &weights_info, const Size2D &dilation, const ActivationLayerInfo &act_info) + const WeightsInfo &weights_info, const Size2D &dilation, const ActivationLayerInfo &act_info, unsigned int num_groups) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output); ARM_COMPUTE_RETURN_ERROR_ON_MSG(weights_info.are_reshaped(), "Weights already reshaped are not supported!"); ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, weights); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(num_groups > 1, "Grouping (num_groups != 1) is not supported on NEON"); const DataLayout data_layout = input->data_layout(); const DataType data_type = input->data_type(); diff --git a/tests/datasets/LargeConvolutionLayerDataset.h b/tests/datasets/LargeConvolutionLayerDataset.h index 3eb98dbeea..170d562f6c 100644 --- a/tests/datasets/LargeConvolutionLayerDataset.h +++ b/tests/datasets/LargeConvolutionLayerDataset.h @@ -166,31 +166,51 @@ public: // Batch size 1 add_config(TensorShape(227U, 227U, 3U), TensorShape(11U, 11U, 3U, 96U), TensorShape(96U), TensorShape(55U, 55U, 96U), PadStrideInfo(4, 4, 0, 0)); add_config(TensorShape(27U, 27U, 96U), TensorShape(5U, 5U, 96U, 256U), TensorShape(256U), TensorShape(27U, 27U, 256U), PadStrideInfo(1, 1, 2, 2)); - add_config(TensorShape(13U, 13U, 256U), TensorShape(3U, 3U, 256U, 384U), TensorShape(384U), TensorShape(13U, 13U, 384U), PadStrideInfo(1, 1, 1, 1)); - add_config(TensorShape(13U, 13U, 384U), TensorShape(3U, 3U, 384U, 384U), TensorShape(384U), TensorShape(13U, 13U, 384U), PadStrideInfo(1, 1, 1, 1)); - add_config(TensorShape(13U, 13U, 384U), TensorShape(3U, 3U, 384U, 256U), TensorShape(256U), TensorShape(13U, 13U, 256U), PadStrideInfo(1, 1, 1, 1)); + add_config(TensorShape(13U, 13U, 256U), TensorShape(1U, 1U, 256U, 384U), TensorShape(384U), TensorShape(13U, 13U, 384U), PadStrideInfo(1, 1, 0, 0)); + add_config(TensorShape(13U, 13U, 384U), TensorShape(1U, 1U, 384U, 384U), TensorShape(384U), TensorShape(13U, 13U, 384U), PadStrideInfo(1, 1, 0, 0)); add_config(TensorShape(224U, 224U, 3U), TensorShape(7U, 7U, 3U, 64U), TensorShape(64U), TensorShape(112U, 112U, 64U), PadStrideInfo(2, 2, 3, 3)); add_config(TensorShape(28U, 28U, 256U), TensorShape(1U, 1U, 256U, 64U), TensorShape(64U), TensorShape(28U, 28U, 64U), PadStrideInfo(1, 1, 0, 0)); // Batch size 4 add_config(TensorShape(227U, 227U, 3U, 4U), TensorShape(11U, 11U, 3U, 96U), TensorShape(96U), TensorShape(55U, 55U, 96U, 4U), PadStrideInfo(4, 4, 0, 0)); add_config(TensorShape(27U, 27U, 96U, 4U), TensorShape(5U, 5U, 96U, 256U), TensorShape(256U), TensorShape(27U, 27U, 256U, 4U), PadStrideInfo(1, 1, 2, 2)); - add_config(TensorShape(13U, 13U, 256U, 4U), TensorShape(3U, 3U, 256U, 384U), TensorShape(384U), TensorShape(13U, 13U, 384U, 4U), 
PadStrideInfo(1, 1, 1, 1)); - add_config(TensorShape(13U, 13U, 384U, 4U), TensorShape(3U, 3U, 384U, 384U), TensorShape(384U), TensorShape(13U, 13U, 384U, 4U), PadStrideInfo(1, 1, 1, 1)); - add_config(TensorShape(13U, 13U, 384U, 4U), TensorShape(3U, 3U, 384U, 256U), TensorShape(256U), TensorShape(13U, 13U, 256U, 4U), PadStrideInfo(1, 1, 1, 1)); + add_config(TensorShape(13U, 13U, 256U, 4U), TensorShape(1U, 1U, 256U, 384U), TensorShape(384U), TensorShape(13U, 13U, 384U, 4U), PadStrideInfo(1, 1, 0, 0)); + add_config(TensorShape(13U, 13U, 384U, 4U), TensorShape(1U, 1U, 384U, 384U), TensorShape(384U), TensorShape(13U, 13U, 384U, 4U), PadStrideInfo(1, 1, 0, 0)); add_config(TensorShape(224U, 224U, 3U, 4U), TensorShape(7U, 7U, 3U, 64U), TensorShape(64U), TensorShape(112U, 112U, 64U, 4U), PadStrideInfo(2, 2, 3, 3)); add_config(TensorShape(28U, 28U, 256U, 4U), TensorShape(1U, 1U, 256U, 64U), TensorShape(64U), TensorShape(28U, 28U, 64U, 4U), PadStrideInfo(1, 1, 0, 0)); // Batch size 8 add_config(TensorShape(227U, 227U, 3U, 8U), TensorShape(11U, 11U, 3U, 96U), TensorShape(96U), TensorShape(55U, 55U, 96U, 8U), PadStrideInfo(4, 4, 0, 0)); add_config(TensorShape(27U, 27U, 96U, 8U), TensorShape(5U, 5U, 96U, 256U), TensorShape(256U), TensorShape(27U, 27U, 256U, 8U), PadStrideInfo(1, 1, 2, 2)); - add_config(TensorShape(13U, 13U, 256U, 8U), TensorShape(3U, 3U, 256U, 384U), TensorShape(384U), TensorShape(13U, 13U, 384U, 8U), PadStrideInfo(1, 1, 1, 1)); - add_config(TensorShape(13U, 13U, 384U, 8U), TensorShape(3U, 3U, 384U, 384U), TensorShape(384U), TensorShape(13U, 13U, 384U, 8U), PadStrideInfo(1, 1, 1, 1)); - add_config(TensorShape(13U, 13U, 384U, 8U), TensorShape(3U, 3U, 384U, 256U), TensorShape(256U), TensorShape(13U, 13U, 256U, 8U), PadStrideInfo(1, 1, 1, 1)); + add_config(TensorShape(13U, 13U, 256U, 8U), TensorShape(1U, 1U, 256U, 384U), TensorShape(384U), TensorShape(13U, 13U, 384U, 8U), PadStrideInfo(1, 1, 0, 0)); + add_config(TensorShape(13U, 13U, 384U, 8U), TensorShape(1U, 1U, 384U, 384U), TensorShape(384U), TensorShape(13U, 13U, 384U, 8U), PadStrideInfo(1, 1, 0, 0)); add_config(TensorShape(224U, 224U, 3U, 8U), TensorShape(7U, 7U, 3U, 64U), TensorShape(64U), TensorShape(112U, 112U, 64U, 8U), PadStrideInfo(2, 2, 3, 3)); add_config(TensorShape(28U, 28U, 256U, 8U), TensorShape(1U, 1U, 256U, 64U), TensorShape(64U), TensorShape(28U, 28U, 64U, 8U), PadStrideInfo(1, 1, 0, 0)); // Arbitrary batch size add_config(TensorShape(227U, 227U, 3U, 5U), TensorShape(11U, 11U, 3U, 96U), TensorShape(96U), TensorShape(55U, 55U, 96U, 5U), PadStrideInfo(4, 4, 0, 0)); } }; + +class LargeGroupedConvolutionLayerDataset final : public ConvolutionLayerDataset +{ +public: + LargeGroupedConvolutionLayerDataset() + { + // Batch size 1 + add_config(TensorShape(227U, 227U, 4U), TensorShape(11U, 11U, 2U, 96U), TensorShape(96U), TensorShape(55U, 55U, 96U), PadStrideInfo(4, 4, 0, 0)); + add_config(TensorShape(27U, 27U, 96U), TensorShape(5U, 5U, 24U, 256U), TensorShape(256U), TensorShape(27U, 27U, 256U), PadStrideInfo(1, 1, 2, 2)); + add_config(TensorShape(13U, 13U, 256U), TensorShape(1U, 1U, 128U, 384U), TensorShape(384U), TensorShape(13U, 13U, 384U), PadStrideInfo(1, 1, 0, 0)); + add_config(TensorShape(13U, 13U, 384U), TensorShape(3U, 3U, 128U, 384U), TensorShape(384U), TensorShape(13U, 13U, 384U), PadStrideInfo(1, 1, 1, 1)); + // Batch size 4 + add_config(TensorShape(227U, 227U, 4U, 4U), TensorShape(11U, 11U, 2U, 96U), TensorShape(96U), TensorShape(55U, 55U, 96U, 4U), PadStrideInfo(4, 4, 0, 0)); + add_config(TensorShape(27U, 27U, 96U, 
4U), TensorShape(5U, 5U, 24U, 256U), TensorShape(256U), TensorShape(27U, 27U, 256U, 4U), PadStrideInfo(1, 1, 2, 2)); + add_config(TensorShape(13U, 13U, 256U, 4U), TensorShape(3U, 3U, 128U, 384U), TensorShape(384U), TensorShape(13U, 13U, 384U, 4U), PadStrideInfo(1, 1, 1, 1)); + add_config(TensorShape(13U, 13U, 384U, 4U), TensorShape(3U, 3U, 128U, 384U), TensorShape(384U), TensorShape(13U, 13U, 384U, 4U), PadStrideInfo(1, 1, 1, 1)); + // Batch size 8 + add_config(TensorShape(227U, 227U, 4U, 8U), TensorShape(11U, 11U, 2U, 96U), TensorShape(96U), TensorShape(55U, 55U, 96U, 8U), PadStrideInfo(4, 4, 0, 0)); + add_config(TensorShape(27U, 27U, 96U, 8U), TensorShape(5U, 5U, 24U, 256U), TensorShape(256U), TensorShape(27U, 27U, 256U, 8U), PadStrideInfo(1, 1, 2, 2)); + add_config(TensorShape(13U, 13U, 256U, 8U), TensorShape(3U, 3U, 128U, 384U), TensorShape(384U), TensorShape(13U, 13U, 384U, 8U), PadStrideInfo(1, 1, 1, 1)); + add_config(TensorShape(13U, 13U, 384U, 8U), TensorShape(3U, 3U, 128U, 384U), TensorShape(384U), TensorShape(13U, 13U, 384U, 8U), PadStrideInfo(1, 1, 1, 1)); + } +}; } // namespace datasets } // namespace test } // namespace arm_compute diff --git a/tests/datasets/SmallConvolutionLayerDataset.h b/tests/datasets/SmallConvolutionLayerDataset.h index ae12dd4b16..a288d07902 100644 --- a/tests/datasets/SmallConvolutionLayerDataset.h +++ b/tests/datasets/SmallConvolutionLayerDataset.h @@ -146,6 +146,41 @@ public: add_config(TensorShape(33U, 27U, 7U, 5U), TensorShape(5U, 7U, 7U, 16U), TensorShape(16U), TensorShape(10U, 11U, 16U, 5U), PadStrideInfo(3, 2, 0, 1, 0, 1, DimensionRoundingType::FLOOR)); } }; + +class SmallGroupedConvolutionLayerDataset final : public ConvolutionLayerDataset +{ +public: + SmallGroupedConvolutionLayerDataset() + { + // Batch size 1 + // Number of groups = 2 + add_config(TensorShape(23U, 27U, 8U), TensorShape(1U, 1U, 4U, 24U), TensorShape(24U), TensorShape(12U, 27U, 24U), PadStrideInfo(2, 1, 0, 0)); + add_config(TensorShape(33U, 27U, 12U), TensorShape(5U, 5U, 6U, 16U), TensorShape(16U), TensorShape(11U, 12U, 16U), PadStrideInfo(3, 2, 1, 0)); + // Number of groups = 4 + add_config(TensorShape(23U, 27U, 8U), TensorShape(1U, 1U, 2U, 24U), TensorShape(24U), TensorShape(12U, 27U, 24U), PadStrideInfo(2, 1, 0, 0)); + add_config(TensorShape(33U, 27U, 12U), TensorShape(5U, 5U, 4U, 15U), TensorShape(15U), TensorShape(11U, 12U, 15U), PadStrideInfo(3, 2, 1, 0)); + + // Batch size 4 + // Number of groups = 2 + add_config(TensorShape(23U, 27U, 8U, 4U), TensorShape(1U, 1U, 4U, 24U), TensorShape(24U), TensorShape(12U, 27U, 24U, 4U), PadStrideInfo(2, 1, 0, 0)); + add_config(TensorShape(33U, 27U, 12U, 4U), TensorShape(5U, 5U, 6U, 16U), TensorShape(16U), TensorShape(11U, 12U, 16U, 4U), PadStrideInfo(3, 2, 1, 0)); + // Number of groups = 4 + add_config(TensorShape(23U, 27U, 8U, 4U), TensorShape(1U, 1U, 2U, 24U), TensorShape(24U), TensorShape(12U, 27U, 24U, 4U), PadStrideInfo(2, 1, 0, 0)); + add_config(TensorShape(33U, 27U, 12U, 4U), TensorShape(5U, 5U, 4U, 15U), TensorShape(15U), TensorShape(11U, 12U, 15U, 4U), PadStrideInfo(3, 2, 1, 0)); + + // Arbitrary batch size + add_config(TensorShape(23U, 27U, 8U, 5U), TensorShape(1U, 1U, 4U, 24U), TensorShape(24U), TensorShape(12U, 27U, 24U, 5U), PadStrideInfo(2, 1, 0, 0)); + add_config(TensorShape(33U, 27U, 12U, 3U), TensorShape(5U, 5U, 6U, 16U), TensorShape(16U), TensorShape(11U, 12U, 16U, 3U), PadStrideInfo(3, 2, 1, 0)); + // Number of groups = 4 + add_config(TensorShape(23U, 27U, 8U, 2U), TensorShape(1U, 1U, 2U, 24U), TensorShape(24U), 
+        add_config(TensorShape(23U, 27U, 8U, 5U), TensorShape(1U, 1U, 4U, 24U), TensorShape(24U), TensorShape(12U, 27U, 24U, 5U), PadStrideInfo(2, 1, 0, 0));
+        add_config(TensorShape(33U, 27U, 12U, 3U), TensorShape(5U, 5U, 6U, 16U), TensorShape(16U), TensorShape(11U, 12U, 16U, 3U), PadStrideInfo(3, 2, 1, 0));
+        // Number of groups = 4
+        add_config(TensorShape(23U, 27U, 8U, 2U), TensorShape(1U, 1U, 2U, 24U), TensorShape(24U), TensorShape(12U, 27U, 24U, 2U), PadStrideInfo(2, 1, 0, 0));
+        add_config(TensorShape(33U, 27U, 12U, 5U), TensorShape(5U, 5U, 4U, 15U), TensorShape(15U), TensorShape(11U, 12U, 15U, 5U), PadStrideInfo(3, 2, 1, 0));
+
+        // Asymmetric padding
+        add_config(TensorShape(33U, 27U, 8U, 5U), TensorShape(5U, 7U, 2U, 16U), TensorShape(16U), TensorShape(11U, 12U, 16U, 5U), PadStrideInfo(3, 2, 1, 1, 2, 0, DimensionRoundingType::FLOOR));
+        add_config(TensorShape(33U, 27U, 8U, 5U), TensorShape(5U, 7U, 4U, 16U), TensorShape(16U), TensorShape(11U, 12U, 16U, 5U), PadStrideInfo(3, 2, 1, 1, 0, 2, DimensionRoundingType::FLOOR));
+        add_config(TensorShape(33U, 27U, 6U, 5U), TensorShape(5U, 7U, 3U, 16U), TensorShape(16U), TensorShape(11U, 12U, 16U, 5U), PadStrideInfo(3, 2, 2, 1, 2, 0, DimensionRoundingType::FLOOR));
+    }
+};
 } // namespace datasets
 } // namespace test
 } // namespace arm_compute
diff --git a/tests/validation/CL/ConvolutionLayer.cpp b/tests/validation/CL/ConvolutionLayer.cpp
index 54fdc0c386..5c96cd4c59 100644
--- a/tests/validation/CL/ConvolutionLayer.cpp
+++ b/tests/validation/CL/ConvolutionLayer.cpp
@@ -58,6 +58,14 @@ const auto CNNDataTypes = framework::dataset::make("DataType",
     DataType::F32,
     DataType::QASYMM8,
 });
+
+/** Grouped CNN data types */
+const auto GroupedCNNDataTypes = framework::dataset::make("DataType",
+{
+    DataType::F16,
+    DataType::F32
+});
+
 const auto ActivationFunctionsDataset = framework::dataset::make("ActivationInfo",
 {
     ActivationLayerInfo(),
@@ -219,7 +227,7 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMConvolutionLayerFixture<half>, framework:
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num);
 }
-TEST_SUITE_END()
+TEST_SUITE_END() // FP16
 
 TEST_SUITE(FP32)
 
@@ -244,8 +252,8 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMConvolutionLayerFixture<float>, framework
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, absolute_tolerance_float);
 }
-TEST_SUITE_END()
-TEST_SUITE_END()
+TEST_SUITE_END() // FP32
+TEST_SUITE_END() // Float
 
 template <typename T>
 using CLGEMMConvolutionLayerQuantizedFixture = ConvolutionValidationQuantizedFixture<CLTensor, CLAccessor, CLGEMMConvolutionLayer, T>;
@@ -280,11 +288,106 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMConvolutionLayerQuantizedFixture<uint8_t>
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_qasymm8);
 }
-TEST_SUITE_END()
-TEST_SUITE_END()
+TEST_SUITE_END() // QASYMM8
+TEST_SUITE_END() // Quantized
 
-TEST_SUITE_END()
-TEST_SUITE_END()
+TEST_SUITE_END() // GEMMConvolutionLayer
+
+template <typename T>
+using CLGEMMGroupedConvolutionLayerFixture = ConvolutionValidationFixture<CLTensor, CLAccessor, CLGEMMConvolutionLayer, T>;
+
+TEST_SUITE(GroupedGEMMConvolutionLayer)
+
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallGroupedConvolutionLayerDataset(), datasets::LargeGroupedConvolutionLayerDataset()),
+                                                                           GroupedCNNDataTypes),
+                                                                   ActivationFunctionsDataset),
+               input_shape, weights_shape, bias_shape, output_shape, info, dilation, data_type, act_info)
+{
+    ARM_COMPUTE_ERROR_ON((input_shape[2] % weights_shape[2]) != 0);
+
+    // The number of groups is calculated by dividing the number of input channels of the input tensor by the number of input channels of the weights shape
+    const int num_groups = input_shape[2] / weights_shape[2];
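+    // For example, an input of shape (23, 27, 8) and weights of shape (1, 1, 4, 24)
+    // from SmallGroupedConvolutionLayerDataset give num_groups = 8 / 4 = 2.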
+
+    // Create tensors
+    CLTensor src     = create_tensor<CLTensor>(input_shape, data_type);
+    CLTensor weights = create_tensor<CLTensor>(weights_shape, data_type, 1);
+    CLTensor bias    = create_tensor<CLTensor>(bias_shape, data_type, 1);
+    CLTensor dst     = create_tensor<CLTensor>(output_shape, data_type, 1);
+
+    ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(weights.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(bias.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+    // Create and configure function
+    CLGEMMConvolutionLayer conv;
+    conv.configure(&src, &weights, &bias, &dst, info, WeightsInfo(), dilation, act_info, num_groups);
+
+    // Validate valid region
+    const ValidRegion src_valid_region     = shape_to_valid_region(input_shape);
+    const ValidRegion weights_valid_region = shape_to_valid_region(weights_shape);
+    const ValidRegion bias_valid_region    = shape_to_valid_region(bias_shape);
+    const ValidRegion dst_valid_region     = shape_to_valid_region(output_shape);
+
+    validate(src.info()->valid_region(), src_valid_region);
+    validate(weights.info()->valid_region(), weights_valid_region);
+    validate(bias.info()->valid_region(), bias_valid_region);
+    validate(dst.info()->valid_region(), dst_valid_region);
+
+    // Validate padding
+    //TODO(COMPMID-415) Need to validate padding?
+}
+
+TEST_SUITE(Float)
+TEST_SUITE(FP32)
+
+FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMGroupedConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallGroupedConvolutionLayerDataset(),
+                                                                                                                                                 framework::dataset::make("ReshapeWeights", { true })),
+                                                                                                                                         framework::dataset::make("DataType", DataType::F32)),
+                                                                                                                                 framework::dataset::make("DataLayout", { DataLayout::NCHW })),
+                                                                                                                         ActivationFunctionsDataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_f32, tolerance_num);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMGroupedConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeGroupedConvolutionLayerDataset(),
+                                                                                                                                               framework::dataset::make("ReshapeWeights", { true })),
+                                                                                                                                       framework::dataset::make("DataType", DataType::F32)),
+                                                                                                                               framework::dataset::make("DataLayout", { DataLayout::NCHW })),
+                                                                                                                       ActivationFunctionsDataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_f32, tolerance_num);
+}
+TEST_SUITE_END() // FP32
+
+TEST_SUITE(FP16)
+
+FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMGroupedConvolutionLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallGroupedConvolutionLayerDataset(),
+                                                                                                                                                framework::dataset::make("ReshapeWeights", { true })),
+                                                                                                                                        framework::dataset::make("DataType", DataType::F16)),
+                                                                                                                                framework::dataset::make("DataLayout", { DataLayout::NCHW })),
+                                                                                                                        ActivationFunctionsDataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMGroupedConvolutionLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeGroupedConvolutionLayerDataset(),
+                                                                                                                                              framework::dataset::make("ReshapeWeights", { true })),
+                                                                                                                                      framework::dataset::make("DataType", DataType::F16)),
+                                                                                                                              framework::dataset::make("DataLayout", { DataLayout::NCHW })),
+                                                                                                                      ActivationFunctionsDataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num);
+}
+TEST_SUITE_END() // FP16
+TEST_SUITE_END() // Float
+
+TEST_SUITE_END() // GroupedGEMMConvolutionLayer
+TEST_SUITE_END() // CL
 } // namespace validation
 } // namespace test
 } // namespace arm_compute
diff --git a/tests/validation/CL/WeightsReshape.cpp b/tests/validation/CL/WeightsReshape.cpp
index 6dae0c7625..30c231d499 100644
--- a/tests/validation/CL/WeightsReshape.cpp
+++ b/tests/validation/CL/WeightsReshape.cpp
@@ -79,7 +79,7 @@ TEST_SUITE(FP32)
 FIXTURE_DATA_TEST_CASE(RunSmall, CLWeightsReshapeFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(datasets::GroupedWeightsSmallShapes(), framework::dataset::make("DataType", DataType::F32)),
                                                                                                       framework::dataset::make("HasBias", { true, false })),
-                                                                                                      framework::dataset::make("NumGroups", { 1, 2, 3 })))
+                                                                                                      framework::dataset::make("NumGroups", { 1, 2, 3, 4 })))
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
@@ -87,7 +87,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLWeightsReshapeFixture<float>, framework::Data
 FIXTURE_DATA_TEST_CASE(RunLarge, CLWeightsReshapeFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::GroupedWeightsLargeShapes(), framework::dataset::make("DataType", DataType::F32)),
                                                                                                           framework::dataset::make("HasBias", { true, false })),
-                                                                                                          framework::dataset::make("NumGroups", { 1, 2, 3 })))
+                                                                                                          framework::dataset::make("NumGroups", { 1, 2, 3, 4 })))
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
diff --git a/tests/validation/fixtures/ConvolutionLayerFixture.h b/tests/validation/fixtures/ConvolutionLayerFixture.h
index 4a6326480c..3b420eac09 100644
--- a/tests/validation/fixtures/ConvolutionLayerFixture.h
+++ b/tests/validation/fixtures/ConvolutionLayerFixture.h
@@ -102,6 +102,10 @@ protected:
     TensorType compute_target(TensorShape input_shape, TensorShape weights_shape, const TensorShape &bias_shape, TensorShape output_shape, const PadStrideInfo &info,
                               bool reshape_weights, const Size2D &dilation, const ActivationLayerInfo act_info)
     {
+        ARM_COMPUTE_ERROR_ON((input_shape[2] % weights_shape[2]) != 0);
+
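+        // Shapes are still in NCHW order at this point, so index 2 is the channel
+        // dimension; the NHWC permutation below happens after num_groups is derived.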
+        const unsigned int num_groups = input_shape[2] / weights_shape[2];
+
         if(_data_layout == DataLayout::NHWC)
         {
             permute(input_shape, PermutationVector(2U, 0U, 1U));
@@ -123,7 +127,7 @@
 
         // Create and configure function
         FunctionType conv;
-        conv.configure(&src, &weights, &bias, &dst, info, weights_info, dilation, act_info);
+        conv.configure(&src, &weights, &bias, &dst, info, weights_info, dilation, act_info, num_groups);
 
         ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
         ARM_COMPUTE_EXPECT(weights.info()->is_resizable(), framework::LogLevel::ERRORS);
@@ -155,6 +159,10 @@ protected:
     SimpleTensor<T> compute_reference(const TensorShape &input_shape, const TensorShape &weights_shape, const TensorShape &bias_shape, const TensorShape &output_shape, const PadStrideInfo &info,
                                       const Size2D &dilation, const ActivationLayerInfo act_info)
     {
+        ARM_COMPUTE_ERROR_ON((input_shape[2] % weights_shape[2]) != 0);
+
+        const unsigned int num_groups = input_shape[2] / weights_shape[2];
+
         // Create reference
         SimpleTensor<T> src{ input_shape, _data_type, 1, _quantization_info };
         SimpleTensor<T> weights{ weights_shape, _data_type, 1, _quantization_info };
@@ -165,9 +173,9 @@ protected:
         fill(weights, 1);
         fill(bias, 2);
 
-        return (act_info.enabled()) ? reference::activation_layer<T>(reference::convolution_layer<T>(src, weights, bias, output_shape, info, dilation),
+        return (act_info.enabled()) ? reference::activation_layer<T>(reference::convolution_layer<T>(src, weights, bias, output_shape, info, dilation, num_groups),
                                                                      act_info) :
-                                      reference::convolution_layer<T>(src, weights, bias, output_shape, info, dilation);
+                                      reference::convolution_layer<T>(src, weights, bias, output_shape, info, dilation, num_groups);
     }
 
     TensorType _target{};
@@ -187,7 +195,8 @@ public:
     void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, Size2D dilation, bool reshape_weights, DataType data_type,
                DataLayout data_layout, ActivationLayerInfo act_info)
     {
-        ConvolutionValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(input_shape, weights_shape, bias_shape, output_shape, info, dilation, reshape_weights, data_type, data_layout,
+        ConvolutionValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(input_shape, weights_shape, bias_shape, output_shape, info, dilation, reshape_weights,
+                                                                                              data_type, data_layout,
                                                                                               QuantizationInfo(), act_info);
     }
 };
diff --git a/tests/validation/reference/ConvolutionLayer.cpp b/tests/validation/reference/ConvolutionLayer.cpp
index 2d314059dd..f41a6fc8c4 100644
--- a/tests/validation/reference/ConvolutionLayer.cpp
+++ b/tests/validation/reference/ConvolutionLayer.cpp
@@ -47,8 +47,10 @@ namespace
 template <typename T, typename TB>
 SimpleTensor<T> convolution_layer_nchw(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<TB> &bias, SimpleTensor<T> &dst, const PadStrideInfo &info,
-                                       const Size2D &dilation)
+                                       const Size2D &dilation, unsigned int num_groups)
 {
+    ARM_COMPUTE_ERROR_ON((src.shape()[2] / num_groups) != weights.shape()[2]);
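+    // Grouped convolution: the input channels are split into num_groups contiguous
+    // groups of depth_in / num_groups channels, and each group is convolved with its
+    // own set of depth_out / num_groups filters.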
+
     // Compute reference
     const int width_in  = src.shape().x();
     const int height_in = src.shape().y();
@@ -78,23 +80,28 @@ SimpleTensor<T> convolution_layer_nchw(const SimpleTensor<T> &src, const SimpleT
         {
             for(int xi = start_xi; xi < start_xi + end_xi; xi += stride_xi)
             {
-                for(int ofm = 0; ofm < depth_out; ++ofm)
+                for(int group = 0; group < static_cast<int>(num_groups); ++group)
                 {
-                    // Compute input and output offsets
-                    const int offset_in  = r * width_in * height_in * depth_in;
-                    const int xo         = (xi - start_xi) / stride_xi;
-                    const int yo         = (yi - start_yi) / stride_yi;
-                    const int offset_out = xo + yo * width_out + ofm * width_out * height_out + r * width_out * height_out * depth_out;
+                    for(int ofm = 0; ofm < static_cast<int>(depth_out / num_groups); ++ofm)
+                    {
+                        // Compute input and output offsets
+                        const int offset_in  = r * width_in * height_in * depth_in + (group * (depth_in / num_groups) * width_in * height_in);
+                        const int xo         = (xi - start_xi) / stride_xi;
+                        const int yo         = (yi - start_yi) / stride_yi;
+                        const int offset_out = xo + yo * width_out + ((ofm + group * (depth_out / num_groups)) * width_out * height_out) + (r * width_out * height_out * depth_out);
+                        const int offset_w   = (ofm + group * (depth_out / num_groups)) * width_weights * height_weights * depth_weights;
+                        const int offset_b   = (ofm + group * (depth_out / num_groups));
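+                        // e.g. with depth_out = 384 and num_groups = 2, (group, ofm) = (1, 0)
+                        // maps to absolute output channel 192 and selects the 193rd kernel.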
 
-                    ARM_COMPUTE_ASSERT(xo < width_out);
-                    ARM_COMPUTE_ASSERT(yo < height_out);
+                        ARM_COMPUTE_ASSERT(xo < width_out);
+                        ARM_COMPUTE_ASSERT(yo < height_out);
 
-                    // Compute 3D convolution
-                    convolution_3d::detail::convolution3d(src, weights, bias, dst,
-                                                          offset_in, ofm * width_weights * height_weights * depth_weights, ofm, offset_out,
-                                                          xi, yi,
-                                                          width_in, height_in, depth_in,
-                                                          width_weights, height_weights, dilation.x(), dilation.y());
+                        // Compute 3D convolution
+                        convolution_3d::detail::convolution3d(src, weights, bias, dst,
+                                                              offset_in, offset_w, offset_b, offset_out,
+                                                              xi, yi,
+                                                              width_in, height_in, (depth_in / num_groups),
+                                                              width_weights, height_weights, dilation.x(), dilation.y());
+                    }
                 }
             }
         }
@@ -104,7 +111,7 @@ SimpleTensor<T> convolution_layer_nchw(const SimpleTensor<T> &src, const SimpleT
 }
 template <typename T, typename TB>
 SimpleTensor<T> convolution_layer(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<TB> &bias, const TensorShape &output_shape, const PadStrideInfo &info,
-                                  const Size2D &dilation)
+                                  const Size2D &dilation, unsigned int num_groups)
 {
     // Create reference
     SimpleTensor<T> dst{ output_shape, src.data_type(), 1, src.quantization_info() };
@@ -115,20 +122,20 @@ SimpleTensor<T> convolution_layer(const SimpleTensor<T> &src, const SimpleTensor
         SimpleTensor<T> weights_nchw = reference::permute(weights, PermutationVector(1U, 2U, 0U));
         SimpleTensor<T> dst_nchw     = reference::permute(dst, PermutationVector(1U, 2U, 0U));
 
-        return reference::permute(convolution_layer_nchw(src_nchw, weights_nchw, bias, dst_nchw, info, dilation), PermutationVector(2U, 0U, 1U));
+        return reference::permute(convolution_layer_nchw(src_nchw, weights_nchw, bias, dst_nchw, info, dilation, num_groups), PermutationVector(2U, 0U, 1U));
     }
     else
     {
-        return convolution_layer_nchw(src, weights, bias, dst, info, dilation);
+        return convolution_layer_nchw(src, weights, bias, dst, info, dilation, num_groups);
     }
 }
 
 template SimpleTensor<float> convolution_layer(const SimpleTensor<float> &src, const SimpleTensor<float> &weights, const SimpleTensor<float> &bias, const TensorShape &output_shape,
-                                               const PadStrideInfo &info, const Size2D &dilation);
+                                               const PadStrideInfo &info, const Size2D &dilation, unsigned int num_groups);
 template SimpleTensor<half> convolution_layer(const SimpleTensor<half> &src, const SimpleTensor<half> &weights, const SimpleTensor<half> &bias, const TensorShape &output_shape,
-                                              const PadStrideInfo &info, const Size2D &dilation);
+                                              const PadStrideInfo &info, const Size2D &dilation, unsigned int num_groups);
 template SimpleTensor<uint8_t> convolution_layer(const SimpleTensor<uint8_t> &src, const SimpleTensor<uint8_t> &weights, const SimpleTensor<int32_t> &bias, const TensorShape &output_shape,
-                                                 const PadStrideInfo &info, const Size2D &dilation);
+                                                 const PadStrideInfo &info, const Size2D &dilation, unsigned int num_groups);
 } // namespace reference
 } // namespace validation
 } // namespace test
diff --git a/tests/validation/reference/ConvolutionLayer.h b/tests/validation/reference/ConvolutionLayer.h
index ff3b1531f4..ccce53a209 100644
--- a/tests/validation/reference/ConvolutionLayer.h
+++ b/tests/validation/reference/ConvolutionLayer.h
@@ -37,7 +37,7 @@ namespace reference
 {
 template <typename T, typename TB>
 SimpleTensor<T> convolution_layer(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<TB> &bias, const TensorShape &output_shape, const PadStrideInfo &info,
-                                  const Size2D &dilation = Size2D(1U, 1U));
+                                  const Size2D &dilation = Size2D(1U, 1U), unsigned int num_groups = 1);
 } // namespace reference
 } // namespace validation
 } // namespace test
--
cgit v1.2.1