aboutsummaryrefslogtreecommitdiff
path: root/arm_compute
diff options
context:
space:
mode:
author Gian Marco Iodice <gianmarco.iodice@arm.com> 2018-08-13 11:20:41 +0100
committer Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:54:54 +0000
commit 916d1bcee42051721a82cfb46b52855c2fe56646 (patch)
tree e3e38a8deddc558cabeda6fb7d14b2d45c8db2c4 /arm_compute
parent 61de78aba1b405663c6620be15418873a2ee914a (diff)
download ComputeLibrary-916d1bcee42051721a82cfb46b52855c2fe56646.tar.gz
COMPMID-1498 - Enable grouping in CLGEMMConvolutionLayer
Change-Id: I15c7df21773145b03f42b6f78bd7ad2e5b8a5219
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/144126
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Giorgio Arena <giorgio.arena@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Diffstat (limited to 'arm_compute')
-rw-r--r-- arm_compute/core/CL/kernels/CLCol2ImKernel.h 5
-rw-r--r-- arm_compute/core/CL/kernels/CLIm2ColKernel.h 4
-rw-r--r-- arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h 8
-rw-r--r-- arm_compute/core/NEON/kernels/NEIm2ColKernel.h 4
-rw-r--r-- arm_compute/core/utils/misc/ShapeCalculator.h 9
-rw-r--r-- arm_compute/runtime/CL/functions/CLConvolutionLayer.h 9
-rw-r--r-- arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h 28
-rw-r--r-- arm_compute/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.h 23
-rw-r--r-- arm_compute/runtime/NEON/functions/NEConvolutionLayer.h 9
-rw-r--r-- arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h 6
10 files changed, 49 insertions, 56 deletions
diff --git a/arm_compute/core/CL/kernels/CLCol2ImKernel.h b/arm_compute/core/CL/kernels/CLCol2ImKernel.h
index 5c047ca091..2a18ae08c8 100644
--- a/arm_compute/core/CL/kernels/CLCol2ImKernel.h
+++ b/arm_compute/core/CL/kernels/CLCol2ImKernel.h
@@ -63,12 +63,11 @@ public:
CLCol2ImKernel &operator=(CLCol2ImKernel &&) = default;
/** Default destructor */
~CLCol2ImKernel() = default;
-
/** Set the input and output of the kernel.
*
* @param[in] input The input tensor to convert. Data types supported: QASYMM8/F16/F32
* @param[out] output The output tensor. 3 lower dimensions represent a single output [width, height, OFM],
- * while the rest represent batch of outputs. Data types supported: Same as @p input
+ * while the rest represent batch of outputs. Data types supported: Same as @p input. Data layout: NCHW
* @param[in] convolved_dims Output convolved dimensions.
* @param[in] num_groups (Optional) Number of groups when performing a grouped convolution
*/
@@ -77,7 +76,7 @@ public:
*
* @param[in] input The input tensor to convert. Data types supported: QASYMM8/F16/F32
* @param[in] output The output tensor. 3 lower dimensions represent a single output [width, height, OFM],
- * while the rest represent batch of outputs. Data types supported: Same as @p input
+ * while the rest represent batch of outputs. Data types supported: Same as @p input. Data layout: NCHW
* @param[in] convolved_dims Output convolved dimensions.
* @param[in] num_groups (Optional) Number of groups when performing a grouped convolution
*
diff --git a/arm_compute/core/CL/kernels/CLIm2ColKernel.h b/arm_compute/core/CL/kernels/CLIm2ColKernel.h
index c678f277cb..0647f5dcec 100644
--- a/arm_compute/core/CL/kernels/CLIm2ColKernel.h
+++ b/arm_compute/core/CL/kernels/CLIm2ColKernel.h
@@ -76,7 +76,7 @@ public:
* @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
* @param[in] has_bias In case biases are provided expands the matrix with 1.
* @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
- * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution
+ * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is only supported for NCHW data layout
*/
void configure(const ICLTensor *input, ICLTensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation = Size2D(1U, 1U),
unsigned int num_groups = 1);
@@ -90,7 +90,7 @@ public:
* @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
* @param[in] has_bias In case biases are provided expands the matrix with 1.
* @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
- * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution
+ * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is only supported for NCHW data layout
*
* @return a status
*/
diff --git a/arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h b/arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h
index d3bbbaf470..bdc5792641 100644
--- a/arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h
+++ b/arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h
@@ -75,10 +75,10 @@ public:
* @warning Appending biases to weights reshaped matrix is not supported for quantized asymmetric types.
* @param[out] output The output tensor. Should be a 2D Tensor if there are no groups and the weights are not shared; a 3D Tensor otherwise.
* Data types supported: Same as @p input
- * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution.
+ * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is only supported for NCHW data layout
* Number of groups greater than one are only supported for NCHW data layout, and the number of weights must be a multiple of it.
*/
- void configure(const ICLTensor *input, const ICLTensor *biases, ICLTensor *output, const unsigned int num_groups = 1);
+ void configure(const ICLTensor *input, const ICLTensor *biases, ICLTensor *output, unsigned int num_groups = 1);
/** Static function to check if given info will lead to a valid configuration of @ref CLWeightsReshapeKernel
*
* @param[in] input The input tensor to convert. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] if shared,
@@ -88,12 +88,12 @@ public:
* @warning Appending biases to weights reshaped matrix is not supported for quantized asymmetric types.
* @param[in] output The output tensor. Should be a 2D Tensor if there are no groups and the weights are not shared; a 3D Tensor otherwise.
* Data types supported: Same as @p input
- * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution.
+ * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is only supported for NCHW data layout
* Number of groups greater than one are only supported for NCHW data layout, and the number of weights must be a multiple of it.
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *biases, const ITensorInfo *output, const unsigned int num_groups = 1);
+ static Status validate(const ITensorInfo *input, const ITensorInfo *biases, const ITensorInfo *output, unsigned int num_groups = 1);
// Inherited methods overridden:
void run(const Window &window, cl::CommandQueue &queue) override;
diff --git a/arm_compute/core/NEON/kernels/NEIm2ColKernel.h b/arm_compute/core/NEON/kernels/NEIm2ColKernel.h
index 37145a38c1..38bdff0e98 100644
--- a/arm_compute/core/NEON/kernels/NEIm2ColKernel.h
+++ b/arm_compute/core/NEON/kernels/NEIm2ColKernel.h
@@ -84,7 +84,7 @@ public:
* @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
* @param[in] has_bias In case biases are provided expands the matrix with 1.
* @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
- * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution
+ * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is not supported
* @param[in] is_fully_connected (Optional) Determines whether this kernel will be called by @ref NEFullyConnectedLayer in order to validate the arguments
* @param[in] is_flatten (Optional) Determines whether this kernel will be called by @ref NEFlattenLayer in order to validate the arguments
*/
@@ -100,7 +100,7 @@ public:
* @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
* @param[in] has_bias In case biases are provided expands the matrix with 1.
* @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
- * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution
+ * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is not supported
* @param[in] is_fully_connected (Optional)Determines whether this kernel will be called by @ref NEFullyConnectedLayer in order to validate the arguments
* @param[in] is_flatten (Optional) Determines whether this kernel will be called by @ref NEFlattenLayer in order to validate the arguments
*
diff --git a/arm_compute/core/utils/misc/ShapeCalculator.h b/arm_compute/core/utils/misc/ShapeCalculator.h
index 8a00c22306..c40e7119b2 100644
--- a/arm_compute/core/utils/misc/ShapeCalculator.h
+++ b/arm_compute/core/utils/misc/ShapeCalculator.h
@@ -55,19 +55,20 @@ inline TensorShape compute_permutation_output_shape(const ITensorInfo &input, co
permute(output_shape, perm);
return output_shape;
}
-inline TensorShape compute_weights_reshaped_shape(const ITensorInfo &weights, bool has_bias = false, const unsigned int num_groups = 1)
+inline TensorShape compute_weights_reshaped_shape(const ITensorInfo &weights, bool has_bias = false, unsigned int num_groups = 1)
{
// Number of groups greater than one are only supported for NCHW data layout, and the number of weights must be a multiple of it.
-
ARM_COMPUTE_ERROR_ON(num_groups == 0);
- ARM_COMPUTE_ERROR_ON((weights.dimension(3) % num_groups) != 0);
ARM_COMPUTE_ERROR_ON(weights.data_layout() == DataLayout::NHWC && num_groups > 1);
+ ARM_COMPUTE_ERROR_ON((weights.dimension(3) % num_groups) != 0);
// Calculate output shape
TensorShape weights_reshaped{ weights.tensor_shape() };
+ weights_reshaped.set(3, weights_reshaped[3] / num_groups);
+
weights_reshaped.collapse(3);
const size_t tmp_dim = weights_reshaped[0];
- weights_reshaped.set(0, weights_reshaped[1] / num_groups);
+ weights_reshaped.set(0, weights_reshaped[1]);
weights_reshaped.set(1, tmp_dim + (has_bias ? 1 : 0));
if(weights.num_dimensions() < 5)
{
diff --git a/arm_compute/runtime/CL/functions/CLConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLConvolutionLayer.h
index c9a74f2a4c..8270e9723e 100644
--- a/arm_compute/runtime/CL/functions/CLConvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLConvolutionLayer.h
@@ -60,10 +60,11 @@ public:
* @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
* @param[in] act_info (Optional) Activation layer information in case of a fused activation.
* @param[in] enable_fast_math (Optional) Enable fast math computation. In case this flag were set, the function could dispatch the fastest implementation
- * available which may introduce a drop of accuracy as well. Default is false
+ * available which may introduce a drop of accuracy as well. Default is false
+ * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is only supported for NCHW data layout
*/
void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info = WeightsInfo(),
- const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), bool enable_fast_math = false);
+ const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), bool enable_fast_math = false, unsigned int num_groups = 1);
/** Static function to check if given info will lead to a valid configuration of @ref CLConvolutionLayer
*
* @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
@@ -79,11 +80,13 @@ public:
* @param[in] act_info (Optional) Activation layer information in case of a fused activation.
* @param[in] enable_fast_math (Optional) Enable fast math computation. In case this flag were set, the function could dispatch the fastest implementation
* available which may introduce a drop of accuracy as well. Default is false
+ * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is only supported for NCHW data layout
*
* @return a status
*/
static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
- const WeightsInfo &weights_info = WeightsInfo(), const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), bool enable_fast_math = false);
+ const WeightsInfo &weights_info = WeightsInfo(), const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), bool enable_fast_math = false,
+ unsigned int num_groups = 1);
/** Static function to check if given info will return the convolution called by @ref CLConvolutionLayer
*
* @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
diff --git a/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h
index 8538d83c2b..c6ae535713 100644
--- a/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h
@@ -59,22 +59,24 @@ public:
CLConvolutionLayerReshapeWeights();
/** Set the input and output tensors.
*
- * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
- * Data type supported: QASYMM8/F16/F32.
- * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p weights.
- * @param[out] output Destination tensor. Data types supported: Same as @p weights.
+ * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
+ * Data type supported: QASYMM8/F16/F32.
+ * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p weights.
+ * @param[out] output Destination tensor. Data types supported: Same as @p weights.
+ * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is only supported for NCHW data layout
*/
- void configure(const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output);
+ void configure(const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, unsigned int num_groups = 1);
/** Static function to check if given info will lead to a valid configuration of @ref CLConvolutionLayerReshapeWeights
*
- * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
- * Data type supported: QASYMM8/F16/F32.
- * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p weights.
- * @param[in] output Destination tensor. Data types supported: Same as @p weights.
+ * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
+ * Data type supported: QASYMM8/F16/F32.
+ * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p weights.
+ * @param[in] output Destination tensor. Data types supported: Same as @p weights.
+ * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is only supported for NCHW data layout
*
* @return a status
*/
- static Status validate(const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output);
+ static Status validate(const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, unsigned int num_groups = 1);
// Inherited methods overridden:
void run() override;
@@ -122,9 +124,10 @@ public:
* tensor has also been transposed with CLGEMMTranspose1xWKernel. Data type supported: Same as @p input.
* @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
* @param[in] act_info (Optional) Activation layer information in case of a fused activation.
+ * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is only supported for NCHW data layout
*/
void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info = WeightsInfo(),
- const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo());
+ const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), unsigned int num_groups = 1);
/** Static function to check if given info will lead to a valid configuration of @ref CLGEMMConvolutionLayer.
*
* @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
@@ -140,11 +143,12 @@ public:
* tensor has also been transposed with CLGEMMTranspose1xWKernel. Data type supported: Same as @p input.
* @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
* @param[in] act_info (Optional) Activation layer information in case of a fused activation.
+ * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is only supported for NCHW data layout
*
* @return a status
*/
static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
- const WeightsInfo &weights_info = WeightsInfo(), const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo());
+ const WeightsInfo &weights_info = WeightsInfo(), const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), unsigned int num_groups = 1);
// Inherited methods overridden:
void run() override;
diff --git a/arm_compute/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.h b/arm_compute/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.h
index 421150e18e..4fac95e72f 100644
--- a/arm_compute/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.h
+++ b/arm_compute/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.h
@@ -101,29 +101,10 @@ public:
* tensor has also been transposed with GCGEMMTranspose1xWKernel. Data type supported: Same as @p input.
* @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
* @param[in] act_info (Optional) Activation layer information in case of a fused activation.
+ * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is not supported
*/
void configure(const IGCTensor *input, const IGCTensor *weights, const IGCTensor *biases, IGCTensor *output, const PadStrideInfo &conv_info,
- const WeightsInfo &weights_info = WeightsInfo(), const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo());
- /** Static function to check if given info will lead to a valid configuration of @ref GCConvolutionLayer.
- *
- * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
- * while every optional dimension from 4 and above represent a batch of inputs.
- * Data types supported: QASYMM8/F16/F32.
- * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: Same as @p input.
- * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
- * Data type supported: Should match @p input data type, except for input of QASYMM8 type where biases should be of S32 type.
- * @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
- * Data types supported: Same as @p input.
- * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo.
- * @param[in] weights_info Specifies if the weights tensor has been reshaped with GCWeightsReshapeKernel. If this is not part of the fully connected layer the weights
- * tensor has also been transposed with GCGEMMTranspose1xWKernel. Data type supported: Same as @p input.
- * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
- * @param[in] act_info (Optional) Activation layer information in case of a fused activation.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
- const WeightsInfo &weights_info = WeightsInfo(), const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo());
+ const WeightsInfo &weights_info = WeightsInfo(), const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), unsigned int num_groups = 1);
// Inherited methods overridden:
void run() override;
diff --git a/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h
index c4226cbc5d..5b53aec9e6 100644
--- a/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h
@@ -64,10 +64,11 @@ public:
* @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
* @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported.
* @param[in] enable_fast_math (Optional) Enable fast math computation. In case this flag were set, the function could dispatch the fastest implementation
- * available which may introduce a drop of accuracy as well. Default is false
+ * available which may introduce a drop of accuracy as well. Default is false
+ * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is not supported
*/
void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info = WeightsInfo(),
- const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), bool enable_fast_math = false);
+ const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), bool enable_fast_math = false, unsigned int num_groups = 1);
/** Static function to check if given info will lead to a valid configuration of @ref NEConvolutionLayer
*
* @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
@@ -85,11 +86,13 @@ public:
* @param[in] act_info (Optional) Activation layer information in case of a fused activation.
* @param[in] enable_fast_math (Optional) Enable fast math computation. In case this flag were set, the function could dispatch the fastest implementation
* available which may introduce a drop of accuracy as well. Default is false
+ * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is not supported
*
* @return a status
*/
static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
- const WeightsInfo &weights_info = WeightsInfo(), const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), bool enable_fast_math = false);
+ const WeightsInfo &weights_info = WeightsInfo(), const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), bool enable_fast_math = false,
+ unsigned int num_groups = 1);
/** Static function to check if given info will return the convolution called by @ref NEConvolutionLayer
*
* @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
diff --git a/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h
index e587cb4e6f..dc00b17efc 100644
--- a/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h
@@ -116,9 +116,10 @@ public:
* tensor has also been transposed with NEGEMMTranspose1xWKernel. Data type supported: Same as @p input.
* @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
* @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported.
+ * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is not supported
*/
void configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info = WeightsInfo(),
- const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo());
+ const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), unsigned int num_groups = 1);
/** Static function to check if given info will lead to a valid configuration of @ref NEGEMMConvolutionLayer
*
* @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
@@ -134,11 +135,12 @@ public:
* tensor has also been transposed with NEGEMMTranspose1xWKernel. Data type supported: Same as @p input.
* @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1).
* @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU supported.
+ * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution. num_groups != 1 is not supported
*
* @return a status
*/
static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
- const WeightsInfo &weights_info = WeightsInfo(), const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo());
+ const WeightsInfo &weights_info = WeightsInfo(), const Size2D &dilation = Size2D(1U, 1U), const ActivationLayerInfo &act_info = ActivationLayerInfo(), unsigned int num_groups = 1);
// Inherited methods overridden:
void run() override;