From 916d1bcee42051721a82cfb46b52855c2fe56646 Mon Sep 17 00:00:00 2001 From: Gian Marco Iodice Date: Mon, 13 Aug 2018 11:20:41 +0100 Subject: COMPMID-1498 - Enable grouping in CLGEMMConvolutionLayer Change-Id: I15c7df21773145b03f42b6f78bd7ad2e5b8a5219 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/144126 Tested-by: Jenkins Reviewed-by: Giorgio Arena Reviewed-by: Georgios Pinitas --- src/core/CL/kernels/CLCol2ImKernel.cpp | 3 +- src/core/CL/kernels/CLIm2ColKernel.cpp | 2 + src/core/CL/kernels/CLWeightsReshapeKernel.cpp | 25 ++++++------- src/runtime/CL/functions/CLConvolutionLayer.cpp | 15 +++++--- .../CL/functions/CLGEMMConvolutionLayer.cpp | 43 ++++++++++++---------- .../GLES_COMPUTE/functions/GCConvolutionLayer.cpp | 4 +- src/runtime/NEON/functions/NEConvolutionLayer.cpp | 7 +++- .../NEON/functions/NEGEMMConvolutionLayer.cpp | 10 +++-- 8 files changed, 64 insertions(+), 45 deletions(-) (limited to 'src') diff --git a/src/core/CL/kernels/CLCol2ImKernel.cpp b/src/core/CL/kernels/CLCol2ImKernel.cpp index d7582dc943..40032f97c4 100644 --- a/src/core/CL/kernels/CLCol2ImKernel.cpp +++ b/src/core/CL/kernels/CLCol2ImKernel.cpp @@ -119,7 +119,8 @@ void CLCol2ImKernel::configure(const ICLTensor *input, ICLTensor *output, std::p _config_id = "col2im_"; _config_id += lower_string(string_from_data_type(input->info()->data_type())); _config_id += "_"; - _config_id += (num_groups > 1) ? "grouping_" : ""; + _config_id += support::cpp11::to_string(num_groups); + _config_id += "_"; _config_id += support::cpp11::to_string(input->info()->dimension(0)); _config_id += "_"; _config_id += support::cpp11::to_string(input->info()->dimension(1)); diff --git a/src/core/CL/kernels/CLIm2ColKernel.cpp b/src/core/CL/kernels/CLIm2ColKernel.cpp index 3d71567eee..0ba0d0e2f9 100644 --- a/src/core/CL/kernels/CLIm2ColKernel.cpp +++ b/src/core/CL/kernels/CLIm2ColKernel.cpp @@ -324,6 +324,8 @@ void CLIm2ColKernel::configure(const ICLTensor *input, ICLTensor *output, const _config_id += "_"; _config_id += lower_string(string_from_data_type(input->info()->data_type())); _config_id += "_"; + _config_id += support::cpp11::to_string(num_groups); + _config_id += "_"; _config_id += support::cpp11::to_string(output->info()->dimension(0)); _config_id += "_"; _config_id += support::cpp11::to_string(output->info()->dimension(1)); diff --git a/src/core/CL/kernels/CLWeightsReshapeKernel.cpp b/src/core/CL/kernels/CLWeightsReshapeKernel.cpp index 5ef0f5b152..7639a48a5f 100644 --- a/src/core/CL/kernels/CLWeightsReshapeKernel.cpp +++ b/src/core/CL/kernels/CLWeightsReshapeKernel.cpp @@ -38,7 +38,7 @@ using namespace arm_compute::misc::shape_calculator; namespace { -Status validate_arguments(const ITensorInfo *input, const ITensorInfo *biases, const ITensorInfo *output, const unsigned int num_groups) +Status validate_arguments(const ITensorInfo *input, const ITensorInfo *biases, const ITensorInfo *output, unsigned int num_groups) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input); @@ -75,12 +75,12 @@ CLWeightsReshapeKernel::CLWeightsReshapeKernel() { } -void CLWeightsReshapeKernel::configure(const ICLTensor *input, const ICLTensor *biases, ICLTensor *output, const unsigned int num_groups) +void CLWeightsReshapeKernel::configure(const ICLTensor *input, const ICLTensor *biases, ICLTensor *output, unsigned int num_groups) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); // Output tensor auto inizialitation if not yet initialized - auto_init_if_empty(*output->info(), input->info()->clone()->set_tensor_shape(compute_weights_reshaped_shape(*input->info(), (biases != nullptr)))); + auto_init_if_empty(*output->info(), input->info()->clone()->set_tensor_shape(compute_weights_reshaped_shape(*input->info(), (biases != nullptr), num_groups))); // Perform validation step ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), @@ -102,15 +102,6 @@ void CLWeightsReshapeKernel::configure(const ICLTensor *input, const ICLTensor * // Create kernel _kernel = static_cast(CLKernelLibrary::get().create_kernel("reshape_to_columns", build_opts.options())); - // Set static arguments - unsigned int idx = num_arguments_per_3D_tensor() + num_arguments_per_2D_tensor(); - idx += (biases != nullptr) ? num_arguments_per_1D_tensor() : 0; - _kernel.setArg(idx++, _input->info()->dimension(0)); - _kernel.setArg(idx++, _input->info()->dimension(1)); - _kernel.setArg(idx++, _input->info()->dimension(2)); - _kernel.setArg(idx++, _input->info()->dimension(3)); - _kernel.setArg(idx++, _output->info()->strides_in_bytes().z()); - // Configure window Window win = calculate_max_window(*input->info(), Steps()); // The CLWeightsReshapeKernel doesn't need padding so update_window_and_padding() can be skipped @@ -118,7 +109,7 @@ void CLWeightsReshapeKernel::configure(const ICLTensor *input, const ICLTensor * ICLKernel::configure_internal(win); } -Status CLWeightsReshapeKernel::validate(const ITensorInfo *input, const ITensorInfo *biases, const ITensorInfo *output, const unsigned int num_groups) +Status CLWeightsReshapeKernel::validate(const ITensorInfo *input, const ITensorInfo *biases, const ITensorInfo *output, unsigned int num_groups) { ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, biases, output, num_groups)); return Status{}; @@ -138,6 +129,14 @@ void CLWeightsReshapeKernel::run(const Window &window, cl::CommandQueue &queue) Window biases_window; Window biases_slice; + unsigned int idx = num_arguments_per_3D_tensor() + num_arguments_per_2D_tensor(); + idx += (_biases != nullptr) ? num_arguments_per_1D_tensor() : 0; + _kernel.setArg(idx++, _input->info()->dimension(0)); + _kernel.setArg(idx++, _input->info()->dimension(1)); + _kernel.setArg(idx++, _input->info()->dimension(2)); + _kernel.setArg(idx++, _input->info()->dimension(3)); + _kernel.setArg(idx++, _output->info()->strides_in_bytes().z()); + if(_biases != nullptr) { biases_window.use_tensor_dimensions(_biases->info()->tensor_shape()); diff --git a/src/runtime/CL/functions/CLConvolutionLayer.cpp b/src/runtime/CL/functions/CLConvolutionLayer.cpp index d36cce6505..0014e71734 100644 --- a/src/runtime/CL/functions/CLConvolutionLayer.cpp +++ b/src/runtime/CL/functions/CLConvolutionLayer.cpp @@ -43,17 +43,18 @@ CLConvolutionLayer::CLConvolutionLayer(std::shared_ptr memory_ma } void CLConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info, - const Size2D &dilation, const ActivationLayerInfo &act_info, bool enable_fast_math) + const Size2D &dilation, const ActivationLayerInfo &act_info, bool enable_fast_math, unsigned int num_groups) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output); ARM_COMPUTE_ERROR_THROW_ON(CLConvolutionLayer::validate(input->info(), weights->info(), ((biases != nullptr) ? biases->info() : nullptr), output->info(), conv_info, weights_info, dilation, act_info, - enable_fast_math)); + enable_fast_math, num_groups)); switch(CLConvolutionLayer::get_convolution_method(input->info(), weights->info(), output->info(), conv_info, weights_info, act_info, CLScheduler::get().target(), dilation, enable_fast_math)) { case ConvolutionMethod::WINOGRAD: { + ARM_COMPUTE_ERROR_ON(num_groups != 1); auto f = arm_compute::support::cpp14::make_unique(_memory_manager); f->configure(input, weights, biases, output, conv_info, act_info, enable_fast_math); _function = std::move(f); @@ -61,6 +62,7 @@ void CLConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weights, c } case ConvolutionMethod::DIRECT: { + ARM_COMPUTE_ERROR_ON(num_groups != 1); auto f = arm_compute::support::cpp14::make_unique(); f->configure(input, weights, biases, output, conv_info, act_info); _function = std::move(f); @@ -69,7 +71,7 @@ void CLConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weights, c case ConvolutionMethod::GEMM: { auto f = arm_compute::support::cpp14::make_unique(_memory_manager); - f->configure(input, weights, biases, output, conv_info, weights_info, dilation, act_info); + f->configure(input, weights, biases, output, conv_info, weights_info, dilation, act_info, num_groups); _function = std::move(f); break; } @@ -80,9 +82,10 @@ void CLConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weights, c } Status CLConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, - const WeightsInfo &weights_info, const Size2D &dilation, const ActivationLayerInfo &act_info, bool enable_fast_math) + const WeightsInfo &weights_info, const Size2D &dilation, const ActivationLayerInfo &act_info, bool enable_fast_math, unsigned int num_groups) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output); + ARM_COMPUTE_RETURN_ERROR_ON_MSG((num_groups != 1) && (input->data_layout() != DataLayout::NCHW), "Grouping (num_groups != 1) with NHWC data layout is not supported"); const GPUTarget gpu_target = CLScheduler::get().target(); @@ -91,19 +94,21 @@ Status CLConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo case ConvolutionMethod::WINOGRAD: { //Validate Winograd + ARM_COMPUTE_RETURN_ERROR_ON_MSG(num_groups != 1, "Grouping (num_groups != 1) with CLWinogradConvolutionLayer is not supported"); ARM_COMPUTE_RETURN_ON_ERROR(CLWinogradConvolutionLayer::validate(input, weights, biases, output, conv_info, act_info, enable_fast_math)); break; } case ConvolutionMethod::DIRECT: { // Validate direct convolution layer + ARM_COMPUTE_RETURN_ERROR_ON_MSG(num_groups != 1, "Grouping (num_groups != 1) with CLDirectConvolutionLayer is not supported"); ARM_COMPUTE_RETURN_ON_ERROR(CLDirectConvolutionLayer::validate(input, weights, biases, output, conv_info, act_info)); break; } case ConvolutionMethod::GEMM: { // Validate gemm-based convolution layer - ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMConvolutionLayer::validate(input, weights, biases, output, conv_info, weights_info, dilation, act_info)); + ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMConvolutionLayer::validate(input, weights, biases, output, conv_info, weights_info, dilation, act_info, num_groups)); break; } default: diff --git a/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp b/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp index 1e639d9dff..782fe710e7 100644 --- a/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp +++ b/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp @@ -43,23 +43,24 @@ CLConvolutionLayerReshapeWeights::CLConvolutionLayerReshapeWeights() { } -void CLConvolutionLayerReshapeWeights::configure(const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output) +void CLConvolutionLayerReshapeWeights::configure(const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, unsigned int num_groups) { // Perform validation step ARM_COMPUTE_ERROR_ON_NULLPTR(weights, output); ARM_COMPUTE_ERROR_THROW_ON(CLConvolutionLayerReshapeWeights::validate(weights->info(), (biases != nullptr) ? biases->info() : nullptr, - output->info())); + output->info(), + num_groups)); const bool append_biases = (biases != nullptr) && !is_data_type_quantized_asymmetric(weights->info()->data_type()); const ICLTensor *biases_to_use = (append_biases) ? biases : nullptr; - _weights_reshape_kernel.configure(weights, biases_to_use, output); + _weights_reshape_kernel.configure(weights, biases_to_use, output, num_groups); output->info()->set_quantization_info(weights->info()->quantization_info()); } -Status CLConvolutionLayerReshapeWeights::validate(const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output) +Status CLConvolutionLayerReshapeWeights::validate(const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, unsigned int num_groups) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(weights); ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::QASYMM8, DataType::F16, DataType::F32); @@ -78,7 +79,7 @@ Status CLConvolutionLayerReshapeWeights::validate(const ITensorInfo *weights, co { ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(weights, output); - CLWeightsReshapeKernel::validate(weights, biases, output); + CLWeightsReshapeKernel::validate(weights, biases, output, num_groups); } return Status{}; @@ -153,7 +154,7 @@ Status CLGEMMConvolutionLayer::validate_mm(const ITensorInfo *input, const ITens } void CLGEMMConvolutionLayer::configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info, - const Size2D &dilation, const ActivationLayerInfo &act_info) + const Size2D &dilation, const ActivationLayerInfo &act_info, unsigned int num_groups) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output); @@ -164,7 +165,8 @@ void CLGEMMConvolutionLayer::configure(const ICLTensor *input, const ICLTensor * conv_info, weights_info, dilation, - act_info)); + act_info, + num_groups)); const DataType data_type = input->info()->data_type(); const DataLayout data_layout = input->info()->data_layout(); @@ -208,11 +210,11 @@ void CLGEMMConvolutionLayer::configure(const ICLTensor *input, const ICLTensor * conv_info, dilation); - unsigned int mat_weights_cols = weights->info()->dimension(idx_kernels); + unsigned int mat_weights_cols = weights->info()->dimension(idx_kernels) / num_groups; // _weights_reshaped will be auto configured in the kernel. // Just append biases and do not transpose 1xW as it will be reshaped in CLGEMM - _reshape_weights.configure(weights, biases_to_use, &_weights_reshaped); + _reshape_weights.configure(weights, biases_to_use, &_weights_reshaped, num_groups); // Create tensor to store im2col reshaped inputs if(!_skip_im2col) @@ -220,7 +222,7 @@ void CLGEMMConvolutionLayer::configure(const ICLTensor *input, const ICLTensor * _memory_group.manage(&_im2col_output); // Configure and tune im2col. im2col output shape is auto-initialized - _im2col_kernel.configure(input, &_im2col_output, Size2D(kernel_width, kernel_height), conv_info, _append_bias, dilation); + _im2col_kernel.configure(input, &_im2col_output, Size2D(kernel_width, kernel_height), conv_info, _append_bias, dilation, num_groups); // Set quantization info _im2col_output.info()->set_quantization_info(input->info()->quantization_info()); @@ -283,7 +285,7 @@ void CLGEMMConvolutionLayer::configure(const ICLTensor *input, const ICLTensor * if(input->info()->data_layout() == DataLayout::NCHW) { // Configure and tune Col2Im - _col2im_kernel.configure(_is_quantized ? gemm_output_staged_to_use : gemm_output_to_use, output, std::make_pair(conv_w, conv_h)); + _col2im_kernel.configure(_is_quantized ? gemm_output_staged_to_use : gemm_output_to_use, output, std::make_pair(conv_w, conv_h), num_groups); CLScheduler::get().tune_kernel_static(_col2im_kernel); } else @@ -314,13 +316,16 @@ void CLGEMMConvolutionLayer::configure(const ICLTensor *input, const ICLTensor * } Status CLGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, - const WeightsInfo &weights_info, const Size2D &dilation, const ActivationLayerInfo &act_info) + const WeightsInfo &weights_info, const Size2D &dilation, const ActivationLayerInfo &act_info, unsigned int num_groups) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output); ARM_COMPUTE_RETURN_ERROR_ON_MSG(weights_info.are_reshaped(), "Weights already reshaped are not supported!"); ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, weights); + ARM_COMPUTE_RETURN_ERROR_ON_MSG((num_groups != 1) && (input->data_layout() != DataLayout::NCHW), "Grouping (num_groups != 1) with NHWC data layout is not supported"); + ARM_COMPUTE_RETURN_ERROR_ON_MSG((num_groups != 1) && (input->data_type() == DataType::QASYMM8), "Grouping (num_groups != 1) is not supported with QASYMM8"); + ARM_COMPUTE_RETURN_ERROR_ON(((input->dimension(2) / weights->dimension(2)) != num_groups) && (input->data_layout() == DataLayout::NCHW)); const DataLayout data_layout = input->data_layout(); const DataType data_type = input->data_type(); @@ -343,7 +348,7 @@ Status CLGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI const bool skip_im2col = (data_layout == DataLayout::NHWC && kernel_width == 1 && kernel_height == 1 && conv_info.stride().first == 1 && conv_info.stride().second == 1) && !is_quantized; const bool append_bias = (biases != nullptr) && (!is_quantized); - ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_channel) != input->dimension(idx_channel)); + ARM_COMPUTE_RETURN_ERROR_ON((weights->dimension(idx_channel) * num_groups) != input->dimension(idx_channel)); ARM_COMPUTE_RETURN_ERROR_ON(weights->num_dimensions() > 4); // Validate biases @@ -377,11 +382,11 @@ Status CLGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI conv_info, dilation); - unsigned int mat_weights_cols = weights->dimension(idx_kernels); + unsigned int mat_weights_cols = weights->dimension(idx_kernels) / num_groups; // Output tensor auto inizialitation if not yet initialized - ARM_COMPUTE_RETURN_ON_ERROR(CLConvolutionLayerReshapeWeights::validate(weights, is_quantized ? nullptr : biases, nullptr)); - weights_reshaped_info = TensorInfo(compute_weights_reshaped_shape(*weights, (append_bias && !skip_im2col)), 1, data_type); + ARM_COMPUTE_RETURN_ON_ERROR(CLConvolutionLayerReshapeWeights::validate(weights, is_quantized ? nullptr : biases, nullptr, num_groups)); + weights_reshaped_info = TensorInfo(compute_weights_reshaped_shape(*weights, (append_bias && !skip_im2col), num_groups), 1, data_type); weights_to_use = &weights_reshaped_info; if(!skip_im2col) @@ -389,11 +394,11 @@ Status CLGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI const Size2D kernel_dims(kernel_width, kernel_height); // Output tensor auto initialization if not yet initialized - TensorShape expected_output_shape = compute_im2col_conv_shape(input, kernel_dims, conv_info, append_bias, dilation, true /* num_groups == 1, num_groups */); + TensorShape expected_output_shape = compute_im2col_conv_shape(input, kernel_dims, conv_info, append_bias, dilation, num_groups == 1, num_groups); auto_init_if_empty(im2col_reshaped_info, input->clone()->set_tensor_shape(expected_output_shape)); - ARM_COMPUTE_RETURN_ON_ERROR(CLIm2ColKernel::validate(input, &im2col_reshaped_info, kernel_dims, conv_info, append_bias, dilation)); + ARM_COMPUTE_RETURN_ON_ERROR(CLIm2ColKernel::validate(input, &im2col_reshaped_info, kernel_dims, conv_info, append_bias, dilation, num_groups)); gemm_input_to_use = &im2col_reshaped_info; } else if(append_bias) @@ -438,7 +443,7 @@ Status CLGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI { ARM_COMPUTE_RETURN_ON_ERROR(CLCol2ImKernel::validate(is_quantized ? gemm_output_staged_to_use : gemm_output_to_use, output, - std::make_pair(conv_w, conv_h))); + std::make_pair(conv_w, conv_h), num_groups)); } } diff --git a/src/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.cpp b/src/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.cpp index 5cfd72f724..c58d184afa 100644 --- a/src/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.cpp +++ b/src/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.cpp @@ -88,7 +88,7 @@ Status GCConvolutionLayer::validate_mm(const ITensorInfo *input, const ITensorIn } void GCConvolutionLayer::configure(const IGCTensor *input, const IGCTensor *weights, const IGCTensor *biases, IGCTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info, - const Size2D &dilation, const ActivationLayerInfo &act_info) + const Size2D &dilation, const ActivationLayerInfo &act_info, unsigned int num_groups) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights); ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); @@ -96,6 +96,8 @@ void GCConvolutionLayer::configure(const IGCTensor *input, const IGCTensor *weig ARM_COMPUTE_ERROR_ON_MSG(weights_info.are_reshaped(), "Weights already reshaped are not supported!"); ARM_COMPUTE_ERROR_ON(weights->info()->dimension(2) != input->info()->dimension(2)); ARM_COMPUTE_ERROR_ON(weights->info()->num_dimensions() > 4); + ARM_COMPUTE_ERROR_ON(num_groups > 1); + ARM_COMPUTE_UNUSED(num_groups); _is_prepared = false; _original_weights = weights; diff --git a/src/runtime/NEON/functions/NEConvolutionLayer.cpp b/src/runtime/NEON/functions/NEConvolutionLayer.cpp index d71fd5a715..931e5db65b 100644 --- a/src/runtime/NEON/functions/NEConvolutionLayer.cpp +++ b/src/runtime/NEON/functions/NEConvolutionLayer.cpp @@ -42,10 +42,11 @@ NEConvolutionLayer::NEConvolutionLayer(std::shared_ptr memory_ma } void NEConvolutionLayer::configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info, - const Size2D &dilation, const ActivationLayerInfo &act_info, bool enable_fast_math) + const Size2D &dilation, const ActivationLayerInfo &act_info, bool enable_fast_math, unsigned int num_groups) { // Perform validate step ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output); + ARM_COMPUTE_UNUSED(num_groups); ARM_COMPUTE_ERROR_THROW_ON(NEConvolutionLayer::validate(input->info(), weights->info(), ((biases != nullptr) ? biases->info() : nullptr), output->info(), conv_info, weights_info, dilation, act_info, enable_fast_math)); @@ -79,8 +80,10 @@ void NEConvolutionLayer::configure(ITensor *input, const ITensor *weights, const } Status NEConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, - const WeightsInfo &weights_info, const Size2D &dilation, const ActivationLayerInfo &act_info, bool enable_fast_math) + const WeightsInfo &weights_info, const Size2D &dilation, const ActivationLayerInfo &act_info, bool enable_fast_math, unsigned int num_groups) { + ARM_COMPUTE_RETURN_ERROR_ON_MSG((num_groups != 1), "Grouping (num_groups != 1) is not supported on NEON"); + switch(NEConvolutionLayer::get_convolution_method(input, weights, output, conv_info, weights_info, dilation, act_info)) { case ConvolutionMethod::WINOGRAD: diff --git a/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp b/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp index 52b461e255..b76cf6aa10 100644 --- a/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp +++ b/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp @@ -167,10 +167,10 @@ Status NEGEMMConvolutionLayer::validate_gemm3d(DataType data_type, int gemm_3d_d } void NEGEMMConvolutionLayer::configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info, - const Size2D &dilation, const ActivationLayerInfo &act_info) + const Size2D &dilation, const ActivationLayerInfo &act_info, unsigned int num_groups) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output); - + ARM_COMPUTE_UNUSED(num_groups); ARM_COMPUTE_ERROR_THROW_ON(NEGEMMConvolutionLayer::validate(input->info(), weights->info(), biases != nullptr ? biases->info() : nullptr, @@ -178,7 +178,8 @@ void NEGEMMConvolutionLayer::configure(const ITensor *input, const ITensor *weig conv_info, weights_info, dilation, - act_info)); + act_info, + num_groups)); const DataType data_type = input->info()->data_type(); const DataLayout data_layout = input->info()->data_layout(); @@ -346,13 +347,14 @@ void NEGEMMConvolutionLayer::configure(const ITensor *input, const ITensor *weig } Status NEGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, - const WeightsInfo &weights_info, const Size2D &dilation, const ActivationLayerInfo &act_info) + const WeightsInfo &weights_info, const Size2D &dilation, const ActivationLayerInfo &act_info, unsigned int num_groups) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output); ARM_COMPUTE_RETURN_ERROR_ON_MSG(weights_info.are_reshaped(), "Weights already reshaped are not supported!"); ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, weights); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(num_groups > 1, "Grouping (num_groups != 1) is not supported on NEON"); const DataLayout data_layout = input->data_layout(); const DataType data_type = input->data_type(); -- cgit v1.2.1