From 0f170396e84836ad8c54d54421e95c61812968be Mon Sep 17 00:00:00 2001 From: Giorgio Arena Date: Wed, 18 Jul 2018 16:13:12 +0100 Subject: COMPMID-1342 Add grouping support to CLIm2ColKernel Change-Id: I4afb19751520a90fee27fb49b775cd10e92a94f5 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/140476 Reviewed-by: Gian Marco Iodice Tested-by: Jenkins --- src/runtime/CL/functions/CLGEMM.cpp | 6 ++---- src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp | 2 +- src/runtime/NEON/functions/NEFlattenLayer.cpp | 2 +- src/runtime/NEON/functions/NEFullyConnectedLayer.cpp | 4 ++-- src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp | 4 ++-- src/runtime/NEON/functions/NEIm2Col.cpp | 10 ++++++---- src/runtime/NEON/functions/NELocallyConnectedLayer.cpp | 2 +- 7 files changed, 15 insertions(+), 15 deletions(-) (limited to 'src/runtime') diff --git a/src/runtime/CL/functions/CLGEMM.cpp b/src/runtime/CL/functions/CLGEMM.cpp index a8d7058f2a..1ad8531920 100644 --- a/src/runtime/CL/functions/CLGEMM.cpp +++ b/src/runtime/CL/functions/CLGEMM.cpp @@ -181,7 +181,6 @@ Status CLGEMM::validate(const ITensorInfo *a, const ITensorInfo *b, const ITenso TensorInfo tmp_a_info{}; TensorInfo tmp_b_info{}; - TensorInfo tmp_output_info{}; // Get the GPU target const GPUTarget gpu_target = CLScheduler::get().target(); @@ -229,13 +228,12 @@ Status CLGEMM::validate(const ITensorInfo *a, const ITensorInfo *b, const ITenso } // Validate matrix multiply - auto_init_if_empty(tmp_output_info, matrix_a_info->clone()->set_tensor_shape(compute_mm_shape(*matrix_a_info, *matrix_b_info, run_interleave_transpose, reshape_info))); - ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMMatrixMultiplyKernel::validate(matrix_a_info, matrix_b_info, &tmp_output_info, alpha, run_interleave_transpose, reshape_info, gpu_target)); + ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMMatrixMultiplyKernel::validate(matrix_a_info, matrix_b_info, output, alpha, run_interleave_transpose, reshape_info, gpu_target)); if(beta != 0 && c != nullptr) { // Validate matrix addition kernel - ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMMatrixAdditionKernel::validate(c, &tmp_output_info, beta)); + ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMMatrixAdditionKernel::validate(c, output, beta)); } return Status{}; diff --git a/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp b/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp index ca6157ef13..26fd906dd1 100644 --- a/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp +++ b/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp @@ -391,7 +391,7 @@ Status CLGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI const Size2D kernel_dims(kernel_width, kernel_height); // Output tensor auto initialization if not yet initialized - TensorShape expected_output_shape = compute_im2col_conv_shape(input, kernel_dims, conv_info, append_bias, dilation, true); + TensorShape expected_output_shape = compute_im2col_conv_shape(input, kernel_dims, conv_info, append_bias, dilation, true /* num_groups == 1, num_groups */); auto_init_if_empty(im2col_reshaped_info, input->clone()->set_tensor_shape(expected_output_shape)); diff --git a/src/runtime/NEON/functions/NEFlattenLayer.cpp b/src/runtime/NEON/functions/NEFlattenLayer.cpp index 32edf93b63..1814d61e2f 100644 --- a/src/runtime/NEON/functions/NEFlattenLayer.cpp +++ b/src/runtime/NEON/functions/NEFlattenLayer.cpp @@ -32,6 +32,6 @@ using namespace arm_compute; void NEFlattenLayer::configure(const ITensor *input, ITensor *output) { auto k = arm_compute::support::cpp14::make_unique(); - k->configure(input, output, Size2D(1, 1), PadStrideInfo(1, 1, 0, 0), false, false, true); + k->configure(input, output, Size2D(1, 1), PadStrideInfo(1, 1, 0, 0), false, Size2D(1U, 1U), 1, false, true); _kernel = std::move(k); } \ No newline at end of file diff --git a/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp b/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp index c2f0283d4e..f1606aa93e 100644 --- a/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp +++ b/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp @@ -118,7 +118,7 @@ void NEFullyConnectedLayer::configure_conv_fc(const ITensor *input, const ITenso // Configure im2col kernel _memory_group.manage(&_im2col_output); - _im2col_kernel.configure(input, &_im2col_output, Size2D(1, 1), PadStrideInfo(1, 1, 0, 0), false, true); + _im2col_kernel.configure(input, &_im2col_output, Size2D(1, 1), PadStrideInfo(1, 1, 0, 0), false, Size2D(1U, 1U), 1, true); // Configure matrix multiply kernel configure_mm(&_im2col_output, weights, output); @@ -308,7 +308,7 @@ Status NEFullyConnectedLayer::validate(const ITensorInfo *input, const ITensorIn ARM_COMPUTE_RETURN_ERROR_ON((weights_to_use->dimension(1) != (input->dimension(0) * input->dimension(1) * input->dimension(2)))); // Validate im2col kernel - ARM_COMPUTE_RETURN_ON_ERROR(NEIm2ColKernel::validate(input, &im2col_input, Size2D(1, 1), PadStrideInfo(1, 1, 0, 0), false, true)); + ARM_COMPUTE_RETURN_ON_ERROR(NEIm2ColKernel::validate(input, &im2col_input, Size2D(1, 1), PadStrideInfo(1, 1, 0, 0), false, Size2D(1U, 1U), 1, true)); input_to_use = &im2col_input; } else diff --git a/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp b/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp index df4a040bad..33284470f4 100644 --- a/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp +++ b/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp @@ -233,7 +233,7 @@ void NEGEMMConvolutionLayer::configure(const ITensor *input, const ITensor *weig _memory_group.manage(&_im2col_output); // Configure - _im2col_kernel.configure(input, &_im2col_output, Size2D(kernel_width, kernel_height), conv_info, _append_bias, false, false, dilation); + _im2col_kernel.configure(input, &_im2col_output, Size2D(kernel_width, kernel_height), conv_info, _append_bias, dilation); // Update GEMM input gemm_input_to_use = &_im2col_output; @@ -401,7 +401,7 @@ Status NEGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI im2col_reshaped_info = TensorInfo(shape_im2col, 1, data_type); im2col_reshaped_info.set_quantization_info(input->quantization_info()); - ARM_COMPUTE_RETURN_ON_ERROR(NEIm2ColKernel::validate(input, &im2col_reshaped_info, Size2D(kernel_width, kernel_height), conv_info, append_bias, false, false, dilation)); + ARM_COMPUTE_RETURN_ON_ERROR(NEIm2ColKernel::validate(input, &im2col_reshaped_info, Size2D(kernel_width, kernel_height), conv_info, append_bias, dilation)); gemm_input_to_use = &im2col_reshaped_info; } else if(append_bias) diff --git a/src/runtime/NEON/functions/NEIm2Col.cpp b/src/runtime/NEON/functions/NEIm2Col.cpp index 6b95cb0256..4245b650e2 100644 --- a/src/runtime/NEON/functions/NEIm2Col.cpp +++ b/src/runtime/NEON/functions/NEIm2Col.cpp @@ -34,16 +34,18 @@ NEIm2Col::NEIm2Col() { } -void NEIm2Col::configure(const ITensor *input, ITensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, bool is_fully_connected, bool is_flatten) +void NEIm2Col::configure(const ITensor *input, ITensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation, unsigned int num_groups, + bool is_fully_connected, bool is_flatten) { _y_dim = get_data_layout_dimension_index(input->info()->data_layout(), DataLayoutDimension::HEIGHT); - _kernel.configure(input, output, kernel_dims, conv_info, has_bias, is_fully_connected, is_flatten); + _kernel.configure(input, output, kernel_dims, conv_info, has_bias, dilation, num_groups, is_fully_connected, is_flatten); } -Status NEIm2Col::validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, bool is_fully_connected, bool is_flatten) +Status NEIm2Col::validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation, + unsigned int num_groups, bool is_fully_connected, bool is_flatten) { - return NEIm2ColKernel::validate(input, output, kernel_dims, conv_info, has_bias, is_fully_connected, is_flatten); + return NEIm2ColKernel::validate(input, output, kernel_dims, conv_info, has_bias, dilation, num_groups, is_fully_connected, is_flatten); } void NEIm2Col::run() diff --git a/src/runtime/NEON/functions/NELocallyConnectedLayer.cpp b/src/runtime/NEON/functions/NELocallyConnectedLayer.cpp index 0737bd2f73..80a2541176 100644 --- a/src/runtime/NEON/functions/NELocallyConnectedLayer.cpp +++ b/src/runtime/NEON/functions/NELocallyConnectedLayer.cpp @@ -113,7 +113,7 @@ Status NELocallyConnectedLayer::validate(const ITensorInfo *input, const ITensor TensorInfo input_im2col_reshaped_info(shape_im2col, 1, input->data_type()); TensorInfo gemm_output_info(shape_gemm, 1, input->data_type()); - ARM_COMPUTE_RETURN_ON_ERROR(NEIm2ColKernel::validate(input, &input_im2col_reshaped_info, Size2D(kernel_width, kernel_height), conv_info, has_bias, false)); + ARM_COMPUTE_RETURN_ON_ERROR(NEIm2ColKernel::validate(input, &input_im2col_reshaped_info, Size2D(kernel_width, kernel_height), conv_info, has_bias)); ARM_COMPUTE_RETURN_ON_ERROR(NEWeightsReshapeKernel::validate(weights, biases, &weights_reshaped_info)); ARM_COMPUTE_RETURN_ON_ERROR(NELocallyConnectedMatrixMultiplyKernel::validate(&input_im2col_reshaped_info, &weights_reshaped_info, &gemm_output_info)); ARM_COMPUTE_RETURN_ON_ERROR(NECol2ImKernel::validate(&gemm_output_info, output, Size2D(conv_w, conv_h))); -- cgit v1.2.1