From 7da29b6b12ff319ed2b6e2c46588dfa1991556fb Mon Sep 17 00:00:00 2001 From: Alex Gilday Date: Fri, 23 Mar 2018 14:16:00 +0000 Subject: COMPMID-1017: Implement dilated convolution in NEON, OpenCL, and GC Change-Id: If4626ec9e215e14dffe22e80812da5bac84a52e2 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/125734 Reviewed-by: Anthony Barbier Tested-by: Jenkins --- src/runtime/CL/functions/CLConvolutionLayer.cpp | 18 ++++++++++-------- src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp | 16 +++++++++------- .../GLES_COMPUTE/functions/GCConvolutionLayer.cpp | 7 ++++--- src/runtime/NEON/functions/NEConvolutionLayer.cpp | 19 ++++++++++--------- src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp | 17 +++++++++-------- 5 files changed, 42 insertions(+), 35 deletions(-) (limited to 'src/runtime') diff --git a/src/runtime/CL/functions/CLConvolutionLayer.cpp b/src/runtime/CL/functions/CLConvolutionLayer.cpp index 1a486ce5c7..64bda93ff0 100644 --- a/src/runtime/CL/functions/CLConvolutionLayer.cpp +++ b/src/runtime/CL/functions/CLConvolutionLayer.cpp @@ -42,13 +42,14 @@ CLConvolutionLayer::CLConvolutionLayer(std::shared_ptr memory_ma { } -void CLConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info) +void CLConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info, + const Size2D &dilation) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output); - ARM_COMPUTE_ERROR_THROW_ON(CLConvolutionLayer::validate(input->info(), weights->info(), ((biases != nullptr) ? biases->info() : nullptr), output->info(), conv_info, weights_info)); + ARM_COMPUTE_ERROR_THROW_ON(CLConvolutionLayer::validate(input->info(), weights->info(), ((biases != nullptr) ? biases->info() : nullptr), output->info(), conv_info, weights_info, dilation)); switch(CLConvolutionLayer::get_convolution_method(input->info(), weights->info(), ((biases != nullptr) ? biases->info() : nullptr), output->info(), conv_info, - weights_info, CLScheduler::get().target())) + weights_info, CLScheduler::get().target(), dilation)) { case ConvolutionMethod::DIRECT: { @@ -60,7 +61,7 @@ void CLConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weights, c case ConvolutionMethod::GEMM: { auto f = arm_compute::support::cpp14::make_unique(_memory_manager); - f->configure(input, weights, biases, output, conv_info, weights_info); + f->configure(input, weights, biases, output, conv_info, weights_info, dilation); _function = std::move(f); break; } @@ -71,14 +72,14 @@ void CLConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weights, c } Status CLConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, - const WeightsInfo &weights_info) + const WeightsInfo &weights_info, const Size2D &dilation) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output); //Configure if the parameters match the direct convolution or the gemm-based const GPUTarget gpu_target = CLScheduler::get().target(); - switch(CLConvolutionLayer::get_convolution_method(input, weights, biases, output, conv_info, weights_info, gpu_target)) + switch(CLConvolutionLayer::get_convolution_method(input, weights, biases, output, conv_info, weights_info, gpu_target, dilation)) { case ConvolutionMethod::DIRECT: { @@ -89,7 +90,7 @@ Status CLConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo case ConvolutionMethod::GEMM: { // Validate gemm-based convolution layer - CLGEMMConvolutionLayer::validate(input, weights, biases, output, conv_info, weights_info); + CLGEMMConvolutionLayer::validate(input, weights, biases, output, conv_info, weights_info, dilation); break; } default: @@ -101,7 +102,7 @@ Status CLConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo } ConvolutionMethod CLConvolutionLayer::get_convolution_method(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, - const WeightsInfo &weights_info, const GPUTarget gpu_target) + const WeightsInfo &weights_info, const GPUTarget gpu_target, const Size2D &dilation) { ARM_COMPUTE_UNUSED(input); ARM_COMPUTE_UNUSED(weights); @@ -110,6 +111,7 @@ ConvolutionMethod CLConvolutionLayer::get_convolution_method(const ITensorInfo * ARM_COMPUTE_UNUSED(conv_info); ARM_COMPUTE_UNUSED(weights_info); ARM_COMPUTE_UNUSED(gpu_target); + ARM_COMPUTE_UNUSED(dilation); return ConvolutionMethod::GEMM; } diff --git a/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp b/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp index bc339f176f..e7ad62f5ff 100644 --- a/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp +++ b/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp @@ -151,7 +151,8 @@ Status CLGEMMConvolutionLayer::validate_mm(const ITensorInfo *input, const ITens return Status{}; } -void CLGEMMConvolutionLayer::configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info) +void CLGEMMConvolutionLayer::configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info, + const Size2D &dilation) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output); @@ -160,7 +161,8 @@ void CLGEMMConvolutionLayer::configure(const ICLTensor *input, const ICLTensor * biases != nullptr ? biases->info() : nullptr, output->info(), conv_info, - weights_info)); + weights_info, + dilation)); _is_quantized = is_data_type_quantized_asymmetric(input->info()->data_type()); @@ -187,7 +189,7 @@ void CLGEMMConvolutionLayer::configure(const ICLTensor *input, const ICLTensor * const unsigned int kernel_width = weights->info()->dimension(0); const unsigned int kernel_height = weights->info()->dimension(1); std::tie(conv_w, conv_h) = scaled_dimensions(input->info()->dimension(0), input->info()->dimension(1), kernel_width, kernel_height, - conv_info); + conv_info, dilation); unsigned int mat_weights_cols = weights->info()->dimension(3); unsigned int mat_weights_rows = weights->info()->dimension(0) * weights->info()->dimension(1) * weights->info()->dimension(2) + bias_element; @@ -224,7 +226,7 @@ void CLGEMMConvolutionLayer::configure(const ICLTensor *input, const ICLTensor * _memory_group.manage(&_gemm_output); // Configure im2col - _im2col_kernel.configure(input, &_im2col_output, Size2D(kernel_width, kernel_height), conv_info, append_bias); + _im2col_kernel.configure(input, &_im2col_output, Size2D(kernel_width, kernel_height), conv_info, append_bias, dilation); // Configure GEMM configure_mm(&_im2col_output, weights, &_gemm_output); @@ -260,7 +262,7 @@ void CLGEMMConvolutionLayer::configure(const ICLTensor *input, const ICLTensor * } Status CLGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, - const WeightsInfo &weights_info) + const WeightsInfo &weights_info, const Size2D &dilation) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output); ARM_COMPUTE_RETURN_ERROR_ON_MSG(weights_info.are_reshaped(), "Weights already reshaped are not supported!"); @@ -282,7 +284,7 @@ Status CLGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI const unsigned int kernel_width = weights->dimension(0); const unsigned int kernel_height = weights->dimension(1); - std::tie(conv_w, conv_h) = scaled_dimensions(input->dimension(0), input->dimension(1), kernel_width, kernel_height, conv_info); + std::tie(conv_w, conv_h) = scaled_dimensions(input->dimension(0), input->dimension(1), kernel_width, kernel_height, conv_info, dilation); unsigned int mat_weights_cols = weights->dimension(3); unsigned int mat_weights_rows = weights->dimension(0) * weights->dimension(1) * weights->dimension(2) + bias_element; @@ -298,7 +300,7 @@ Status CLGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI shape_im2col.set(2, 1); TensorInfo im2col_reshaped_info(shape_im2col, 1, dt, input->fixed_point_position()); im2col_reshaped_info.set_quantization_info(input->quantization_info()); - CLIm2ColKernel::validate(input, &im2col_reshaped_info, Size2D(kernel_width, kernel_height), conv_info, append_bias); + CLIm2ColKernel::validate(input, &im2col_reshaped_info, Size2D(kernel_width, kernel_height), conv_info, append_bias, dilation); // Create GEMM output tensor TensorShape shape_gemm = im2col_reshaped_info.tensor_shape(); diff --git a/src/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.cpp b/src/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.cpp index f4c073668a..c2b7e02284 100644 --- a/src/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.cpp +++ b/src/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.cpp @@ -102,7 +102,8 @@ void GCConvolutionLayer::configure_mm(const IGCTensor *input, const IGCTensor *w _mm_kernel.configure(input, weights, output, 1.f, is_interleaved_transposed); } -void GCConvolutionLayer::configure(const IGCTensor *input, const IGCTensor *weights, const IGCTensor *biases, IGCTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info) +void GCConvolutionLayer::configure(const IGCTensor *input, const IGCTensor *weights, const IGCTensor *biases, IGCTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info, + const Size2D &dilation) { ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights); @@ -136,7 +137,7 @@ void GCConvolutionLayer::configure(const IGCTensor *input, const IGCTensor *weig const unsigned int kernel_width = (_are_weights_reshaped) ? weights_info.kernel_size().first : weights->info()->dimension(0); const unsigned int kernel_height = (_are_weights_reshaped) ? weights_info.kernel_size().second : weights->info()->dimension(1); std::tie(conv_w, conv_h) = scaled_dimensions(input->info()->dimension(0), input->info()->dimension(1), kernel_width, kernel_height, - conv_info); + conv_info, dilation); // Check if its a "fully connected" convolution _is_fully_connected_convolution = ((conv_w == 1) && (conv_h == 1)); @@ -229,7 +230,7 @@ void GCConvolutionLayer::configure(const IGCTensor *input, const IGCTensor *weig input->info()->extend_padding(border_size); _fill_border.configure(input, border_size, BorderMode::CONSTANT, PixelValue(0)); // for PAD of im2col fp16: consider it as border } - _input_im2col_kernel.configure(input, &_input_im2col_reshaped, Size2D(kernel_width, kernel_height), conv_info, _append_bias); + _input_im2col_kernel.configure(input, &_input_im2col_reshaped, Size2D(kernel_width, kernel_height), conv_info, _append_bias, dilation); // Configure matrix multiply if(run_interleaved) diff --git a/src/runtime/NEON/functions/NEConvolutionLayer.cpp b/src/runtime/NEON/functions/NEConvolutionLayer.cpp index d4421e8429..e659495b7c 100644 --- a/src/runtime/NEON/functions/NEConvolutionLayer.cpp +++ b/src/runtime/NEON/functions/NEConvolutionLayer.cpp @@ -40,14 +40,15 @@ NEConvolutionLayer::NEConvolutionLayer(std::shared_ptr memory_ma { } -void NEConvolutionLayer::configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info) +void NEConvolutionLayer::configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info, + const Size2D &dilation) { // Perform validate step ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output); - ARM_COMPUTE_ERROR_THROW_ON(NEConvolutionLayer::validate(input->info(), weights->info(), ((biases != nullptr) ? biases->info() : nullptr), output->info(), conv_info, weights_info)); + ARM_COMPUTE_ERROR_THROW_ON(NEConvolutionLayer::validate(input->info(), weights->info(), ((biases != nullptr) ? biases->info() : nullptr), output->info(), conv_info, weights_info, dilation)); switch(NEConvolutionLayer::get_convolution_method(input->info(), weights->info(), ((biases != nullptr) ? biases->info() : nullptr), output->info(), conv_info, - weights_info)) + weights_info, dilation)) { case ConvolutionMethod::WINOGRAD: { @@ -59,7 +60,7 @@ void NEConvolutionLayer::configure(ITensor *input, const ITensor *weights, const case ConvolutionMethod::GEMM: { auto f = arm_compute::support::cpp14::make_unique(_memory_manager); - f->configure(input, weights, biases, output, conv_info, weights_info); + f->configure(input, weights, biases, output, conv_info, weights_info, dilation); _function = std::move(f); break; } @@ -77,9 +78,9 @@ void NEConvolutionLayer::configure(ITensor *input, const ITensor *weights, const } Status NEConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, - const WeightsInfo &weights_info) + const WeightsInfo &weights_info, const Size2D &dilation) { - switch(NEConvolutionLayer::get_convolution_method(input, weights, biases, output, conv_info, weights_info)) + switch(NEConvolutionLayer::get_convolution_method(input, weights, biases, output, conv_info, weights_info, dilation)) { case ConvolutionMethod::WINOGRAD: //Validate Winograd @@ -87,7 +88,7 @@ Status NEConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo break; case ConvolutionMethod::GEMM: //Validate Gemm-based Convolution - NEGEMMConvolutionLayer::validate(input, weights, biases, output, conv_info, weights_info); + NEGEMMConvolutionLayer::validate(input, weights, biases, output, conv_info, weights_info, dilation); break; case ConvolutionMethod::DIRECT: //Validate Gemm-based Convolution @@ -101,12 +102,12 @@ Status NEConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo } ConvolutionMethod NEConvolutionLayer::get_convolution_method(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, - const WeightsInfo &weights_info) + const WeightsInfo &weights_info, const Size2D &dilation) { ARM_COMPUTE_UNUSED(output); ARM_COMPUTE_UNUSED(weights_info); if((input->data_type() == DataType::F32) && (weights->dimension(0) == 3) && (weights->dimension(1) == 3) && (weights->num_dimensions() <= 4) && (conv_info.stride().first == 1) - && (conv_info.stride().second == 1) && (biases != nullptr)) + && (conv_info.stride().second == 1) && (biases != nullptr) && (dilation == Size2D(1U, 1U))) { return ConvolutionMethod::WINOGRAD; } diff --git a/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp b/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp index 3b8b4243e5..d9707d95e0 100644 --- a/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp +++ b/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp @@ -170,7 +170,7 @@ Status validate_and_initialize_values(const ITensorInfo *input, const ITensorInf bool &are_weights_reshaped, unsigned int &kernel_width, unsigned int &kernel_height, bool &is_fully_connected_convolution, bool &is_interleaved, bool &is_quantized, unsigned int &mat_weights_cols, unsigned int &mat_weights_rows, - unsigned int &conv_w, unsigned int &conv_h) + unsigned int &conv_w, unsigned int &conv_h, const Size2D &dilation) { ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QASYMM8, DataType::QS16, DataType::F16, DataType::F32); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights); @@ -205,7 +205,7 @@ Status validate_and_initialize_values(const ITensorInfo *input, const ITensorInf mat_weights_rows = weights->dimension(0) * weights->dimension(1) * weights->dimension(2) + (append_bias ? 1 : 0); std::tie(conv_w, conv_h) = scaled_dimensions(input->dimension(0), input->dimension(1), kernel_width, kernel_height, - conv_info); + conv_info, dilation); // Check if its a "fully connected" convolution is_fully_connected_convolution = ((conv_w == 1) && (conv_h == 1)); @@ -246,7 +246,8 @@ void NEGEMMConvolutionLayer::configure_mm(const ITensor *input, const ITensor *w } } -void NEGEMMConvolutionLayer::configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info) +void NEGEMMConvolutionLayer::configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info, + const Size2D &dilation) { // Perform validate step ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output); @@ -262,7 +263,7 @@ void NEGEMMConvolutionLayer::configure(const ITensor *input, const ITensor *weig Status status = validate_and_initialize_values(input->info(), weights->info(), (biases == nullptr) ? nullptr : biases->info(), conv_info, weights_info, dt, _append_bias, _are_weights_reshaped, kernel_width, kernel_height, _is_fully_connected_convolution, _is_interleaved, _is_quantized, - mat_weights_cols, mat_weights_rows, conv_w, conv_h); + mat_weights_cols, mat_weights_rows, conv_w, conv_h, dilation); ARM_COMPUTE_ERROR_THROW_ON(status); @@ -362,7 +363,7 @@ void NEGEMMConvolutionLayer::configure(const ITensor *input, const ITensor *weig // Configure kernels // Configure im2col - _input_im2col_kernel.configure(input, &_input_im2col_reshaped, Size2D(kernel_width, kernel_height), conv_info, _append_bias); + _input_im2col_kernel.configure(input, &_input_im2col_reshaped, Size2D(kernel_width, kernel_height), conv_info, _append_bias, false, false, dilation); // Configure matrix multiply if(run_optimised) @@ -420,7 +421,7 @@ void NEGEMMConvolutionLayer::configure(const ITensor *input, const ITensor *weig } Status NEGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, - const WeightsInfo &weights_info) + const WeightsInfo &weights_info, const Size2D &dilation) { ARM_COMPUTE_UNUSED(output); @@ -439,7 +440,7 @@ Status NEGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI Status status = validate_and_initialize_values(input, weights, biases, conv_info, weights_info, dt, append_bias, are_weights_reshaped, kernel_width, kernel_height, is_fully_connected_convolution, is_interleaved, is_quantized, mat_weights_cols, mat_weights_rows, - conv_w, conv_h); + conv_w, conv_h, dilation); const Size2D kernel_weights = Size2D(kernel_width, kernel_height); @@ -517,7 +518,7 @@ Status NEGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI shape_im2col.set(1, mat_input_rows); shape_im2col.set(2, 1); TensorInfo im2_col_info = input->clone()->set_tensor_shape(shape_im2col); - ARM_COMPUTE_RETURN_ON_ERROR(NEIm2ColKernel::validate(input, &im2_col_info, kernel_weights, conv_info, append_bias, false)); + ARM_COMPUTE_RETURN_ON_ERROR(NEIm2ColKernel::validate(input, &im2_col_info, kernel_weights, conv_info, append_bias, false, false, dilation)); // Create GEMM output tensor TensorShape shape_gemm(im2_col_info.tensor_shape()); -- cgit v1.2.1