From adfcacc8e39888a9a62e33c178041642d0a3047a Mon Sep 17 00:00:00 2001
From: Annop Wongwathanarat
Date: Wed, 1 Mar 2023 15:19:50 +0000
Subject: Add weights_info as optional input for NEDeconvolutionLayer

This is so that we can leverage fixed format kernels when using the
GEMM convolution method.

Partially resolves: [ONCPUML-1129]

Change-Id: I61ffa74f5cd9d75579dbc1f9aa187371f855e932
Signed-off-by: Annop Wongwathanarat
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/9248
Reviewed-by: Jakub Sujak
Reviewed-by: Gunes Bayir
Comments-Addressed: Arm Jenkins
Tested-by: Arm Jenkins
Benchmark: Arm Jenkins
---
 .../runtime/NEON/functions/NEDeconvolutionLayer.h  | 19 +++++++-----
 .../NEON/functions/NEDeconvolutionLayer.cpp        | 35 +++++++++++-----------
 2 files changed, 30 insertions(+), 24 deletions(-)

diff --git a/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h
index 869df69f11..cdc3a636b0 100644
--- a/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h
@@ -110,12 +110,14 @@ public:
      * @param[in]  bias             Optional, ignored if NULL. The biases have one dimension.
      *                              Data type supported: Data types supported: S32 for QASYMM8/QASYMM8_SIGNED input, F32 for F32 input, F16 for F16 input.
      * @param[out] output           Output tensor. The output has the same number of dimensions as the @p input.
-     * @param[in]  info             Contains padding and policies to be used in the deconvolution, this is decribed in @ref PadStrideInfo.
+     * @param[in]  info             Contains padding and policies to be used in the deconvolution, this is described in @ref PadStrideInfo.
      * @param[in]  enable_fast_math (Optional) Enable fast math computation. In case this flag were set, the function could dispatch the fastest implementation
-     *                              available which may introduce a drop of accuracy as well. Default is false
+     *                              available which may introduce a drop of accuracy as well. Default is false
+     * @param[in]  weights_info     (Optional) Specifies the weight format. Default is unspecified. This parameter can be used to specify the weight format that is optimal for
+     *                              the GEMM convolution.
      *
      */
-    void configure(ITensor *input, const ITensor *weights, const ITensor *bias, ITensor *output, const PadStrideInfo &info, bool enable_fast_math = false);
+    void configure(ITensor *input, const ITensor *weights, const ITensor *bias, ITensor *output, const PadStrideInfo &info, bool enable_fast_math = false, const WeightsInfo &weights_info = WeightsInfo());
     /** Static function to check if given info will lead to a valid configuration of @ref NEDeconvolutionLayer
      *
      * @param[in] input            Input tensor info. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs.
@@ -124,13 +126,16 @@ public:
      *                             Data type supported: Same as @p input, also could be QSYMM8_PER_CHANNEL if input is QASYMM8/QASYMM8_SIGNED.
      * @param[in] bias             (Optional) The biases have one dimension. Data type supported: Data types supported: S32 for QASYMM8/QASYMM8_SIGNED input, F32 for F32 input, F16 for F16 input.
      * @param[in] output           Output tensor info. The output has the same number of dimensions as the @p input.
-     * @param[in] info             Contains padding and policies to be used in the deconvolution, this is decribed in @ref PadStrideInfo.
+     * @param[in] info             Contains padding and policies to be used in the deconvolution, this is described in @ref PadStrideInfo.
      * @param[in] enable_fast_math (Optional) Enable fast math computation. In case this flag were set, the function could dispatch the fastest implementation
-     *                             available which may introduce a drop of accuracy as well. Default is false
+     *                             available which may introduce a drop of accuracy as well. Default is false
+     * @param[in] weights_info     (Optional) Specifies the weight format. Default is unspecified. This parameter can be used to specify the weight format that is optimal for
+     *                             the GEMM convolution.
      *
      * @return a status
      */
-    static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *bias, const ITensorInfo *output, const PadStrideInfo &info, bool enable_fast_math = false);
+    static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *bias, const ITensorInfo *output, const PadStrideInfo &info,
+                           bool enable_fast_math = false, const WeightsInfo &weights_info = WeightsInfo());
 
     // Inherited methods overridden:
     void run() override;
@@ -150,5 +155,5 @@ private:
     bool _is_prepared;
     bool _do_upsampling;
 };
-} // arm_compute
+} // namespace arm_compute
 #endif /* ARM_COMPUTE_NEDECONVOLUTIONLAYER_H */
diff --git a/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp b/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp
index 96ffedd5b4..8534d2a8f3 100644
--- a/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp
@@ -82,7 +82,8 @@ NEDeconvolutionLayer::NEDeconvolutionLayer(std::shared_ptr<IMemoryManager> memor
 {
 }
 
-Status NEDeconvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *bias, const ITensorInfo *output, const PadStrideInfo &info, bool enable_fast_math)
+Status NEDeconvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *bias, const ITensorInfo *output, const PadStrideInfo &info,
+                                      bool enable_fast_math, const WeightsInfo &weights_info)
 {
     ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
     ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32, DataType::F16, DataType::QASYMM8, DataType::QASYMM8_SIGNED);
@@ -152,26 +153,26 @@ Status NEDeconvolutionLayer::validate(const ITensorInfo *input, const ITensorInf
     ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(batches_idx) != scale_out_info.dimension(batches_idx));
     ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(channel_idx) != scale_out_info.dimension(channel_idx));
 
-    if (do_upsampling)
+    if(do_upsampling)
     {
         const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL);
-        ARM_COMPUTE_RETURN_ON_ERROR(NEConvolutionLayer::validate(&scale_out_info, weights, bias, output, conv_info, WeightsInfo(), Size2D(1U, 1U), ActivationLayerInfo(), enable_fast_math));
+        ARM_COMPUTE_RETURN_ON_ERROR(NEConvolutionLayer::validate(&scale_out_info, weights, bias, output, conv_info, weights_info, Size2D(1U, 1U), ActivationLayerInfo(), enable_fast_math));
     }
     else
     {
         const PadStrideInfo conv_info(1, 1, upsample_info.pad_left(), upsample_info.pad_right(), upsample_info.pad_top(), upsample_info.pad_bottom(), DimensionRoundingType::CEIL);
-        ARM_COMPUTE_RETURN_ON_ERROR(NEConvolutionLayer::validate(input, weights, bias, output, conv_info, WeightsInfo(), Size2D(1U, 1U), ActivationLayerInfo(), enable_fast_math));
+        ARM_COMPUTE_RETURN_ON_ERROR(NEConvolutionLayer::validate(input, weights, bias, output, conv_info, weights_info, Size2D(1U, 1U), ActivationLayerInfo(), enable_fast_math));
     }
 
     return Status{};
 }
 
-void NEDeconvolutionLayer::configure(ITensor *input, const ITensor *weights, const ITensor *bias, ITensor *output, const PadStrideInfo &info, bool enable_fast_math)
+void NEDeconvolutionLayer::configure(ITensor *input, const ITensor *weights, const ITensor *bias, ITensor *output, const PadStrideInfo &info, bool enable_fast_math, const WeightsInfo &weights_info)
 {
     // Perform validation step
     ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
-    ARM_COMPUTE_ERROR_THROW_ON(NEDeconvolutionLayer::validate(input->info(), weights->info(), (bias == nullptr) ? nullptr : bias->info(), output->info(), info, enable_fast_math));
-    ARM_COMPUTE_LOG_PARAMS(input, weights, bias, output, info, enable_fast_math);
+    ARM_COMPUTE_ERROR_THROW_ON(NEDeconvolutionLayer::validate(input->info(), weights->info(), (bias == nullptr) ? nullptr : bias->info(), output->info(), info, enable_fast_math, weights_info));
+    ARM_COMPUTE_LOG_PARAMS(input, weights, bias, output, info, enable_fast_math, weights_info);
 
     const DataLayout data_layout = input->info()->data_layout();
     const unsigned int width_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
@@ -198,12 +199,12 @@ void NEDeconvolutionLayer::configure(ITensor *input, const ITensor *weights, con
     _flip_weights.configure(weights, &_weights_flipped, &_flip_axis);
 
     // setup the function to convolve the upscaled output
-    uint32_t     deconv_pad_x = 0;
-    uint32_t     deconv_pad_y = 0;
-    const TensorShape scale_out_shape = compute_deconvolution_upsampled_shape(*input->info(), *weights->info(),
-                                                                              stride_x, stride_y,
-                                                                              out_dims, deconv_pad_x, deconv_pad_y);
-    const PadStrideInfo upsample_info = compute_upsample_info(info, deconv_pad_x, deconv_pad_y);
+    uint32_t            deconv_pad_x = 0;
+    uint32_t            deconv_pad_y = 0;
+    const TensorShape   scale_out_shape = compute_deconvolution_upsampled_shape(*input->info(), *weights->info(),
+                                                                                stride_x, stride_y,
+                                                                                out_dims, deconv_pad_x, deconv_pad_y);
+    const PadStrideInfo upsample_info   = compute_upsample_info(info, deconv_pad_x, deconv_pad_y);
 
     // Do not perform upsampling when the operation uses unit stride in all dimensions
     _do_upsampling = stride_x != 1 || stride_y != 1;
@@ -215,12 +216,12 @@ void NEDeconvolutionLayer::configure(ITensor *input, const ITensor *weights, con
     axis_data[1] = static_cast<uint32_t>(height_idx);
 
     // Setup convolution and upsampling, if needed
-    if (_do_upsampling)
+    if(_do_upsampling)
    {
         _memory_group.manage(&_scaled_output);
 
         const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL);
-        TensorInfo scale_out_info(scale_out_shape, 1, input->info()->data_type(), input->info()->quantization_info());
+        TensorInfo          scale_out_info(scale_out_shape, 1, input->info()->data_type(), input->info()->quantization_info());
         scale_out_info.set_data_layout(data_layout);
         _scaled_output.allocator()->init(scale_out_info);
 
@@ -228,14 +229,14 @@ void NEDeconvolutionLayer::configure(ITensor *input, const ITensor *weights, con
         // The padding amount can be given as input to the convolution layer.
         _upsample_f.configure(input, &_scaled_output, upsample_info);
 
-        _conv_f.configure(&_scaled_output, &_weights_flipped, bias, output, conv_info, WeightsInfo(), Size2D(1U, 1U), ActivationLayerInfo(), enable_fast_math);
+        _conv_f.configure(&_scaled_output, &_weights_flipped, bias, output, conv_info, weights_info, Size2D(1U, 1U), ActivationLayerInfo(), enable_fast_math);
 
         _scaled_output.allocator()->allocate();
     }
     else
     {
         const PadStrideInfo conv_info(1, 1, upsample_info.pad_left(), upsample_info.pad_right(), upsample_info.pad_top(), upsample_info.pad_bottom(), DimensionRoundingType::CEIL);
-        _conv_f.configure(input, &_weights_flipped, bias, output, conv_info, WeightsInfo(), Size2D(1U, 1U), ActivationLayerInfo(), enable_fast_math);
+        _conv_f.configure(input, &_weights_flipped, bias, output, conv_info, weights_info, Size2D(1U, 1U), ActivationLayerInfo(), enable_fast_math);
     }
 }
-- 
cgit v1.2.1
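
Usage note (editorial addition, not part of the patch): the sketch below shows roughly how a caller might exercise the new weights_info parameter. Treat every concrete value in it as an assumption rather than something taken from this change: the tensor shapes are arbitrary, WeightFormat::OHWIo8 merely stands in for whatever blocking the target CPU actually prefers, and the WeightsInfo constructor used follows the library's public headers around this release. In practice the favourable format is usually discovered by querying the backend (for example via NEGEMMConvolutionLayer::has_opt_impl with WeightFormat::ANY) rather than hard-coded, and when a fixed format is requested the weights buffer must already be laid out accordingly.

// Hedged usage sketch for the new weights_info parameter of
// NEDeconvolutionLayer. Shapes and the requested weight format are
// illustrative assumptions, not values mandated by the patch.
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    // NHWC FP32 tensors: 8x8 input with 16 channels, 3x3 kernels, 32 output channels.
    // Unit stride with 1-pixel padding keeps the spatial size: (8 - 1) * 1 + 3 - 2 * 1 = 8.
    Tensor input, weights, bias, output;

    TensorInfo in_info(TensorShape(16U, 8U, 8U, 1U), 1, DataType::F32);
    in_info.set_data_layout(DataLayout::NHWC);
    input.allocator()->init(in_info);

    TensorInfo w_info(TensorShape(16U, 3U, 3U, 32U), 1, DataType::F32);
    w_info.set_data_layout(DataLayout::NHWC);
    weights.allocator()->init(w_info);

    bias.allocator()->init(TensorInfo(TensorShape(32U), 1, DataType::F32));

    TensorInfo out_info(TensorShape(32U, 8U, 8U, 1U), 1, DataType::F32);
    out_info.set_data_layout(DataLayout::NHWC);
    output.allocator()->init(out_info);

    const PadStrideInfo deconv_info(1, 1, 1, 1);

    // Request a fixed weight format through the new optional argument.
    // OHWIo8 is an assumed example; the optimal blocking is CPU-dependent and
    // the weights tensor must already hold data in the requested layout.
    const WeightsInfo weights_info(false, 3U, 3U, 32U, false, WeightFormat::OHWIo8);

    // Validate first: an unsupported format/data-type combination comes back
    // as a Status here instead of asserting inside configure().
    const Status status = NEDeconvolutionLayer::validate(input.info(), weights.info(), bias.info(), output.info(),
                                                         deconv_info, /* enable_fast_math */ false, weights_info);

    NEDeconvolutionLayer deconv;
    if(status.error_code() == ErrorCode::OK)
    {
        deconv.configure(&input, &weights, &bias, &output, deconv_info, false, weights_info);
    }
    else
    {
        // Fall back to the pre-existing behaviour: weights_info defaults to
        // WeightsInfo(), i.e. an unspecified weight format.
        deconv.configure(&input, &weights, &bias, &output, deconv_info);
    }

    input.allocator()->allocate();
    weights.allocator()->allocate();
    bias.allocator()->allocate();
    output.allocator()->allocate();

    // Fill input/weights/bias with real data before running.
    deconv.run();
    return 0;
}

Because validate() now forwards weights_info to NEConvolutionLayer::validate(), this probe-then-commit pattern is cheap: the caller only opts into the fixed-format path when the backend accepts it, and otherwise keeps the default unspecified-format behaviour.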