From 745153b7ec2b6f3cd08d097b4d746503b0775402 Mon Sep 17 00:00:00 2001
From: Pablo Marquez Tello
Date: Wed, 27 Sep 2023 15:20:40 +0100
Subject: NEDeconvolutionLayer validation fix

* Added a new test to make sure we support the following configuration:
  NCHW, InputInfo=Shape=2,2, WeightsInfo=Shape=3,3, OutputInfo=Shape=4,4,
  PadStrideInfo=1,1;0,0,0,0
* Fixed the validate() method to allow this configuration
* Resolves MLCE-1120

Change-Id: I6874ad57bb81384185984741b983bf5e19ba150c
Signed-off-by: Pablo Marquez Tello
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/10417
Reviewed-by: Gunes Bayir
Comments-Addressed: Arm Jenkins
Tested-by: Arm Jenkins
Benchmark: Arm Jenkins
---
 arm_compute/core/utils/misc/ShapeCalculator.h   | 50 +++++++++--
 .../NEON/functions/NEDeconvolutionLayer.cpp     | 34 ++++----
 tests/validation/NEON/DeconvolutionLayer.cpp    | 97 ++++++++++++++++------
 3 files changed, 134 insertions(+), 47 deletions(-)

diff --git a/arm_compute/core/utils/misc/ShapeCalculator.h b/arm_compute/core/utils/misc/ShapeCalculator.h
index 31362f1ac4..86dcfdc3d0 100644
--- a/arm_compute/core/utils/misc/ShapeCalculator.h
+++ b/arm_compute/core/utils/misc/ShapeCalculator.h
@@ -21,8 +21,8 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#ifndef ACL_ARM_COMPUTE_CORE_UTILS_MISC_SHAPECALCULATOR
-#define ACL_ARM_COMPUTE_CORE_UTILS_MISC_SHAPECALCULATOR
+#ifndef ACL_ARM_COMPUTE_CORE_UTILS_MISC_SHAPECALCULATOR_H
+#define ACL_ARM_COMPUTE_CORE_UTILS_MISC_SHAPECALCULATOR_H
 
 #include "arm_compute/core/Helpers.h"
 #include "arm_compute/core/ITensorInfo.h"
@@ -459,6 +459,37 @@ compute_depthwise_convolution_shape(const ITensorInfo &input, const ITensorInfo
     return output_shape;
 }
 
+/** Calculate padding required for deconvolution
+ *
+ * @param[in] input    Input tensor info
+ * @param[in] weights  Weights tensor info
+ * @param[in] sx       Stride on x axis
+ * @param[in] sy       Stride on y axis
+ * @param[in] out_dims Output shape dimensions
+ *
+ * @return the padding required
+ */
+inline std::pair<int32_t, int32_t> compute_deconvolution_padding(const ITensorInfo &input,
+                                                                 const ITensorInfo &weights,
+                                                                 int32_t            sx,
+                                                                 int32_t            sy,
+                                                                 std::pair<uint32_t, uint32_t> out_dims)
+{
+    const DataLayout data_layout = input.data_layout();
+    const size_t     idx_w       = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
+    const size_t     idx_h       = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
+
+    // Find the upsampled dimensions
+    int32_t out_x = (static_cast<int32_t>(input.dimension(idx_w)) - 1) * sx + 1;
+    int32_t out_y = (static_cast<int32_t>(input.dimension(idx_h)) - 1) * sy + 1;
+
+    // Find the padding needed for the convolution with stride 1 in order to match output shape
+    int32_t padx = out_dims.first - (out_x - static_cast<int32_t>(weights.dimension(idx_w)) + 1);
+    int32_t pady = out_dims.second - (out_y - static_cast<int32_t>(weights.dimension(idx_h)) + 1);
+
+    return std::make_pair(padx, pady);
+}
+
 /** Calculate the upsampled output shape used for deconvolution
  *
  * @param[in] input Input tensor info
@@ -479,17 +510,20 @@ inline TensorShape compute_deconvolution_upsampled_shape(const ITensorInfo
                                                           uint32_t          &padx,
                                                           uint32_t          &pady)
 {
+    // Find the padding needed for the convolution with stride 1 in order to match output shape
+    const auto padxy =
+        compute_deconvolution_padding(input, weights, static_cast<int32_t>(sx), static_cast<int32_t>(sy), out_dims);
+    padx = static_cast<uint32_t>(padxy.first);
+    pady = static_cast<uint32_t>(padxy.second);
+
     const DataLayout data_layout = input.data_layout();
     const size_t     idx_w       = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
     const size_t     idx_h       = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
 
     // Find the upsampled dimensions
-    unsigned int out_x = (input.dimension(idx_w) - 1) * sx + 1;
-    unsigned int out_y = (input.dimension(idx_h) - 1) * sy + 1;
+    uint32_t out_x = (input.dimension(idx_w) - 1) * sx + 1;
+    uint32_t out_y = (input.dimension(idx_h) - 1) * sy + 1;
 
-    // Find the padding needed for the convolution with stride 1 in order to match output shape
-    padx = out_dims.first - (out_x - weights.dimension(idx_w) + 1);
-    pady = out_dims.second - (out_y - weights.dimension(idx_h) + 1);
     out_x += padx;
     out_y += pady;
 
@@ -1694,4 +1728,4 @@ compute_gather_shape(const TensorShape &input_shape, const TensorShape &indices_
 } // namespace shape_calculator
 } // namespace misc
 } // namespace arm_compute
-#endif /* ACL_ARM_COMPUTE_CORE_UTILS_MISC_SHAPECALCULATOR */
+#endif // ACL_ARM_COMPUTE_CORE_UTILS_MISC_SHAPECALCULATOR_H
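The arithmetic introduced by compute_deconvolution_padding() can be checked by hand for the configuration named in the commit message (NCHW, 2x2 input, 3x3 weights, stride 1, no padding, 4x4 output). The following is a minimal standalone C++ sketch of that calculation; it mirrors the formulas in the hunk above rather than calling the library, and all names are local to the example.

    // Standalone sketch of the padding computation added above.
    // Assumed configuration: NCHW, 2x2 input, 3x3 weights, stride 1, no padding -> 4x4 output.
    #include <cstdint>
    #include <iostream>
    #include <utility>

    // Mirrors compute_deconvolution_padding(): padding needed so that a stride-1
    // convolution over the upsampled input produces the requested output size.
    std::pair<int32_t, int32_t> deconv_padding(int32_t in_w, int32_t in_h,
                                               int32_t k_w, int32_t k_h,
                                               int32_t sx, int32_t sy,
                                               int32_t out_w, int32_t out_h)
    {
        const int32_t up_x = (in_w - 1) * sx + 1; // upsampled width
        const int32_t up_y = (in_h - 1) * sy + 1; // upsampled height
        const int32_t padx = out_w - (up_x - k_w + 1);
        const int32_t pady = out_h - (up_y - k_h + 1);
        return {padx, pady};
    }

    int main()
    {
        // Deconvolution output size for stride 1 and zero padding: (in - 1) + k = 4.
        const auto pad = deconv_padding(2, 2, 3, 3, 1, 1, 4, 4);
        std::cout << "padx=" << pad.first << " pady=" << pad.second << '\n'; // prints padx=4 pady=4
        return 0;
    }

With these numbers padx = pady = 4, both non-negative, so the new sign check in NEDeconvolutionLayer::validate() accepts the configuration. The previous guard rejected it because the 3x3 kernel is larger than the 2x2 upsampled input, even though the requested shapes are consistent.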
diff --git a/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp b/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp
index 3987370d9e..081c7cc538 100644
--- a/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp
@@ -65,7 +65,6 @@ PadStrideInfo compute_upsample_info(const PadStrideInfo &info, uint32_t deconv_p
     return PadStrideInfo(stride_x, stride_y, deconv_pad_left, deconv_pad_right, deconv_pad_top, deconv_pad_bottom,
                          DimensionRoundingType::FLOOR);
 }
-
 } // namespace
 
 NEDeconvolutionLayer::NEDeconvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager) // NOLINT
@@ -110,6 +109,16 @@ Status NEDeconvolutionLayer::validate(const ITensorInfo *input,
         ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);
     }
 
+    const unsigned int pad_left   = info.pad_left();
+    const unsigned int pad_top    = info.pad_top();
+    const unsigned int pad_right  = info.pad_right();
+    const unsigned int pad_bottom = info.pad_bottom();
+
+    ARM_COMPUTE_RETURN_ERROR_ON(((input->dimension(width_idx) - 1) * info.stride().first +
+                                 weights->dimension(width_idx)) < (pad_left + pad_right));
+    ARM_COMPUTE_RETURN_ERROR_ON(((input->dimension(height_idx) - 1) * info.stride().second +
+                                 weights->dimension(height_idx)) < (pad_top + pad_bottom));
+
     auto out_dims = deconvolution_output_dimensions(input->dimension(width_idx), input->dimension(height_idx),
                                                     weights->dimension(width_idx), weights->dimension(height_idx), info);
 
@@ -140,20 +149,14 @@ Status NEDeconvolutionLayer::validate(const ITensorInfo *input,
                                         "Output's depth is invalid.");
     }
 
-    uint32_t           deconv_pad_x = 0;
-    uint32_t           deconv_pad_y = 0;
-    const unsigned int stride_x     = info.stride().first;
-    const unsigned int stride_y     = info.stride().second;
-    // Guard against overflows in compute_deconvolution_upsampled_shape()
-    const DataLayout   data_layout  = input->data_layout();
-    const size_t       idx_w        = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
-    const size_t       idx_h        = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
-    const unsigned int out_x        = (input->dimension(idx_w) - 1) * stride_x + 1;
-    const unsigned int out_y        = (input->dimension(idx_h) - 1) * stride_y + 1;
-    ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_w) > out_x);
-    ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_h) > out_y);
-    ARM_COMPUTE_RETURN_ERROR_ON((out_x - weights->dimension(idx_w) + 1) > out_dims.first);
-    ARM_COMPUTE_RETURN_ERROR_ON((out_y - weights->dimension(idx_h) + 1) > out_dims.second);
+    uint32_t       deconv_pad_x = 0;
+    uint32_t       deconv_pad_y = 0;
+    const uint32_t stride_x     = info.stride().first;
+    const uint32_t stride_y     = info.stride().second;
+    const auto     deconv_padding = compute_deconvolution_padding(*input, *weights, static_cast<int32_t>(stride_x),
+                                                                  static_cast<int32_t>(stride_y), out_dims);
+    ARM_COMPUTE_RETURN_ERROR_ON_MSG(deconv_padding.first < 0 || deconv_padding.second < 0,
+                                    "Negative padding not supported");
 
     const TensorShape scale_out_shape =
         compute_deconvolution_upsampled_shape(*input, *weights, stride_x, stride_y, out_dims, deconv_pad_x, deconv_pad_y);
@@ -235,6 +238,7 @@ void NEDeconvolutionLayer::configure(ITensor *input,
     uint32_t          deconv_pad_y    = 0;
     const TensorShape scale_out_shape = compute_deconvolution_upsampled_shape(
         *input->info(), *weights->info(), stride_x, stride_y, out_dims, deconv_pad_x, deconv_pad_y);
+
     const PadStrideInfo upsample_info = compute_upsample_info(info, deconv_pad_x, deconv_pad_y);
 
     // Do not perform upsampling when the operation uses unit stride in all dimensions
diff --git a/tests/validation/NEON/DeconvolutionLayer.cpp b/tests/validation/NEON/DeconvolutionLayer.cpp
index d26d26adf7..b4c049f6f9 100644
--- a/tests/validation/NEON/DeconvolutionLayer.cpp
+++ b/tests/validation/NEON/DeconvolutionLayer.cpp
@@ -52,54 +52,81 @@ constexpr float tolerance_num_fp16 = 0.02f;
 constexpr float tolerance_num_quant = 0.07f; /**< Tolerance number for quantized types */
 
 const auto data4x4 = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 1, 4) * framework::dataset::make("StrideY", 1, 4) * framework::dataset::make("PadX", 0, 3)
-                     * framework::dataset::make("PadY", 0, 3) * framework::dataset::make("NumKernels", { 3 });
+                     * framework::dataset::make("PadY", 0, 3) * framework::dataset::make("NumKernels",
+{
+    3
+});
 
 const auto data3x3 = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 1, 4) * framework::dataset::make("StrideY", 1, 4) * framework::dataset::make("PadX", 0, 2)
-                     * framework::dataset::make("PadY", 0, 2) * framework::dataset::make("NumKernels", { 3 });
+                     * framework::dataset::make("PadY", 0, 2) * framework::dataset::make("NumKernels",
+{
+    3
+});
 
 const auto data3x3_asymm = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 1, 2) * framework::dataset::make("StrideY", 1, 2) * framework::dataset::make("PadLeft", 0, 1)
-                           * framework::dataset::make("PadRight", 0, 1) * framework::dataset::make("PadTop", 0, 1) * framework::dataset::make("PadBottom", 0, 1) * framework::dataset::make("NumKernels", { 3 });
+                           * framework::dataset::make("PadRight", 0, 1) * framework::dataset::make("PadTop", 0, 1) * framework::dataset::make("PadBottom", 0, 1) * framework::dataset::make("NumKernels",
+{
+    3
+});
 
-const auto data9x9_small_asymm = framework::dataset::make("InputShape", TensorShape{ 10U, 10U, 1U, 1U }) *framework::dataset::make("StrideX", 2) *framework::dataset::make("StrideY",
-                                 2)
-                                 *framework::dataset::make("PadLeft", 3)
-                                 *framework::dataset::make("PadRight", 4) *framework::dataset::make("PadTop", 3) *framework::dataset::make("PadBottom", 4) *framework::dataset::make("NumKernels", { 1 });
+const auto data9x9_small_asymm = framework::dataset::make("InputShape", TensorShape
+{
+    10U, 10U, 1U, 1U
+})
+*framework::dataset::make("StrideX", 2) *framework::dataset::make("StrideY", 2) *framework::dataset::make("PadLeft", 3) *framework::dataset::make("PadRight", 4) *framework::dataset::make("PadTop",
+                           3) *framework::dataset::make("PadBottom", 4) *framework::dataset::make("NumKernels", { 1 });
 
framework::dataset::make("InputShape", TensorShape{ 640U, 360U, 56U, 1U }) *framework::dataset::make("StrideX", 2) *framework::dataset::make("StrideY", - 2) - *framework::dataset::make("PadLeft", 3) - *framework::dataset::make("PadRight", 4) *framework::dataset::make("PadTop", 3) *framework::dataset::make("PadBottom", 4) *framework::dataset::make("NumKernels", { 1 }); +const auto data9x9_large_asymm = framework::dataset::make("InputShape", TensorShape +{ + 640U, 360U, 56U, 1U +}) +*framework::dataset::make("StrideX", 2) *framework::dataset::make("StrideY", 2) *framework::dataset::make("PadLeft", 3) *framework::dataset::make("PadRight", 4) *framework::dataset::make("PadTop", + 3) *framework::dataset::make("PadBottom", 4) *framework::dataset::make("NumKernels", { 1 }); const auto data3x3_precommit = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 1, 2) * framework::dataset::make("StrideY", 1, 2) * framework::dataset::make("PadX", 0, 2) - * framework::dataset::make("PadY", 0, 2) * framework::dataset::make("NumKernels", { 3 }); + * framework::dataset::make("PadY", 0, 2) * framework::dataset::make("NumKernels", +{ + 3 +}); const auto data1x1 = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 1, 4) * framework::dataset::make("StrideY", 1, 4) * framework::dataset::make("PadX", 0, 1) - * framework::dataset::make("PadY", 0, 1) * framework::dataset::make("NumKernels", { 3 }); + * framework::dataset::make("PadY", 0, 1) * framework::dataset::make("NumKernels", +{ + 3 +}); const auto data5x1 = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 1, 4) * framework::dataset::make("StrideY", 1, 4) * framework::dataset::make("PadX", 0, 1) - * framework::dataset::make("PadY", 0, 1) * framework::dataset::make("NumKernels", { 3 }); + * framework::dataset::make("PadY", 0, 1) * framework::dataset::make("NumKernels", +{ + 3 +}); -const auto data_layouts_dataset = framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }); +const auto data_layouts_dataset = framework::dataset::make("DataLayout", +{ + DataLayout::NCHW, DataLayout::NHWC +}); -const auto add_bias_dataset = framework::dataset::make("AddBias", { true, false }); +const auto add_bias_dataset = framework::dataset::make("AddBias", +{ + true, false +}); const auto input_qinfo_dataset = framework::dataset::make("InputQInfo", { QuantizationInfo(1.f / 255.f, 0), - QuantizationInfo(2.f, 0), + QuantizationInfo(2.f, 0), }); const auto output_qinfo_dataset = framework::dataset::make("OutputQInfo", { QuantizationInfo(3.f / 255.f, 0), - QuantizationInfo(4.f, 0), + QuantizationInfo(4.f, 0), }); } // namespace TEST_SUITE(NEON) TEST_SUITE(DeconvolutionLayer) - // *INDENT-OFF* // clang-format off DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip( @@ -109,6 +136,8 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip( TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Invalid bias shape TensorInfo(TensorShape(13U, 11U, 4U, 3U), 1, DataType::F32), // Window shrink TensorInfo(TensorShape(32U, 16U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(2U,2U,1U,1U), 1, DataType::F32), // Small shape no padding + TensorInfo(TensorShape(3U,26U,26U,1U), 1, DataType::F32), // Negative padding }), framework::dataset::make("WeightsInfo", { TensorInfo(TensorShape(3U, 3U, 2U, 2U), 1, DataType::F16), TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::F32), @@ -116,6 +145,8 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, 
                                               TensorInfo(TensorShape(3U, 2U, 2U, 2U), 1, DataType::F32),
                                               TensorInfo(TensorShape(3U, 3U, 4U), 1, DataType::F32),
                                               TensorInfo(TensorShape(1U, 1U, 2U, 4U), 1, DataType::F32),
+                                              TensorInfo(TensorShape(3U, 3U, 1U, 1U), 1, DataType::F32),
+                                              TensorInfo(TensorShape(1U, 1U, 26U, 88U), 1, DataType::F32),
                                             })),
     framework::dataset::make("BiasInfo", { TensorInfo(TensorShape(1U), 1, DataType::F16),
                                            TensorInfo(TensorShape(1U), 1, DataType::F32),
@@ -123,6 +154,8 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(
                                            TensorInfo(TensorShape(1U), 1, DataType::F32),
                                            TensorInfo(TensorShape(25U, 11U), 1, DataType::F32),
                                            TensorInfo(TensorShape(1U), 1, DataType::F32),
                                            TensorInfo(TensorShape(4U), 1, DataType::F32),
+                                           TensorInfo(TensorShape(1U), 1, DataType::F32),
+                                           TensorInfo(TensorShape(88U), 1, DataType::F32),
                                          })),
     framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F16),
                                             TensorInfo(TensorShape(25U, 10U, 2U), 1, DataType::F32),
@@ -130,6 +163,8 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(
                                             TensorInfo(TensorShape(13U, 13U, 2U), 1, DataType::F32),
                                             TensorInfo(TensorShape(11U, 9U, 1U, 3U), 1, DataType::F32),
                                             TensorInfo(TensorShape(32U, 16U, 4U), 1, DataType::F32),
+                                            TensorInfo(TensorShape(4U, 4U, 1U, 1U), 1, DataType::F32),
+                                            TensorInfo(TensorShape(1U, 78U, 88U, 1U), 1, DataType::F32),
                                           })),
     framework::dataset::make("PadStrideInfo", { PadStrideInfo(1, 1, 0, 0),
                                                 PadStrideInfo(1, 1, 0, 0),
@@ -137,8 +172,10 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(
                                                 PadStrideInfo(1, 1, 0, 0),
                                                 PadStrideInfo(1, 1, 1, 1),
                                                 PadStrideInfo(1, 1, 0, 0),
+                                                PadStrideInfo(1, 1, 0, 0),
+                                                PadStrideInfo(2, 3, 3, 1),
                                               })),
-    framework::dataset::make("Expected", { false, false, false, false, false, true })),
+    framework::dataset::make("Expected", { false, false, false, false, false, true, true, false })),
     input_info, weights_info, bias_info, output_info, pad_info, expected)
 {
     bool is_valid = bool(NEDeconvolutionLayer::validate(&input_info.clone()->set_is_resizable(false), &weights_info.clone()->set_is_resizable(false), &bias_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), pad_info));
@@ -452,10 +489,22 @@ TEST_SUITE_END() // W5x1
 TEST_SUITE_END() // QASYMM8_SIGNED
 
-const auto input_qinfo_per_channel_dataset = framework::dataset::make("InputQuantizationInfo", { QuantizationInfo(1.f / 255.f, 10) });
-const auto output_qinfo_per_channel_dataset = framework::dataset::make("OutputQuantizationInfo", { QuantizationInfo(3.f / 255.f, 0) });
-const auto input_signed_qinfo_per_channel_dataset = framework::dataset::make("InputQuantizationInfo", { QuantizationInfo(1.f / 255.f, -10) });
-const auto output_signed_qinfo_per_channel_dataset = framework::dataset::make("OutputQuantizationInfo", { QuantizationInfo(3.f / 255.f, 10) });
+const auto input_qinfo_per_channel_dataset = framework::dataset::make("InputQuantizationInfo",
+{
+    QuantizationInfo(1.f / 255.f, 10)
+});
+const auto output_qinfo_per_channel_dataset = framework::dataset::make("OutputQuantizationInfo",
+{
+    QuantizationInfo(3.f / 255.f, 0)
+});
+const auto input_signed_qinfo_per_channel_dataset = framework::dataset::make("InputQuantizationInfo",
+{
+    QuantizationInfo(1.f / 255.f, -10)
+});
+const auto output_signed_qinfo_per_channel_dataset = framework::dataset::make("OutputQuantizationInfo",
+{
+    QuantizationInfo(3.f / 255.f, 10)
+});
 
 TEST_SUITE(QSYMM8_PER_CHANNEL)
-- 
cgit v1.2.1
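For reference, a user-side check of the newly supported configuration mirrors the test case added above. The snippet below is a sketch under the assumption that the Compute Library headers and the NEON runtime are available; the main() harness and the printed strings are illustrative, while the validate() call follows the same pattern as the added test.

    // Sketch: query support for the configuration covered by the new test
    // (NCHW, 2x2 input, 3x3 weights, 4x4 output, stride 1, no padding).
    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h"

    #include <iostream>
    #include <string>

    int main()
    {
        using namespace arm_compute;

        const TensorInfo input(TensorShape(2U, 2U, 1U, 1U), 1, DataType::F32);   // NCHW 2x2, 1 channel
        const TensorInfo weights(TensorShape(3U, 3U, 1U, 1U), 1, DataType::F32); // 3x3 kernel
        const TensorInfo bias(TensorShape(1U), 1, DataType::F32);
        const TensorInfo output(TensorShape(4U, 4U, 1U, 1U), 1, DataType::F32);  // expected 4x4 output
        const PadStrideInfo pad_info(1, 1, 0, 0);                                // stride 1, no padding

        // With this fix, validate() reports the configuration as supported.
        const Status status = NEDeconvolutionLayer::validate(&input, &weights, &bias, &output, pad_info);
        std::cout << (bool(status) ? std::string("supported") : status.error_description()) << std::endl;
        return bool(status) ? 0 : 1;
    }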