From 019a7d9fb5f988c47adb1327492db62869cfa6ac Mon Sep 17 00:00:00 2001 From: Viet-Hoa Do Date: Tue, 27 Jun 2023 16:33:57 +0100 Subject: Enable transpose convolution with non-square kernels Resolves: COMPMID-6319 Signed-off-by: Viet-Hoa Do Change-Id: I49a17ff973efc88b7ce0334c47ecf076c03f4cc3 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/9829 Reviewed-by: Jakub Sujak Comments-Addressed: Arm Jenkins Tested-by: Arm Jenkins Benchmark: Arm Jenkins --- docs/user_guide/release_version_and_change_log.dox | 1 + .../CL/functions/CLDirectDeconvolutionLayer.cpp | 4 +- .../NEON/functions/NEDeconvolutionLayer.cpp | 2 +- tests/validation/CL/DeconvolutionLayer.cpp | 86 +++++++++++++++++++++- tests/validation/NEON/DeconvolutionLayer.cpp | 81 ++++++++++++++++++++ .../fixtures/DeconvolutionLayerFixture.h | 6 +- 6 files changed, 171 insertions(+), 9 deletions(-) diff --git a/docs/user_guide/release_version_and_change_log.dox b/docs/user_guide/release_version_and_change_log.dox index e3235458d1..ec96f6096a 100644 --- a/docs/user_guide/release_version_and_change_log.dox +++ b/docs/user_guide/release_version_and_change_log.dox @@ -47,6 +47,7 @@ v23.08 Public major release - New features - Add new OpenCLâ„¢ kernels: - @ref opencl::kernels::ClMatMulNativeMMULKernel support for FP32 and FP16, with batch support + - Enable transposed convolution with non-square kernels on CPU and GPU. v23.05.1 Public patch release - Enable CMake and Bazel option to build multi_isa without FP16 support. diff --git a/src/runtime/CL/functions/CLDirectDeconvolutionLayer.cpp b/src/runtime/CL/functions/CLDirectDeconvolutionLayer.cpp index b263d73e9e..88c3c6193c 100644 --- a/src/runtime/CL/functions/CLDirectDeconvolutionLayer.cpp +++ b/src/runtime/CL/functions/CLDirectDeconvolutionLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021 Arm Limited. + * Copyright (c) 2019-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -67,8 +67,8 @@ Status CLDirectDeconvolutionLayer::validate(const ITensorInfo *input, const ITen const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); const size_t idx_c = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL); - ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_w) != weights->dimension(idx_h)); ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_w) < 1); + ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_h) < 1); auto out_dims = deconvolution_output_dimensions(input->dimension(idx_w), input->dimension(idx_h), weights->dimension(idx_w), weights->dimension(idx_h), info); diff --git a/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp b/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp index 8534d2a8f3..439aff0840 100644 --- a/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp +++ b/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp @@ -89,8 +89,8 @@ Status NEDeconvolutionLayer::validate(const ITensorInfo *input, const ITensorInf ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32, DataType::F16, DataType::QASYMM8, DataType::QASYMM8_SIGNED); const unsigned int width_idx = get_data_layout_dimension_index(weights->data_layout(), DataLayoutDimension::WIDTH); const unsigned int height_idx = get_data_layout_dimension_index(weights->data_layout(), DataLayoutDimension::HEIGHT); - ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(width_idx) != weights->dimension(height_idx)); ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(width_idx) < 1); + ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(height_idx) < 1); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(weights, input); if(is_data_type_quantized_per_channel(weights->data_type()) && is_data_type_quantized(input->data_type())) { diff --git a/tests/validation/CL/DeconvolutionLayer.cpp b/tests/validation/CL/DeconvolutionLayer.cpp index a04f273b60..6b12fc01a1 100644 --- a/tests/validation/CL/DeconvolutionLayer.cpp +++ b/tests/validation/CL/DeconvolutionLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2022 Arm Limited. + * Copyright (c) 2017-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -81,6 +81,9 @@ const auto data2x2_precommit = datasets::SmallDeconvolutionShapes() * framework: const auto data1x1 = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 1, 4) * framework::dataset::make("StrideY", 1, 4) * framework::dataset::make("PadX", 0, 1) * framework::dataset::make("PadY", 0, 1) * framework::dataset::make("NumKernels", { 3 }); +const auto data5x1 = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 1, 4) * framework::dataset::make("StrideY", 1, 4) * framework::dataset::make("PadX", 0, 1) + * framework::dataset::make("PadY", 0, 1) * framework::dataset::make("NumKernels", { 3 }); + const auto data_layouts_dataset = framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }); const auto add_bias_dataset = framework::dataset::make("AddBias", { true, false }); @@ -185,6 +188,9 @@ using CLDeconvolutionLayerFixture1x1 = DeconvolutionValidationFixture using CLDeconvolutionLayerAsymmFixture9x9 = DeconvolutionValidationAsymmFixture; +template +using CLDeconvolutionLayerFixture5x1 = DeconvolutionValidationFixture; + TEST_SUITE(Float) TEST_SUITE(FP32) @@ -265,6 +271,17 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLDeconvolutionLayerAsymmFixture9x9, fra validate(CLAccessor(_target), _reference, tolerance_fp32); } TEST_SUITE_END() // W9x9 + +TEST_SUITE(W5x1) +FIXTURE_DATA_TEST_CASE(Run, CLDeconvolutionLayerFixture5x1, framework::DatasetMode::NIGHTLY, combine(combine(combine(data5x1, framework::dataset::make("DataType", DataType::F32)), + data_layouts_dataset), + add_bias_dataset)) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_fp32); +} +TEST_SUITE_END() // W5x1 + TEST_SUITE_END() // FP32 TEST_SUITE(FP16) @@ -318,6 +335,16 @@ FIXTURE_DATA_TEST_CASE(Run, CLDeconvolutionLayerFixture1x1, framework::Dat } TEST_SUITE_END() // W1x1 +TEST_SUITE(W5x1) +FIXTURE_DATA_TEST_CASE(Run, CLDeconvolutionLayerFixture5x1, framework::DatasetMode::NIGHTLY, combine(combine(combine(data5x1, framework::dataset::make("DataType", DataType::F16)), + data_layouts_dataset), + add_bias_dataset)) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num); +} +TEST_SUITE_END() // W5x1 + TEST_SUITE_END() // FP16 TEST_SUITE_END() // Float @@ -333,6 +360,9 @@ using CLDeconvolutionLayerQuantizedFixture2x2 = DeconvolutionValidationQuantized template using CLDeconvolutionLayerQuantizedFixture1x1 = DeconvolutionValidationQuantizedFixture; +template +using CLDeconvolutionLayerQuantizedFixture5x1 = DeconvolutionValidationQuantizedFixture; + template using CLDeconvolutionLayerQuantizedPerChannelFixture4x4 = DeconvolutionValidationQuantizedPerChannelFixture; @@ -345,6 +375,9 @@ using CLDeconvolutionLayerQuantizedPerChannelFixture2x2 = DeconvolutionValidatio template using CLDeconvolutionLayerQuantizedPerChannelFixture1x1 = DeconvolutionValidationQuantizedPerChannelFixture; +template +using CLDeconvolutionLayerQuantizedPerChannelFixture5x1 = DeconvolutionValidationQuantizedPerChannelFixture; + TEST_SUITE(Quantized) TEST_SUITE(QASYMM8) @@ -412,6 +445,19 @@ FIXTURE_DATA_TEST_CASE(Run, CLDeconvolutionLayerQuantizedFixture1x1, fr } TEST_SUITE_END() // W1x1 +TEST_SUITE(W5x1) +FIXTURE_DATA_TEST_CASE(Run, CLDeconvolutionLayerQuantizedFixture5x1, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(data5x1, framework::dataset::make("DataType", + DataType::QASYMM8)), + data_layouts_dataset), + framework::dataset::make("InputQuantizationInfo", { QuantizationInfo(1.f / 255.f, 10), QuantizationInfo(2.f / 255.f, 5) })), + framework::dataset::make("OutputQuantizationInfo", { QuantizationInfo(3.f / 255.f, 5), QuantizationInfo(4.f / 255.f, 10) })), + add_bias_dataset)) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_qasymm8, tolerance_num); +} +TEST_SUITE_END() // W5x1 + TEST_SUITE_END() // QASYMM8 TEST_SUITE(QASYMM8_SIGNED) @@ -485,6 +531,19 @@ FIXTURE_DATA_TEST_CASE(Run, CLDeconvolutionLayerQuantizedFixture1x1, fra } TEST_SUITE_END() // W1x1 +TEST_SUITE(W5x1) +FIXTURE_DATA_TEST_CASE(Run, CLDeconvolutionLayerQuantizedFixture5x1, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(data5x1, framework::dataset::make("DataType", + DataType::QASYMM8_SIGNED)), + data_layouts_dataset), + framework::dataset::make("InputQuantizationInfo", { QuantizationInfo(1.f / 255.f, 10), QuantizationInfo(2.f / 255.f, 5) })), + framework::dataset::make("OutputQuantizationInfo", { QuantizationInfo(3.f / 255.f, 5), QuantizationInfo(4.f / 255.f, 10) })), + add_bias_dataset)) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_qasymm8, tolerance_num); +} +TEST_SUITE_END() // W5x1 + TEST_SUITE_END() // QASYMM8_SIGNED const auto input_qinfo_dataset = framework::dataset::make("InputQuantizationInfo", { QuantizationInfo(1.f / 255.f, 10) }); @@ -619,6 +678,31 @@ FIXTURE_DATA_TEST_CASE(RunSmallSigned, CLDeconvolutionLayerQuantizedPerChannelFi } TEST_SUITE_END() // W1x1 +TEST_SUITE(W5x1) +FIXTURE_DATA_TEST_CASE(RunSmall, CLDeconvolutionLayerQuantizedPerChannelFixture5x1, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(combine(data5x1, + framework::dataset::make("DataType", DataType::QASYMM8)), + data_layouts_dataset), + input_qinfo_dataset), + output_qinfo_dataset), + add_bias_dataset), + framework::dataset::make("WeightsDataType", { DataType::QSYMM8_PER_CHANNEL }))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_qasymm8, tolerance_num); +} +FIXTURE_DATA_TEST_CASE(RunSmallSigned, CLDeconvolutionLayerQuantizedPerChannelFixture5x1, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(combine(data5x1, + framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), + data_layouts_dataset), + input_signed_qinfo_dataset), + output_signed_qinfo_dataset), + add_bias_dataset), + framework::dataset::make("WeightsDataType", { DataType::QSYMM8_PER_CHANNEL }))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_qasymm8, tolerance_num); +} +TEST_SUITE_END() // W5x1 + TEST_SUITE_END() // QSYMM8_PER_CHANNEL TEST_SUITE_END() // Quantized diff --git a/tests/validation/NEON/DeconvolutionLayer.cpp b/tests/validation/NEON/DeconvolutionLayer.cpp index af25543193..d26d26adf7 100644 --- a/tests/validation/NEON/DeconvolutionLayer.cpp +++ b/tests/validation/NEON/DeconvolutionLayer.cpp @@ -76,6 +76,9 @@ const auto data3x3_precommit = datasets::SmallDeconvolutionShapes() * framework: const auto data1x1 = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 1, 4) * framework::dataset::make("StrideY", 1, 4) * framework::dataset::make("PadX", 0, 1) * framework::dataset::make("PadY", 0, 1) * framework::dataset::make("NumKernels", { 3 }); +const auto data5x1 = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 1, 4) * framework::dataset::make("StrideY", 1, 4) * framework::dataset::make("PadX", 0, 1) + * framework::dataset::make("PadY", 0, 1) * framework::dataset::make("NumKernels", { 3 }); + const auto data_layouts_dataset = framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }); const auto add_bias_dataset = framework::dataset::make("AddBias", { true, false }); @@ -159,6 +162,9 @@ using NEDeconvolutionLayerAsymmFixture9x9 = DeconvolutionValidationAsymmFixture< template using NEDeconvolutionLayerFixture1x1 = DeconvolutionValidationFixture; +template +using NEDeconvolutionLayerFixture5x1 = DeconvolutionValidationFixture; + TEST_SUITE(Float) TEST_SUITE(FP32) TEST_SUITE(W4x4) @@ -222,6 +228,15 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEDeconvolutionLayerAsymmFixture9x9, fra validate(Accessor(_target), _reference, tolerance_fp32); } TEST_SUITE_END() // W9x9 +TEST_SUITE(W5x1) +FIXTURE_DATA_TEST_CASE(Run, NEDeconvolutionLayerFixture5x1, framework::DatasetMode::NIGHTLY, combine(combine(combine(data5x1, framework::dataset::make("DataType", DataType::F32)), + data_layouts_dataset), + add_bias_dataset)) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_fp32); +} +TEST_SUITE_END() // W5x1 TEST_SUITE_END() // FP32 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC @@ -261,6 +276,15 @@ FIXTURE_DATA_TEST_CASE(Run, NEDeconvolutionLayerFixture1x1, framework::Dat validate(Accessor(_target), _reference, tolerance_fp16, tolerance_num_fp16); } TEST_SUITE_END() // W1x1 +TEST_SUITE(W5x1) +FIXTURE_DATA_TEST_CASE(Run, NEDeconvolutionLayerFixture5x1, framework::DatasetMode::NIGHTLY, combine(combine(combine(data5x1, framework::dataset::make("DataType", DataType::F16)), + data_layouts_dataset), + add_bias_dataset)) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_fp16, tolerance_num_fp16); +} +TEST_SUITE_END() // W5x1 TEST_SUITE_END() // FP16 #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ @@ -275,6 +299,9 @@ using NEDeconvolutionLayerQuantizedFixture3x3 = DeconvolutionValidationQuantized template using NEDeconvolutionLayerQuantizedFixture1x1 = DeconvolutionValidationQuantizedFixture; +template +using NEDeconvolutionLayerQuantizedFixture5x1 = DeconvolutionValidationQuantizedFixture; + template using NEDeconvolutionLayerQuantizedPerChannelFixture4x4 = DeconvolutionValidationQuantizedPerChannelFixture; @@ -284,6 +311,9 @@ using NEDeconvolutionLayerQuantizedPerChannelFixture3x3 = DeconvolutionValidatio template using NEDeconvolutionLayerQuantizedPerChannelFixture1x1 = DeconvolutionValidationQuantizedPerChannelFixture; +template +using NEDeconvolutionLayerQuantizedPerChannelFixture5x1 = DeconvolutionValidationQuantizedPerChannelFixture; + TEST_SUITE(Quantized) TEST_SUITE(QASYMM8) @@ -338,6 +368,19 @@ FIXTURE_DATA_TEST_CASE(Run, NEDeconvolutionLayerQuantizedFixture1x1, fr } TEST_SUITE_END() // W1x1 +TEST_SUITE(W5x1) +FIXTURE_DATA_TEST_CASE(Run, NEDeconvolutionLayerQuantizedFixture5x1, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(data5x1, framework::dataset::make("DataType", + DataType::QASYMM8)), + data_layouts_dataset), + input_qinfo_dataset), + output_qinfo_dataset), + add_bias_dataset)) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_quantized, tolerance_num_quant); +} +TEST_SUITE_END() // W5x1 + TEST_SUITE_END() // QASYMM8 TEST_SUITE(QASYMM8_SIGNED) @@ -394,6 +437,19 @@ FIXTURE_DATA_TEST_CASE(Run, NEDeconvolutionLayerQuantizedFixture1x1, fra } TEST_SUITE_END() // W1x1 +TEST_SUITE(W5x1) +FIXTURE_DATA_TEST_CASE(Run, NEDeconvolutionLayerQuantizedFixture5x1, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(data5x1, framework::dataset::make("DataType", + DataType::QASYMM8_SIGNED)), + data_layouts_dataset), + input_qinfo_dataset), + output_qinfo_dataset), + add_bias_dataset)) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_quantized, tolerance_num_quant); +} +TEST_SUITE_END() // W5x1 + TEST_SUITE_END() // QASYMM8_SIGNED const auto input_qinfo_per_channel_dataset = framework::dataset::make("InputQuantizationInfo", { QuantizationInfo(1.f / 255.f, 10) }); @@ -478,6 +534,31 @@ FIXTURE_DATA_TEST_CASE(RunSigned, NEDeconvolutionLayerQuantizedPerChannelFixture } TEST_SUITE_END() // W1x1 +TEST_SUITE(W5x1) +FIXTURE_DATA_TEST_CASE(Run, NEDeconvolutionLayerQuantizedPerChannelFixture5x1, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(data5x1, + framework::dataset::make("DataType", DataType::QASYMM8)), + data_layouts_dataset), + input_qinfo_per_channel_dataset), + output_qinfo_per_channel_dataset), + add_bias_dataset), + framework::dataset::make("WeightsDataType", { DataType::QSYMM8_PER_CHANNEL }))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_quantized, tolerance_num_quant); +} +FIXTURE_DATA_TEST_CASE(RunSigned, NEDeconvolutionLayerQuantizedPerChannelFixture5x1, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(data5x1, + framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), + data_layouts_dataset), + input_signed_qinfo_per_channel_dataset), + output_signed_qinfo_per_channel_dataset), + add_bias_dataset), + framework::dataset::make("WeightsDataType", { DataType::QSYMM8_PER_CHANNEL }))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_quantized, tolerance_num_quant); +} +TEST_SUITE_END() // W5x1 + TEST_SUITE_END() // QSYMM8_PER_CHANNEL TEST_SUITE_END() // Quantized diff --git a/tests/validation/fixtures/DeconvolutionLayerFixture.h b/tests/validation/fixtures/DeconvolutionLayerFixture.h index d13eab2f54..947dc10da2 100644 --- a/tests/validation/fixtures/DeconvolutionLayerFixture.h +++ b/tests/validation/fixtures/DeconvolutionLayerFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2022 Arm Limited. + * Copyright (c) 2017-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -250,7 +250,6 @@ public: void setup(TensorShape input_shape, unsigned int sx, unsigned int sy, unsigned int padx, unsigned int pady, unsigned int num_kernels, DataType data_type, DataLayout data_layout, bool add_bias) { - ARM_COMPUTE_ERROR_ON_MSG(kernel_size_x != kernel_size_y, "Only square kernels supported"); const TensorShape weights_shape(kernel_size_x, kernel_size_y, input_shape.z(), num_kernels); const TensorShape bias_shape(num_kernels); const PadStrideInfo info(sx, sy, padx, pady, DimensionRoundingType::CEIL); @@ -271,7 +270,6 @@ public: void setup(TensorShape input_shape, unsigned int sx, unsigned int sy, unsigned int pad_left, unsigned int pad_right, unsigned int pad_top, unsigned int pad_bottom, unsigned int num_kernels, DataType data_type, DataLayout data_layout, bool add_bias) { - ARM_COMPUTE_ERROR_ON_MSG(kernel_size_x != kernel_size_y, "Only square kernels supported"); const TensorShape weights_shape(kernel_size_x, kernel_size_y, input_shape.z(), num_kernels); const TensorShape bias_shape(num_kernels); const PadStrideInfo info(sx, sy, pad_left, pad_right, pad_top, pad_bottom, DimensionRoundingType::CEIL); @@ -292,7 +290,6 @@ public: void setup(TensorShape input_shape, unsigned int sx, unsigned int sy, unsigned int padx, unsigned int pady, unsigned int num_kernels, DataType data_type, DataLayout data_layout, QuantizationInfo input_quantization_info, QuantizationInfo output_quantization_info, bool add_bias) { - ARM_COMPUTE_ERROR_ON_MSG(kernel_size_x != kernel_size_y, "Only square kernels supported"); const TensorShape weights_shape(kernel_size_x, kernel_size_y, input_shape.z(), num_kernels); const TensorShape bias_shape(num_kernels); const PadStrideInfo info(sx, sy, padx, pady, DimensionRoundingType::CEIL); @@ -315,7 +312,6 @@ public: unsigned int num_kernels, DataType data_type, DataLayout data_layout, QuantizationInfo input_quantization_info, QuantizationInfo output_quantization_info, bool add_bias, DataType weights_data_type) { - ARM_COMPUTE_ERROR_ON_MSG(kernel_size_x != kernel_size_y, "Only square kernels supported"); const TensorShape weights_shape(kernel_size_x, kernel_size_y, input_shape.z(), num_kernels); const TensorShape bias_shape(num_kernels); const PadStrideInfo info(sx, sy, padx, pady, DimensionRoundingType::CEIL); -- cgit v1.2.1