From 6e10aa395e81b83edb3437191acd7abe1639c7dc Mon Sep 17 00:00:00 2001 From: Manuel Bottini Date: Thu, 30 Apr 2020 13:28:23 +0100 Subject: COMPMID-3316: NEDeconvolutionLayer failing for a big input - Using NEDirectConvolution for big shapes since the memory required explodes for 9x9 kernel - Adding test cases - Fix enables only the NEON Deconvolution for NHWC Change-Id: I8a541346428e5686818f8ecb7f69e2a9106cbceb Signed-off-by: Manuel Bottini Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3135 Tested-by: Arm Jenkins Comments-Addressed: Arm Jenkins Reviewed-by: Sang-Hoon Park --- arm_compute/core/utils/misc/ShapeCalculator.h | 2 +- .../runtime/NEON/functions/NEDeconvolutionLayer.h | 7 - src/core/CPP/kernels/CPPUpsampleKernel.cpp | 32 ++- src/runtime/NEON/functions/NEConvolutionLayer.cpp | 5 +- .../NEON/functions/NEDeconvolutionLayer.cpp | 218 +++++++-------------- .../NEON/functions/NEDirectConvolutionLayer.cpp | 5 +- tests/validation/CL/DeconvolutionLayer.cpp | 24 ++- tests/validation/NEON/DeconvolutionLayer.cpp | 31 +++ tests/validation/reference/DeconvolutionLayer.cpp | 2 +- 9 files changed, 152 insertions(+), 174 deletions(-) diff --git a/arm_compute/core/utils/misc/ShapeCalculator.h b/arm_compute/core/utils/misc/ShapeCalculator.h index 7d2b7df43b..dfccec8b37 100644 --- a/arm_compute/core/utils/misc/ShapeCalculator.h +++ b/arm_compute/core/utils/misc/ShapeCalculator.h @@ -485,7 +485,7 @@ inline TensorShape compute_depthwise_convolution_shape(const ITensorInfo &input, * @return the calculated shape */ inline TensorShape compute_deconvolution_upsampled_shape(const ITensorInfo &input, const ITensorInfo &weights, unsigned int sx, unsigned int sy, - std::pair &out_dims, unsigned int &padx, unsigned int &pady) + std::pair &out_dims, uint32_t &padx, uint32_t &pady) { const DataLayout data_layout = input.data_layout(); const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); diff --git a/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h index c4c1664f20..e2ed0e0abc 100644 --- a/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h +++ b/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h @@ -120,16 +120,9 @@ private: NEConvolutionLayer _conv_f; CPPUpsample _upsample_f; NEReverse _flip_weights; - NEPermute _permute_input; - NEPermute _permute_weights; - NEPermute _permute_output; Tensor _scaled_output; Tensor _weights_flipped; - Tensor _permuted_input; - Tensor _permuted_weights; - Tensor _permuted_output; Tensor _flip_axis; - bool _is_nchw; const ITensor *_original_weights; ITensor *_input; PadStrideInfo _info; diff --git a/src/core/CPP/kernels/CPPUpsampleKernel.cpp b/src/core/CPP/kernels/CPPUpsampleKernel.cpp index c190543216..8348b4335e 100644 --- a/src/core/CPP/kernels/CPPUpsampleKernel.cpp +++ b/src/core/CPP/kernels/CPPUpsampleKernel.cpp @@ -71,15 +71,19 @@ void CPPUpsampleKernel::run(const Window &window, const ThreadInfo &info) ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICPPKernel::window(), window); + const DataLayout data_layout = _input->info()->data_layout(); + const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); + const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); + // Initialize _scaled_output buffer - const int width_scaled = _output->info()->dimension(0); - const int height_scaled = _output->info()->dimension(1); - const int stride_x = _info.stride().first; - const int stride_y = _info.stride().second; - const int start_x = _info.pad_left(); - const int start_y = _info.pad_top(); - const int end_x = width_scaled - _info.pad_right(); - const int end_y = height_scaled - _info.pad_bottom(); + const int width_scaled = _output->info()->dimension(idx_w); + const int height_scaled = _output->info()->dimension(idx_h); + const int stride_width = _info.stride().first; + const int stride_height = _info.stride().second; + const int start_width = _info.pad_left(); + const int start_height = _info.pad_top(); + const int end_width = width_scaled - _info.pad_right(); + const int end_height = height_scaled - _info.pad_bottom(); const size_t element_size = _input->info()->element_size(); // The fill value is normally 0, but for quantized types '0' corresponds to the offset @@ -103,8 +107,16 @@ void CPPUpsampleKernel::run(const Window &window, const ThreadInfo &info) // Create window Window window_out(window); - window_out.set(Window::DimX, Window::Dimension(start_x, end_x, stride_x)); - window_out.set(Window::DimY, Window::Dimension(start_y, end_y, stride_y)); + if(data_layout == DataLayout::NCHW) + { + window_out.set(Window::DimX, Window::Dimension(start_width, end_width, stride_width)); + window_out.set(Window::DimY, Window::Dimension(start_height, end_height, stride_height)); + } + else + { + window_out.set(Window::DimY, Window::Dimension(start_width, end_width, stride_width)); + window_out.set(Window::DimZ, Window::Dimension(start_height, end_height, stride_height)); + } // Create iterators Iterator in(_input, window); diff --git a/src/runtime/NEON/functions/NEConvolutionLayer.cpp b/src/runtime/NEON/functions/NEConvolutionLayer.cpp index 1755e9a774..dcd26fc1cd 100644 --- a/src/runtime/NEON/functions/NEConvolutionLayer.cpp +++ b/src/runtime/NEON/functions/NEConvolutionLayer.cpp @@ -102,7 +102,7 @@ Status NEConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMConvolutionLayer::validate(input, weights, biases, output, conv_info, weights_info, dilation, act_info)); break; case ConvolutionMethod::DIRECT: - //Validate Gemm-based Convolution + //Validate Direct Convolution ARM_COMPUTE_RETURN_ON_ERROR(NEDirectConvolutionLayer::validate(input, weights, biases, output, conv_info, act_info)); break; case ConvolutionMethod::FFT: @@ -167,7 +167,8 @@ ConvolutionMethod NEConvolutionLayer::get_convolution_method(const ITensorInfo * else { // SRGAN - if((input->dimension(idx_h) > 720U) && (output->dimension(idx_h) > 720U) && (weights->dimension(idx_h) == 9) + // Output might not be initialized when it is an internal tensor of the layer using the convolution + if(input->total_size() > 1e7 && (weights->dimension(idx_h) > 7) && (NEDirectConvolutionLayer::validate(input, weights, nullptr, output, conv_info, act_info))) { return ConvolutionMethod::DIRECT; diff --git a/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp b/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp index 06885d59e5..c87dac60dc 100644 --- a/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp +++ b/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp @@ -33,21 +33,45 @@ using namespace arm_compute::misc::shape_calculator; namespace arm_compute { +namespace +{ +PadStrideInfo compute_upsample_info(const PadStrideInfo &info, uint32_t deconv_pad_x, uint32_t deconv_pad_y) +{ + const unsigned int pad_left = info.pad_left(); + const unsigned int pad_right = info.pad_right(); + const unsigned int pad_top = info.pad_top(); + const unsigned int pad_bottom = info.pad_bottom(); + const unsigned int stride_x = info.stride().first; + const unsigned int stride_y = info.stride().second; + + // Find the upsampled dimensions and the padding needed for the convolution with stride 1 in order to match output shape + unsigned int deconv_pad_left = pad_right > pad_left ? pad_right - pad_left : 0; + unsigned int deconv_pad_right = pad_left > pad_right ? pad_left - pad_right : 0; + deconv_pad_x -= deconv_pad_left + deconv_pad_right; + ARM_COMPUTE_ERROR_ON((deconv_pad_x % 2) != 0); + deconv_pad_left += deconv_pad_x / 2; + deconv_pad_right += deconv_pad_x / 2; + + unsigned int deconv_pad_top = pad_bottom > pad_top ? pad_bottom - pad_top : 0; + unsigned int deconv_pad_bottom = pad_top > pad_bottom ? pad_top - pad_bottom : 0; + deconv_pad_y -= deconv_pad_top + deconv_pad_bottom; + ARM_COMPUTE_ERROR_ON((deconv_pad_y % 2) != 0); + deconv_pad_top += deconv_pad_y / 2; + deconv_pad_bottom += deconv_pad_y / 2; + + return PadStrideInfo(stride_x, stride_y, deconv_pad_left, deconv_pad_right, deconv_pad_top, deconv_pad_bottom, DimensionRoundingType::FLOOR); +} + +} // namespace + NEDeconvolutionLayer::NEDeconvolutionLayer(std::shared_ptr memory_manager) // NOLINT : _memory_group(std::move(memory_manager)), _conv_f(), _upsample_f(), _flip_weights(), - _permute_input(), - _permute_weights(), - _permute_output(), _scaled_output(), _weights_flipped(), - _permuted_input(), - _permuted_weights(), - _permuted_output(), _flip_axis(), - _is_nchw(false), _original_weights(nullptr), _input(nullptr), _info(), @@ -92,8 +116,8 @@ Status NEDeconvolutionLayer::validate(const ITensorInfo *input, const ITensorInf ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(Window::DimZ) != output_shape.z(), "Output's depth is invalid."); } - unsigned int deconv_pad_x = 0; - unsigned int deconv_pad_y = 0; + uint32_t deconv_pad_x = 0; + uint32_t deconv_pad_y = 0; const unsigned int stride_x = info.stride().first; const unsigned int stride_y = info.stride().second; const TensorShape scale_out_shape = compute_deconvolution_upsampled_shape(*input, *weights, stride_x, stride_y, out_dims, deconv_pad_x, deconv_pad_y); @@ -116,136 +140,58 @@ void NEDeconvolutionLayer::configure(ITensor *input, const ITensor *weights, con ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output); ARM_COMPUTE_ERROR_THROW_ON(NEDeconvolutionLayer::validate(input->info(), weights->info(), (bias == nullptr) ? nullptr : bias->info(), output->info(), info)); - const DataLayout data_layout = input->info()->data_layout(); + const DataLayout data_layout = input->info()->data_layout(); + const unsigned int width_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); + const unsigned int height_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); + auto out_dims = deconvolution_output_dimensions(input->info()->dimension(width_idx), input->info()->dimension(height_idx), + weights->info()->dimension(width_idx), weights->info()->dimension(height_idx), info); + + const TensorShape output_shape = compute_deconvolution_output_shape(out_dims, *input->info(), *weights->info()); _input = input; _original_weights = weights; _info = info; _is_prepared = false; - _is_nchw = data_layout == DataLayout::NCHW; - _flip_axis.allocator()->init(TensorInfo(TensorShape(2U), 1, DataType::U32)); - - const unsigned int pad_left = info.pad_left(); - const unsigned int pad_right = info.pad_right(); - const unsigned int pad_top = info.pad_top(); - const unsigned int pad_bottom = info.pad_bottom(); - const unsigned int stride_x = info.stride().first; - const unsigned int stride_y = info.stride().second; - const unsigned int width_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); - const unsigned int height_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); - auto out_dims = deconvolution_output_dimensions(input->info()->dimension(width_idx), input->info()->dimension(height_idx), - weights->info()->dimension(width_idx), weights->info()->dimension(height_idx), info); + const unsigned int stride_x = info.stride().first; + const unsigned int stride_y = info.stride().second; - const TensorShape output_shape = compute_deconvolution_output_shape(out_dims, *input->info(), *weights->info()); // Output auto initialization if not yet initialized auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type(), input->info()->quantization_info()); _flip_axis.allocator()->init(TensorInfo(TensorShape(2U), 1, DataType::U32)); _memory_group.manage(&_scaled_output); + _memory_group.manage(&_flip_axis); - if(!_is_nchw) - { - _memory_group.manage(&_permuted_input); - _memory_group.manage(&_permuted_output); - - // Configure the function to transform the input tensor from NHWC -> NCHW - _permuted_input.info()->set_quantization_info(input->info()->quantization_info()); - _permute_input.configure(input, &_permuted_input, PermutationVector(1U, 2U, 0U)); - _permuted_input.info()->set_data_layout(DataLayout::NCHW); - - // Configure the function to transform the weights tensor from NHWC -> NCHW - _permuted_weights.info()->set_quantization_info(weights->info()->quantization_info()); - _permute_weights.configure(weights, &_permuted_weights, PermutationVector(1U, 2U, 0U)); - _permuted_weights.info()->set_data_layout(DataLayout::NCHW); - - // Find the upsampled dimensions and the padding needed for the convolution with stride 1 in order to match output shape - unsigned int deconv_pad_x = 0; - unsigned int deconv_pad_y = 0; - const TensorShape scale_out_shape = compute_deconvolution_upsampled_shape(*_permuted_input.info(), *_permuted_weights.info(), stride_x, stride_y, out_dims, - deconv_pad_x, deconv_pad_y); - - unsigned int deconv_pad_left = pad_right > pad_left ? pad_right - pad_left : 0; - unsigned int deconv_pad_right = pad_left > pad_right ? pad_left - pad_right : 0; - deconv_pad_x -= deconv_pad_left + deconv_pad_right; - ARM_COMPUTE_ERROR_ON((deconv_pad_x % 2) != 0); - deconv_pad_left += deconv_pad_x / 2; - deconv_pad_right += deconv_pad_x / 2; - - unsigned int deconv_pad_top = pad_bottom > pad_top ? pad_bottom - pad_top : 0; - unsigned int deconv_pad_bottom = pad_top > pad_bottom ? pad_top - pad_bottom : 0; - deconv_pad_y -= deconv_pad_top + deconv_pad_bottom; - ARM_COMPUTE_ERROR_ON((deconv_pad_y % 2) != 0); - deconv_pad_top += deconv_pad_y / 2; - deconv_pad_bottom += deconv_pad_y / 2; - - TensorInfo scale_out_info(scale_out_shape, 1, _permuted_input.info()->data_type(), _permuted_input.info()->quantization_info()); - scale_out_info.set_data_layout(DataLayout::NCHW); - _scaled_output.allocator()->init(scale_out_info); - - const PadStrideInfo upsample_info(stride_x, stride_y, deconv_pad_left, deconv_pad_right, deconv_pad_top, deconv_pad_bottom, DimensionRoundingType::FLOOR); - _upsample_f.configure(&_permuted_input, &_scaled_output, upsample_info); - - _weights_flipped.allocator()->init(*_permuted_weights.info()->clone()); - _weights_flipped.info()->set_quantization_info(weights->info()->quantization_info()); - _flip_weights.configure(&_permuted_weights, &_weights_flipped, &_flip_axis); - - // setup the function to convolve the upscaled output - const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL); - - _permuted_output.info()->set_quantization_info(output->info()->quantization_info()); - _conv_f.configure(&_scaled_output, &_weights_flipped, bias, &_permuted_output, conv_info); - - // Configure the function to transform the convoluted output to NHWC - _permute_output.configure(&_permuted_output, output, PermutationVector(2U, 0U, 1U)); - _permuted_output.info()->set_data_layout(DataLayout::NCHW); - - _permuted_input.allocator()->allocate(); - _permuted_output.allocator()->allocate(); - } - else - { - // Find the upsampled dimensions and the padding needed for the convolution with stride 1 in order to match output shape - unsigned int deconv_pad_x = 0; - unsigned int deconv_pad_y = 0; - const TensorShape scale_out_shape = compute_deconvolution_upsampled_shape(*input->info(), *weights->info(), stride_x, stride_y, - out_dims, deconv_pad_x, deconv_pad_y); - - unsigned int deconv_pad_left = pad_right > pad_left ? pad_right - pad_left : 0; - unsigned int deconv_pad_right = pad_left > pad_right ? pad_left - pad_right : 0; - deconv_pad_x -= deconv_pad_left + deconv_pad_right; - ARM_COMPUTE_ERROR_ON((deconv_pad_x % 2) != 0); - deconv_pad_left += deconv_pad_x / 2; - deconv_pad_right += deconv_pad_x / 2; - - unsigned int deconv_pad_top = pad_bottom > pad_top ? pad_bottom - pad_top : 0; - unsigned int deconv_pad_bottom = pad_top > pad_bottom ? pad_top - pad_bottom : 0; - deconv_pad_y -= deconv_pad_top + deconv_pad_bottom; - ARM_COMPUTE_ERROR_ON((deconv_pad_y % 2) != 0); - deconv_pad_top += deconv_pad_y / 2; - deconv_pad_bottom += deconv_pad_y / 2; - - TensorInfo scale_out_info(scale_out_shape, 1, input->info()->data_type(), input->info()->quantization_info()); - scale_out_info.set_data_layout(data_layout); - _scaled_output.allocator()->init(scale_out_info); - - const PadStrideInfo upsample_info(stride_x, stride_y, deconv_pad_left, deconv_pad_right, deconv_pad_top, deconv_pad_bottom, DimensionRoundingType::FLOOR); - _upsample_f.configure(input, &_scaled_output, upsample_info); - - _weights_flipped.allocator()->init(weights->info()->clone()->set_data_layout(data_layout)); - _flip_weights.configure(weights, &_weights_flipped, &_flip_axis); - - // setup the function to convolve the upscaled output - const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL); - _conv_f.configure(&_scaled_output, &_weights_flipped, bias, output, conv_info); - } - _scaled_output.allocator()->allocate(); + _weights_flipped.allocator()->init(weights->info()->clone()->set_data_layout(data_layout)); + _flip_weights.configure(weights, &_weights_flipped, &_flip_axis); + + // setup the function to convolve the upscaled output + const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL); + uint32_t deconv_pad_x = 0; + uint32_t deconv_pad_y = 0; + + const TensorShape scale_out_shape = compute_deconvolution_upsampled_shape(*input->info(), *weights->info(), + stride_x, stride_y, + out_dims, deconv_pad_x, deconv_pad_y); + + const PadStrideInfo upsample_info = compute_upsample_info(info, deconv_pad_x, deconv_pad_y); + + TensorInfo scale_out_info(scale_out_shape, 1, input->info()->data_type(), input->info()->quantization_info()); + scale_out_info.set_data_layout(data_layout); + _scaled_output.allocator()->init(scale_out_info); + + _upsample_f.configure(input, &_scaled_output, upsample_info); + + _conv_f.configure(&_scaled_output, &_weights_flipped, bias, output, conv_info); // Setup flip axis data _flip_axis.allocator()->allocate(); auto axis_data = reinterpret_cast(_flip_axis.buffer()); - axis_data[0] = 0; - axis_data[1] = 1; + axis_data[0] = static_cast(width_idx); + axis_data[1] = static_cast(height_idx); + + _scaled_output.allocator()->allocate(); } void NEDeconvolutionLayer::run() @@ -254,20 +200,8 @@ void NEDeconvolutionLayer::run() MemoryGroupResourceScope scope_mg(_memory_group); - // Permute input - if(!_is_nchw) - { - _permute_input.run(); - } - _upsample_f.run(); _conv_f.run(); - - // Permute output - if(!_is_nchw) - { - _permute_output.run(); - } } void NEDeconvolutionLayer::prepare() @@ -275,13 +209,6 @@ void NEDeconvolutionLayer::prepare() if(!_is_prepared) { ARM_COMPUTE_ERROR_ON(!_original_weights->is_used()); - // Permute weights - if(!_is_nchw) - { - // Manually manage _permuted_weights - _permuted_weights.allocator()->allocate(); - _permute_weights.run(); - } // Run weights flipping and mark original weights tensor as unused _weights_flipped.allocator()->allocate(); @@ -291,15 +218,6 @@ void NEDeconvolutionLayer::prepare() // Prepare convolution _conv_f.prepare(); - // Unused weights are already released in _conv_f - - if(!_is_nchw) - { - // Manually manage _permuted_weights - // Free _permuted_weights as it not used after this method (prepare) - _permuted_weights.allocator()->free(); - } - _is_prepared = true; } } diff --git a/src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp b/src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp index 65538848df..751a3fa1fb 100644 --- a/src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp +++ b/src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -76,7 +76,8 @@ Status NEDirectConvolutionLayer::validate(const ITensorInfo *input, const ITenso { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output); - DataType data_type = output->data_type(); + // output might not be initialized since it can be an intermediate tensor of another layer + DataType data_type = input->data_type(); TensorInfo accumulator(output->clone()->set_is_resizable(true).reset_padding().set_data_type(data_type)); // Validate Convolution kernel diff --git a/tests/validation/CL/DeconvolutionLayer.cpp b/tests/validation/CL/DeconvolutionLayer.cpp index e7ba930ebe..dd92887ae8 100644 --- a/tests/validation/CL/DeconvolutionLayer.cpp +++ b/tests/validation/CL/DeconvolutionLayer.cpp @@ -49,6 +49,16 @@ RelativeTolerance tolerance_f16(half_float::half(0.2)); /**< T constexpr AbsoluteTolerance tolerance_qasymm8(1.0); /**< Tolerance value for comparing reference's output against implementation's output for quantized data types */ constexpr float tolerance_num = 0.07f; /**< Tolerance number */ +const auto data9x9_small_asymm = framework::dataset::make("InputShape", TensorShape{ 10U, 10U, 1U, 1U }) *framework::dataset::make("StrideX", 2) *framework::dataset::make("StrideY", + 2) + *framework::dataset::make("PadLeft", 3) + *framework::dataset::make("PadRight", 4) *framework::dataset::make("PadTop", 3) *framework::dataset::make("PadBottom", 4) *framework::dataset::make("NumKernels", { 1 }); + +const auto data9x9_large_asymm = framework::dataset::make("InputShape", TensorShape{ 640U, 360U, 56U, 1U }) *framework::dataset::make("StrideX", 2) *framework::dataset::make("StrideY", + 2) + *framework::dataset::make("PadLeft", 3) + *framework::dataset::make("PadRight", 4) *framework::dataset::make("PadTop", 3) *framework::dataset::make("PadBottom", 4) *framework::dataset::make("NumKernels", { 1 }); + const auto data4x4 = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 1, 4) * framework::dataset::make("StrideY", 1, 4) * framework::dataset::make("PadX", 0, 3) * framework::dataset::make("PadY", 0, 3) * framework::dataset::make("NumKernels", { 3 }); @@ -137,6 +147,9 @@ using CLDeconvolutionLayerFixture2x2 = DeconvolutionValidationFixture using CLDeconvolutionLayerFixture1x1 = DeconvolutionValidationFixture; +template +using CLDeconvolutionLayerAsymmFixture9x9 = DeconvolutionValidationAsymmFixture; + TEST_SUITE(Float) TEST_SUITE(FP32) @@ -196,7 +209,16 @@ FIXTURE_DATA_TEST_CASE(Run, CLDeconvolutionLayerFixture1x1, framework::Da validate(CLAccessor(_target), _reference, tolerance_fp32); } TEST_SUITE_END() // W1x1 - +TEST_SUITE(W9x9) +FIXTURE_DATA_TEST_CASE(RunSmall, CLDeconvolutionLayerAsymmFixture9x9, framework::DatasetMode::ALL, combine(combine(combine(data9x9_small_asymm, framework::dataset::make("DataType", + DataType::F32)), + framework::dataset::make("DataLayout", { DataLayout::NHWC })), + framework::dataset::make("AddBias", { false }))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_fp32); +} +TEST_SUITE_END() // W9x9 TEST_SUITE_END() // FP32 TEST_SUITE(FP16) diff --git a/tests/validation/NEON/DeconvolutionLayer.cpp b/tests/validation/NEON/DeconvolutionLayer.cpp index 38256eb2ad..d888d7b838 100644 --- a/tests/validation/NEON/DeconvolutionLayer.cpp +++ b/tests/validation/NEON/DeconvolutionLayer.cpp @@ -59,6 +59,16 @@ const auto data3x3 = datasets::SmallDeconvolutionShapes() * framework::dataset:: const auto data3x3_asymm = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 1, 2) * framework::dataset::make("StrideY", 1, 2) * framework::dataset::make("PadLeft", 0, 1) * framework::dataset::make("PadRight", 0, 1) * framework::dataset::make("PadTop", 0, 1) * framework::dataset::make("PadBottom", 0, 1) * framework::dataset::make("NumKernels", { 3 }); +const auto data9x9_small_asymm = framework::dataset::make("InputShape", TensorShape{ 10U, 10U, 1U, 1U }) *framework::dataset::make("StrideX", 2) *framework::dataset::make("StrideY", + 2) + *framework::dataset::make("PadLeft", 3) + *framework::dataset::make("PadRight", 4) *framework::dataset::make("PadTop", 3) *framework::dataset::make("PadBottom", 4) *framework::dataset::make("NumKernels", { 1 }); + +const auto data9x9_large_asymm = framework::dataset::make("InputShape", TensorShape{ 640U, 360U, 56U, 1U }) *framework::dataset::make("StrideX", 2) *framework::dataset::make("StrideY", + 2) + *framework::dataset::make("PadLeft", 3) + *framework::dataset::make("PadRight", 4) *framework::dataset::make("PadTop", 3) *framework::dataset::make("PadBottom", 4) *framework::dataset::make("NumKernels", { 1 }); + const auto data3x3_precommit = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 1, 2) * framework::dataset::make("StrideY", 1, 2) * framework::dataset::make("PadX", 0, 2) * framework::dataset::make("PadY", 0, 2) * framework::dataset::make("NumKernels", { 3 }); @@ -141,6 +151,9 @@ using NEDeconvolutionLayerFixture3x3 = DeconvolutionValidationFixture using NEDeconvolutionLayerAsymmFixture3x3 = DeconvolutionValidationAsymmFixture; +template +using NEDeconvolutionLayerAsymmFixture9x9 = DeconvolutionValidationAsymmFixture; + template using NEDeconvolutionLayerFixture1x1 = DeconvolutionValidationFixture; @@ -189,6 +202,24 @@ FIXTURE_DATA_TEST_CASE(Run, NEDeconvolutionLayerFixture1x1, framework::Da validate(Accessor(_target), _reference, tolerance_fp32); } TEST_SUITE_END() // W1x1 +TEST_SUITE(W9x9) +FIXTURE_DATA_TEST_CASE(RunSmall, NEDeconvolutionLayerAsymmFixture9x9, framework::DatasetMode::ALL, combine(combine(combine(data9x9_small_asymm, framework::dataset::make("DataType", + DataType::F32)), + framework::dataset::make("DataLayout", { DataLayout::NHWC })), + framework::dataset::make("AddBias", { false }))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_fp32); +} +FIXTURE_DATA_TEST_CASE(RunLarge, NEDeconvolutionLayerAsymmFixture9x9, framework::DatasetMode::NIGHTLY, combine(combine(combine(data9x9_large_asymm, framework::dataset::make("DataType", + DataType::F32)), + framework::dataset::make("DataLayout", { DataLayout::NHWC })), + framework::dataset::make("AddBias", { false }))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_fp32); +} +TEST_SUITE_END() // W9x9 TEST_SUITE_END() // FP32 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC diff --git a/tests/validation/reference/DeconvolutionLayer.cpp b/tests/validation/reference/DeconvolutionLayer.cpp index 01b9c1c403..3cfbfae163 100644 --- a/tests/validation/reference/DeconvolutionLayer.cpp +++ b/tests/validation/reference/DeconvolutionLayer.cpp @@ -152,4 +152,4 @@ template SimpleTensor deconvolution_layer(const SimpleTensor &src, c } // namespace reference } // namespace validation } // namespace test -} // namespace arm_compute +} // namespace arm_compute \ No newline at end of file -- cgit v1.2.1