diff options
author Manuel Bottini <manuel.bottini@arm.com> 2020-04-30 13:28:23 +0100
committer Manuel Bottini <manuel.bottini@arm.com> 2020-05-12 09:34:17 +0000
commit 6e10aa395e81b83edb3437191acd7abe1639c7dc (patch)
parent 0e240151637641e9e0c425d52dd75b7bd11d1159 (diff)
COMPMID-3316: NEDeconvolutionLayer failing for a big input
- Using NEDirectConvolution for big shapes since the memory required explodes for 9x9 kernel
- Adding test cases
- Fix enables only the NEON Deconvolution for NHWC

Change-Id: I8a541346428e5686818f8ecb7f69e2a9106cbceb
Signed-off-by: Manuel Bottini <manuel.bottini@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3135
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Sang-Hoon Park <sang-hoon.park@arm.com>
9 files changed, 152 insertions, 174 deletions
diff --git a/arm_compute/core/utils/misc/ShapeCalculator.h b/arm_compute/core/utils/misc/ShapeCalculator.h
index 7d2b7df43b..dfccec8b37 100644
--- a/arm_compute/core/utils/misc/ShapeCalculator.h
+++ b/arm_compute/core/utils/misc/ShapeCalculator.h
@@ -485,7 +485,7 @@ inline TensorShape compute_depthwise_convolution_shape(const ITensorInfo &input,
* @return the calculated shape
inline TensorShape compute_deconvolution_upsampled_shape(const ITensorInfo &input, const ITensorInfo &weights, unsigned int sx, unsigned int sy,
- std::pair<unsigned int, unsigned int> &out_dims, unsigned int &padx, unsigned int &pady)
+ std::pair<unsigned int, unsigned int> &out_dims, uint32_t &padx, uint32_t &pady)
const DataLayout data_layout = input.data_layout();
const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
diff --git a/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h
index c4c1664f20..e2ed0e0abc 100644
--- a/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h
@@ -120,16 +120,9 @@ private:
NEConvolutionLayer _conv_f;
CPPUpsample _upsample_f;
NEReverse _flip_weights;
- NEPermute _permute_input;
- NEPermute _permute_weights;
- NEPermute _permute_output;
Tensor _scaled_output;
Tensor _weights_flipped;
- Tensor _permuted_input;
- Tensor _permuted_weights;
- Tensor _permuted_output;
Tensor _flip_axis;
- bool _is_nchw;
const ITensor *_original_weights;
ITensor *_input;
PadStrideInfo _info;
diff --git a/src/core/CPP/kernels/CPPUpsampleKernel.cpp b/src/core/CPP/kernels/CPPUpsampleKernel.cpp
index c190543216..8348b4335e 100644
--- a/src/core/CPP/kernels/CPPUpsampleKernel.cpp
+++ b/src/core/CPP/kernels/CPPUpsampleKernel.cpp
@@ -71,15 +71,19 @@ void CPPUpsampleKernel::run(const Window &window, const ThreadInfo &info)
+ const DataLayout data_layout = _input->info()->data_layout();
+ const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
+ const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
// Initialize _scaled_output buffer
- const int width_scaled = _output->info()->dimension(0);
- const int height_scaled = _output->info()->dimension(1);
- const int stride_x = _info.stride().first;
- const int stride_y = _info.stride().second;
- const int start_x = _info.pad_left();
- const int start_y = _info.pad_top();
- const int end_x = width_scaled - _info.pad_right();
- const int end_y = height_scaled - _info.pad_bottom();
+ const int width_scaled = _output->info()->dimension(idx_w);
+ const int height_scaled = _output->info()->dimension(idx_h);
+ const int stride_width = _info.stride().first;
+ const int stride_height = _info.stride().second;
+ const int start_width = _info.pad_left();
+ const int start_height = _info.pad_top();
+ const int end_width = width_scaled - _info.pad_right();
+ const int end_height = height_scaled - _info.pad_bottom();
const size_t element_size = _input->info()->element_size();
// The fill value is normally 0, but for quantized types '0' corresponds to the offset
@@ -103,8 +107,16 @@ void CPPUpsampleKernel::run(const Window &window, const ThreadInfo &info)
// Create window
Window window_out(window);
- window_out.set(Window::DimX, Window::Dimension(start_x, end_x, stride_x));
- window_out.set(Window::DimY, Window::Dimension(start_y, end_y, stride_y));
+ if(data_layout == DataLayout::NCHW)
+ {
+ window_out.set(Window::DimX, Window::Dimension(start_width, end_width, stride_width));
+ window_out.set(Window::DimY, Window::Dimension(start_height, end_height, stride_height));
+ }
+ else
+ {
+ window_out.set(Window::DimY, Window::Dimension(start_width, end_width, stride_width));
+ window_out.set(Window::DimZ, Window::Dimension(start_height, end_height, stride_height));
+ }
// Create iterators
Iterator in(_input, window);
diff --git a/src/runtime/NEON/functions/NEConvolutionLayer.cpp b/src/runtime/NEON/functions/NEConvolutionLayer.cpp
index 1755e9a774..dcd26fc1cd 100644
--- a/src/runtime/NEON/functions/NEConvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEConvolutionLayer.cpp
@@ -102,7 +102,7 @@ Status NEConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo
ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMConvolutionLayer::validate(input, weights, biases, output, conv_info, weights_info, dilation, act_info));
case ConvolutionMethod::DIRECT:
- //Validate Gemm-based Convolution
+ //Validate Direct Convolution
ARM_COMPUTE_RETURN_ON_ERROR(NEDirectConvolutionLayer::validate(input, weights, biases, output, conv_info, act_info));
case ConvolutionMethod::FFT:
@@ -167,7 +167,8 @@ ConvolutionMethod NEConvolutionLayer::get_convolution_method(const ITensorInfo *
- if((input->dimension(idx_h) > 720U) && (output->dimension(idx_h) > 720U) && (weights->dimension(idx_h) == 9)
+ // Output might not be initialized when it is an internal tensor of the layer using the convolution
+ if(input->total_size() > 1e7 && (weights->dimension(idx_h) > 7)
&& (NEDirectConvolutionLayer::validate(input, weights, nullptr, output, conv_info, act_info)))
return ConvolutionMethod::DIRECT;
diff --git a/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp b/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp
index 06885d59e5..c87dac60dc 100644
--- a/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp
@@ -33,21 +33,45 @@ using namespace arm_compute::misc::shape_calculator;
namespace arm_compute
+PadStrideInfo compute_upsample_info(const PadStrideInfo &info, uint32_t deconv_pad_x, uint32_t deconv_pad_y)
+ const unsigned int pad_left = info.pad_left();
+ const unsigned int pad_right = info.pad_right();
+ const unsigned int pad_top = info.pad_top();
+ const unsigned int pad_bottom = info.pad_bottom();
+ const unsigned int stride_x = info.stride().first;
+ const unsigned int stride_y = info.stride().second;
+ // Find the upsampled dimensions and the padding needed for the convolution with stride 1 in order to match output shape
+ unsigned int deconv_pad_left = pad_right > pad_left ? pad_right - pad_left : 0;
+ unsigned int deconv_pad_right = pad_left > pad_right ? pad_left - pad_right : 0;
+ deconv_pad_x -= deconv_pad_left + deconv_pad_right;
+ ARM_COMPUTE_ERROR_ON((deconv_pad_x % 2) != 0);
+ deconv_pad_left += deconv_pad_x / 2;
+ deconv_pad_right += deconv_pad_x / 2;
+ unsigned int deconv_pad_top = pad_bottom > pad_top ? pad_bottom - pad_top : 0;
+ unsigned int deconv_pad_bottom = pad_top > pad_bottom ? pad_top - pad_bottom : 0;
+ deconv_pad_y -= deconv_pad_top + deconv_pad_bottom;
+ ARM_COMPUTE_ERROR_ON((deconv_pad_y % 2) != 0);
+ deconv_pad_top += deconv_pad_y / 2;
+ deconv_pad_bottom += deconv_pad_y / 2;
+ return PadStrideInfo(stride_x, stride_y, deconv_pad_left, deconv_pad_right, deconv_pad_top, deconv_pad_bottom, DimensionRoundingType::FLOOR);
+} // namespace
NEDeconvolutionLayer::NEDeconvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager) // NOLINT
: _memory_group(std::move(memory_manager)),
- _permute_input(),
- _permute_weights(),
- _permute_output(),
- _permuted_input(),
- _permuted_weights(),
- _permuted_output(),
- _is_nchw(false),
@@ -92,8 +116,8 @@ Status NEDeconvolutionLayer::validate(const ITensorInfo *input, const ITensorInf
ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(Window::DimZ) != output_shape.z(), "Output's depth is invalid.");
- unsigned int deconv_pad_x = 0;
- unsigned int deconv_pad_y = 0;
+ uint32_t deconv_pad_x = 0;
+ uint32_t deconv_pad_y = 0;
const unsigned int stride_x = info.stride().first;
const unsigned int stride_y = info.stride().second;
const TensorShape scale_out_shape = compute_deconvolution_upsampled_shape(*input, *weights, stride_x, stride_y, out_dims, deconv_pad_x, deconv_pad_y);
@@ -116,136 +140,58 @@ void NEDeconvolutionLayer::configure(ITensor *input, const ITensor *weights, con
ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
ARM_COMPUTE_ERROR_THROW_ON(NEDeconvolutionLayer::validate(input->info(), weights->info(), (bias == nullptr) ? nullptr : bias->info(), output->info(), info));
- const DataLayout data_layout = input->info()->data_layout();
+ const DataLayout data_layout = input->info()->data_layout();
+ const unsigned int width_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
+ const unsigned int height_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
+ auto out_dims = deconvolution_output_dimensions(input->info()->dimension(width_idx), input->info()->dimension(height_idx),
+ weights->info()->dimension(width_idx), weights->info()->dimension(height_idx), info);
+ const TensorShape output_shape = compute_deconvolution_output_shape(out_dims, *input->info(), *weights->info());
_input = input;
_original_weights = weights;
_info = info;
_is_prepared = false;
- _is_nchw = data_layout == DataLayout::NCHW;
- _flip_axis.allocator()->init(TensorInfo(TensorShape(2U), 1, DataType::U32));
- const unsigned int pad_left = info.pad_left();
- const unsigned int pad_right = info.pad_right();
- const unsigned int pad_top = info.pad_top();
- const unsigned int pad_bottom = info.pad_bottom();
- const unsigned int stride_x = info.stride().first;
- const unsigned int stride_y = info.stride().second;
- const unsigned int width_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
- const unsigned int height_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
- auto out_dims = deconvolution_output_dimensions(input->info()->dimension(width_idx), input->info()->dimension(height_idx),
- weights->info()->dimension(width_idx), weights->info()->dimension(height_idx), info);
+ const unsigned int stride_x = info.stride().first;
+ const unsigned int stride_y = info.stride().second;
- const TensorShape output_shape = compute_deconvolution_output_shape(out_dims, *input->info(), *weights->info());
// Output auto initialization if not yet initialized
auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type(), input->info()->quantization_info());
_flip_axis.allocator()->init(TensorInfo(TensorShape(2U), 1, DataType::U32));
+ _memory_group.manage(&_flip_axis);
- if(!_is_nchw)
- {
- _memory_group.manage(&_permuted_input);
- _memory_group.manage(&_permuted_output);
- // Configure the function to transform the input tensor from NHWC -> NCHW
- _permuted_input.info()->set_quantization_info(input->info()->quantization_info());
- _permute_input.configure(input, &_permuted_input, PermutationVector(1U, 2U, 0U));
- _permuted_input.info()->set_data_layout(DataLayout::NCHW);
- // Configure the function to transform the weights tensor from NHWC -> NCHW
- _permuted_weights.info()->set_quantization_info(weights->info()->quantization_info());
- _permute_weights.configure(weights, &_permuted_weights, PermutationVector(1U, 2U, 0U));
- _permuted_weights.info()->set_data_layout(DataLayout::NCHW);
- // Find the upsampled dimensions and the padding needed for the convolution with stride 1 in order to match output shape
- unsigned int deconv_pad_x = 0;
- unsigned int deconv_pad_y = 0;
- const TensorShape scale_out_shape = compute_deconvolution_upsampled_shape(*_permuted_input.info(), *_permuted_weights.info(), stride_x, stride_y, out_dims,
- deconv_pad_x, deconv_pad_y);
- unsigned int deconv_pad_left = pad_right > pad_left ? pad_right - pad_left : 0;
- unsigned int deconv_pad_right = pad_left > pad_right ? pad_left - pad_right : 0;
- deconv_pad_x -= deconv_pad_left + deconv_pad_right;
- ARM_COMPUTE_ERROR_ON((deconv_pad_x % 2) != 0);
- deconv_pad_left += deconv_pad_x / 2;
- deconv_pad_right += deconv_pad_x / 2;
- unsigned int deconv_pad_top = pad_bottom > pad_top ? pad_bottom - pad_top : 0;
- unsigned int deconv_pad_bottom = pad_top > pad_bottom ? pad_top - pad_bottom : 0;
- deconv_pad_y -= deconv_pad_top + deconv_pad_bottom;
- ARM_COMPUTE_ERROR_ON((deconv_pad_y % 2) != 0);
- deconv_pad_top += deconv_pad_y / 2;
- deconv_pad_bottom += deconv_pad_y / 2;
- TensorInfo scale_out_info(scale_out_shape, 1, _permuted_input.info()->data_type(), _permuted_input.info()->quantization_info());
- scale_out_info.set_data_layout(DataLayout::NCHW);
- _scaled_output.allocator()->init(scale_out_info);
- const PadStrideInfo upsample_info(stride_x, stride_y, deconv_pad_left, deconv_pad_right, deconv_pad_top, deconv_pad_bottom, DimensionRoundingType::FLOOR);
- _upsample_f.configure(&_permuted_input, &_scaled_output, upsample_info);
- _weights_flipped.allocator()->init(*_permuted_weights.info()->clone());
- _weights_flipped.info()->set_quantization_info(weights->info()->quantization_info());
- _flip_weights.configure(&_permuted_weights, &_weights_flipped, &_flip_axis);
- // setup the function to convolve the upscaled output
- const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL);
- _permuted_output.info()->set_quantization_info(output->info()->quantization_info());
- _conv_f.configure(&_scaled_output, &_weights_flipped, bias, &_permuted_output, conv_info);
- // Configure the function to transform the convoluted output to NHWC
- _permute_output.configure(&_permuted_output, output, PermutationVector(2U, 0U, 1U));
- _permuted_output.info()->set_data_layout(DataLayout::NCHW);
- _permuted_input.allocator()->allocate();
- _permuted_output.allocator()->allocate();
- }
- else
- {
- // Find the upsampled dimensions and the padding needed for the convolution with stride 1 in order to match output shape
- unsigned int deconv_pad_x = 0;
- unsigned int deconv_pad_y = 0;
- const TensorShape scale_out_shape = compute_deconvolution_upsampled_shape(*input->info(), *weights->info(), stride_x, stride_y,
- out_dims, deconv_pad_x, deconv_pad_y);
- unsigned int deconv_pad_left = pad_right > pad_left ? pad_right - pad_left : 0;
- unsigned int deconv_pad_right = pad_left > pad_right ? pad_left - pad_right : 0;
- deconv_pad_x -= deconv_pad_left + deconv_pad_right;
- ARM_COMPUTE_ERROR_ON((deconv_pad_x % 2) != 0);
- deconv_pad_left += deconv_pad_x / 2;
- deconv_pad_right += deconv_pad_x / 2;
- unsigned int deconv_pad_top = pad_bottom > pad_top ? pad_bottom - pad_top : 0;
- unsigned int deconv_pad_bottom = pad_top > pad_bottom ? pad_top - pad_bottom : 0;
- deconv_pad_y -= deconv_pad_top + deconv_pad_bottom;
- ARM_COMPUTE_ERROR_ON((deconv_pad_y % 2) != 0);
- deconv_pad_top += deconv_pad_y / 2;
- deconv_pad_bottom += deconv_pad_y / 2;
- TensorInfo scale_out_info(scale_out_shape, 1, input->info()->data_type(), input->info()->quantization_info());
- scale_out_info.set_data_layout(data_layout);
- _scaled_output.allocator()->init(scale_out_info);
- const PadStrideInfo upsample_info(stride_x, stride_y, deconv_pad_left, deconv_pad_right, deconv_pad_top, deconv_pad_bottom, DimensionRoundingType::FLOOR);
- _upsample_f.configure(input, &_scaled_output, upsample_info);
- _weights_flipped.allocator()->init(weights->info()->clone()->set_data_layout(data_layout));
- _flip_weights.configure(weights, &_weights_flipped, &_flip_axis);
- // setup the function to convolve the upscaled output
- const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL);
- _conv_f.configure(&_scaled_output, &_weights_flipped, bias, output, conv_info);
- }
- _scaled_output.allocator()->allocate();
+ _weights_flipped.allocator()->init(weights->info()->clone()->set_data_layout(data_layout));
+ _flip_weights.configure(weights, &_weights_flipped, &_flip_axis);
+ // setup the function to convolve the upscaled output
+ const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL);
+ uint32_t deconv_pad_x = 0;
+ uint32_t deconv_pad_y = 0;
+ const TensorShape scale_out_shape = compute_deconvolution_upsampled_shape(*input->info(), *weights->info(),
+ stride_x, stride_y,
+ out_dims, deconv_pad_x, deconv_pad_y);
+ const PadStrideInfo upsample_info = compute_upsample_info(info, deconv_pad_x, deconv_pad_y);
+ TensorInfo scale_out_info(scale_out_shape, 1, input->info()->data_type(), input->info()->quantization_info());
+ scale_out_info.set_data_layout(data_layout);
+ _scaled_output.allocator()->init(scale_out_info);
+ _upsample_f.configure(input, &_scaled_output, upsample_info);
+ _conv_f.configure(&_scaled_output, &_weights_flipped, bias, output, conv_info);
// Setup flip axis data
auto axis_data = reinterpret_cast<uint32_t *>(_flip_axis.buffer());
- axis_data[0] = 0;
- axis_data[1] = 1;
+ axis_data[0] = static_cast<uint32_t>(width_idx);
+ axis_data[1] = static_cast<uint32_t>(height_idx);
+ _scaled_output.allocator()->allocate();
void NEDeconvolutionLayer::run()
@@ -254,20 +200,8 @@ void NEDeconvolutionLayer::run()
MemoryGroupResourceScope scope_mg(_memory_group);
- // Permute input
- if(!_is_nchw)
- {
- _permute_input.run();
- }
- // Permute output
- if(!_is_nchw)
- {
- _permute_output.run();
- }
void NEDeconvolutionLayer::prepare()
@@ -275,13 +209,6 @@ void NEDeconvolutionLayer::prepare()
- // Permute weights
- if(!_is_nchw)
- {
- // Manually manage _permuted_weights
- _permuted_weights.allocator()->allocate();
- _permute_weights.run();
- }
// Run weights flipping and mark original weights tensor as unused
@@ -291,15 +218,6 @@ void NEDeconvolutionLayer::prepare()
// Prepare convolution
- // Unused weights are already released in _conv_f
- if(!_is_nchw)
- {
- // Manually manage _permuted_weights
- // Free _permuted_weights as it not used after this method (prepare)
- _permuted_weights.allocator()->free();
- }
_is_prepared = true;
diff --git a/src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp b/src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp
index 65538848df..751a3fa1fb 100644
--- a/src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp
@@ -1,5 +1,5 @@
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
* SPDX-License-Identifier: MIT
@@ -76,7 +76,8 @@ Status NEDirectConvolutionLayer::validate(const ITensorInfo *input, const ITenso
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
- DataType data_type = output->data_type();
+ // output might not be initialized since it can be an intermediate tensor of another layer
+ DataType data_type = input->data_type();
TensorInfo accumulator(output->clone()->set_is_resizable(true).reset_padding().set_data_type(data_type));
// Validate Convolution kernel
diff --git a/tests/validation/CL/DeconvolutionLayer.cpp b/tests/validation/CL/DeconvolutionLayer.cpp
index e7ba930ebe..dd92887ae8 100644
--- a/tests/validation/CL/DeconvolutionLayer.cpp
+++ b/tests/validation/CL/DeconvolutionLayer.cpp
@@ -49,6 +49,16 @@ RelativeTolerance<half_float::half> tolerance_f16(half_float::half(0.2)); /**< T
constexpr AbsoluteTolerance<float> tolerance_qasymm8(1.0); /**< Tolerance value for comparing reference's output against implementation's output for quantized data types */
constexpr float tolerance_num = 0.07f; /**< Tolerance number */
+const auto data9x9_small_asymm = framework::dataset::make("InputShape", TensorShape{ 10U, 10U, 1U, 1U }) *framework::dataset::make("StrideX", 2) *framework::dataset::make("StrideY",
+ 2)
+ *framework::dataset::make("PadLeft", 3)
+ *framework::dataset::make("PadRight", 4) *framework::dataset::make("PadTop", 3) *framework::dataset::make("PadBottom", 4) *framework::dataset::make("NumKernels", { 1 });
+const auto data9x9_large_asymm = framework::dataset::make("InputShape", TensorShape{ 640U, 360U, 56U, 1U }) *framework::dataset::make("StrideX", 2) *framework::dataset::make("StrideY",
+ 2)
+ *framework::dataset::make("PadLeft", 3)
+ *framework::dataset::make("PadRight", 4) *framework::dataset::make("PadTop", 3) *framework::dataset::make("PadBottom", 4) *framework::dataset::make("NumKernels", { 1 });
const auto data4x4 = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 1, 4) * framework::dataset::make("StrideY", 1, 4) * framework::dataset::make("PadX", 0, 3)
* framework::dataset::make("PadY", 0, 3) * framework::dataset::make("NumKernels", { 3 });
@@ -137,6 +147,9 @@ using CLDeconvolutionLayerFixture2x2 = DeconvolutionValidationFixture<CLTensor,
template <typename T>
using CLDeconvolutionLayerFixture1x1 = DeconvolutionValidationFixture<CLTensor, CLAccessor, CLDeconvolutionLayer, T, 1, 1>;
+template <typename T>
+using CLDeconvolutionLayerAsymmFixture9x9 = DeconvolutionValidationAsymmFixture<CLTensor, CLAccessor, CLDeconvolutionLayer, T, 9, 9>;
@@ -196,7 +209,16 @@ FIXTURE_DATA_TEST_CASE(Run, CLDeconvolutionLayerFixture1x1<float>, framework::Da
validate(CLAccessor(_target), _reference, tolerance_fp32);
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDeconvolutionLayerAsymmFixture9x9<float>, framework::DatasetMode::ALL, combine(combine(combine(data9x9_small_asymm, framework::dataset::make("DataType",
+ DataType::F32)),
+ framework::dataset::make("DataLayout", { DataLayout::NHWC })),
+ framework::dataset::make("AddBias", { false })))
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_fp32);
diff --git a/tests/validation/NEON/DeconvolutionLayer.cpp b/tests/validation/NEON/DeconvolutionLayer.cpp
index 38256eb2ad..d888d7b838 100644
--- a/tests/validation/NEON/DeconvolutionLayer.cpp
+++ b/tests/validation/NEON/DeconvolutionLayer.cpp
@@ -59,6 +59,16 @@ const auto data3x3 = datasets::SmallDeconvolutionShapes() * framework::dataset::
const auto data3x3_asymm = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 1, 2) * framework::dataset::make("StrideY", 1, 2) * framework::dataset::make("PadLeft", 0, 1)
* framework::dataset::make("PadRight", 0, 1) * framework::dataset::make("PadTop", 0, 1) * framework::dataset::make("PadBottom", 0, 1) * framework::dataset::make("NumKernels", { 3 });
+const auto data9x9_small_asymm = framework::dataset::make("InputShape", TensorShape{ 10U, 10U, 1U, 1U }) *framework::dataset::make("StrideX", 2) *framework::dataset::make("StrideY",
+ 2)
+ *framework::dataset::make("PadLeft", 3)
+ *framework::dataset::make("PadRight", 4) *framework::dataset::make("PadTop", 3) *framework::dataset::make("PadBottom", 4) *framework::dataset::make("NumKernels", { 1 });
+const auto data9x9_large_asymm = framework::dataset::make("InputShape", TensorShape{ 640U, 360U, 56U, 1U }) *framework::dataset::make("StrideX", 2) *framework::dataset::make("StrideY",
+ 2)
+ *framework::dataset::make("PadLeft", 3)
+ *framework::dataset::make("PadRight", 4) *framework::dataset::make("PadTop", 3) *framework::dataset::make("PadBottom", 4) *framework::dataset::make("NumKernels", { 1 });
const auto data3x3_precommit = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 1, 2) * framework::dataset::make("StrideY", 1, 2) * framework::dataset::make("PadX", 0, 2)
* framework::dataset::make("PadY", 0, 2) * framework::dataset::make("NumKernels", { 3 });
@@ -142,6 +152,9 @@ template <typename T>
using NEDeconvolutionLayerAsymmFixture3x3 = DeconvolutionValidationAsymmFixture<Tensor, Accessor, NEDeconvolutionLayer, T, 3, 3>;
template <typename T>
+using NEDeconvolutionLayerAsymmFixture9x9 = DeconvolutionValidationAsymmFixture<Tensor, Accessor, NEDeconvolutionLayer, T, 9, 9>;
+template <typename T>
using NEDeconvolutionLayerFixture1x1 = DeconvolutionValidationFixture<Tensor, Accessor, NEDeconvolutionLayer, T, 1, 1>;
@@ -189,6 +202,24 @@ FIXTURE_DATA_TEST_CASE(Run, NEDeconvolutionLayerFixture1x1<float>, framework::Da
validate(Accessor(_target), _reference, tolerance_fp32);
+FIXTURE_DATA_TEST_CASE(RunSmall, NEDeconvolutionLayerAsymmFixture9x9<float>, framework::DatasetMode::ALL, combine(combine(combine(data9x9_small_asymm, framework::dataset::make("DataType",
+ DataType::F32)),
+ framework::dataset::make("DataLayout", { DataLayout::NHWC })),
+ framework::dataset::make("AddBias", { false })))
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_fp32);
+FIXTURE_DATA_TEST_CASE(RunLarge, NEDeconvolutionLayerAsymmFixture9x9<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(data9x9_large_asymm, framework::dataset::make("DataType",
+ DataType::F32)),
+ framework::dataset::make("DataLayout", { DataLayout::NHWC })),
+ framework::dataset::make("AddBias", { false })))
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_fp32);
diff --git a/tests/validation/reference/DeconvolutionLayer.cpp b/tests/validation/reference/DeconvolutionLayer.cpp
index 01b9c1c403..3cfbfae163 100644
--- a/tests/validation/reference/DeconvolutionLayer.cpp
+++ b/tests/validation/reference/DeconvolutionLayer.cpp
@@ -152,4 +152,4 @@ template SimpleTensor<half> deconvolution_layer(const SimpleTensor<half> &src, c
} // namespace reference
} // namespace validation
} // namespace test
-} // namespace arm_compute
+} // namespace arm_compute
\ No newline at end of file