diff options
author | Manuel Bottini <manuel.bottini@arm.com> | 2020-04-30 13:28:23 +0100 |
---|---|---|
committer | Manuel Bottini <manuel.bottini@arm.com> | 2020-05-12 09:34:17 +0000 |
commit | 6e10aa395e81b83edb3437191acd7abe1639c7dc (patch) | |
tree | fce78e75102402df6dbbd37e715f9ef855846008 /src/core | |
parent | 0e240151637641e9e0c425d52dd75b7bd11d1159 (diff) | |
download | ComputeLibrary-6e10aa395e81b83edb3437191acd7abe1639c7dc.tar.gz |
COMPMID-3316: NEDeconvolutionLayer failing for a big input
- Use NEDirectConvolution for big shapes, since the memory
required explodes for a 9x9 kernel
- Adding test cases
- The fix enables NHWC only for the NEON deconvolution
Change-Id: I8a541346428e5686818f8ecb7f69e2a9106cbceb
Signed-off-by: Manuel Bottini <manuel.bottini@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3135
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Sang-Hoon Park <sang-hoon.park@arm.com>
Diffstat (limited to 'src/core')
-rw-r--r-- | src/core/CPP/kernels/CPPUpsampleKernel.cpp | 32 |
1 files changed, 22 insertions, 10 deletions
```diff
diff --git a/src/core/CPP/kernels/CPPUpsampleKernel.cpp b/src/core/CPP/kernels/CPPUpsampleKernel.cpp
index c190543216..8348b4335e 100644
--- a/src/core/CPP/kernels/CPPUpsampleKernel.cpp
+++ b/src/core/CPP/kernels/CPPUpsampleKernel.cpp
@@ -71,15 +71,19 @@ void CPPUpsampleKernel::run(const Window &window, const ThreadInfo &info)
     ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
     ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICPPKernel::window(), window);
 
+    const DataLayout data_layout = _input->info()->data_layout();
+    const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
+    const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
+
     // Initialize _scaled_output buffer
-    const int width_scaled = _output->info()->dimension(0);
-    const int height_scaled = _output->info()->dimension(1);
-    const int stride_x = _info.stride().first;
-    const int stride_y = _info.stride().second;
-    const int start_x = _info.pad_left();
-    const int start_y = _info.pad_top();
-    const int end_x = width_scaled - _info.pad_right();
-    const int end_y = height_scaled - _info.pad_bottom();
+    const int width_scaled = _output->info()->dimension(idx_w);
+    const int height_scaled = _output->info()->dimension(idx_h);
+    const int stride_width = _info.stride().first;
+    const int stride_height = _info.stride().second;
+    const int start_width = _info.pad_left();
+    const int start_height = _info.pad_top();
+    const int end_width = width_scaled - _info.pad_right();
+    const int end_height = height_scaled - _info.pad_bottom();
     const size_t element_size = _input->info()->element_size();
 
     // The fill value is normally 0, but for quantized types '0' corresponds to the offset
@@ -103,8 +107,16 @@ void CPPUpsampleKernel::run(const Window &window, const ThreadInfo &info)
 
     // Create window
     Window window_out(window);
-    window_out.set(Window::DimX, Window::Dimension(start_x, end_x, stride_x));
-    window_out.set(Window::DimY, Window::Dimension(start_y, end_y, stride_y));
+    if(data_layout == DataLayout::NCHW)
+    {
+        window_out.set(Window::DimX, Window::Dimension(start_width, end_width, stride_width));
+        window_out.set(Window::DimY, Window::Dimension(start_height, end_height, stride_height));
+    }
+    else
+    {
+        window_out.set(Window::DimY, Window::Dimension(start_width, end_width, stride_width));
+        window_out.set(Window::DimZ, Window::Dimension(start_height, end_height, stride_height));
+    }
 
     // Create iterators
     Iterator in(_input, window);
```