aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorManuel Bottini <manuel.bottini@arm.com>2019-07-10 17:06:12 +0100
committerManuel Bottini <manuel.bottini@arm.com>2019-07-19 09:49:54 +0000
commitd25af6786c0a714f2b4f099d0338dab17a5dc7e1 (patch)
treeb8a1eafa1d01be903a238fff92ad6acd34b6a499 /src
parentdb9116ff15170ff734aad0300b46c48abc2a3b7b (diff)
downloadComputeLibrary-d25af6786c0a714f2b4f099d0338dab17a5dc7e1.tar.gz
COMPMID-2456: NEDeconvolutionLayer.cpp, NHWC is not supported
Support of NHWC for NEDeconvolutionLayer Bugfix for QASYMM8 in CPPUpsample when offset is different than 0 QASYMM8 tests added in NEUpsample with offset different than 0 Change-Id: I8283fa5e5e323fd4d5777136359ddb33025674bb Signed-off-by: Manuel Bottini <manuel.bottini@arm.com> Reviewed-on: https://review.mlplatform.org/c/1517 Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Pablo Marquez <pablo.tello@arm.com>
Diffstat (limited to 'src')
-rw-r--r--src/core/CPP/kernels/CPPUpsampleKernel.cpp10
-rw-r--r--src/runtime/NEON/functions/NEDeconvolutionLayer.cpp142
2 files changed, 119 insertions, 33 deletions
diff --git a/src/core/CPP/kernels/CPPUpsampleKernel.cpp b/src/core/CPP/kernels/CPPUpsampleKernel.cpp
index 6620ce2aeb..ad2d54a0f8 100644
--- a/src/core/CPP/kernels/CPPUpsampleKernel.cpp
+++ b/src/core/CPP/kernels/CPPUpsampleKernel.cpp
@@ -34,8 +34,8 @@
#include <cstddef>
#include <cstdint>
-using namespace arm_compute;
-
+namespace arm_compute
+{
CPPUpsampleKernel::CPPUpsampleKernel()
: _input(nullptr), _output(nullptr), _info()
{
@@ -82,7 +82,10 @@ void CPPUpsampleKernel::run(const Window &window, const ThreadInfo &info)
const int end_x = width_scaled - _info.pad().first;
const size_t element_size = _input->info()->element_size();
- std::fill_n(_output->buffer(), _output->info()->total_size(), 0);
+ //The fill value is normally 0, but for QASYMM8 the '0' corresponds to the offset
+ const uint8_t fill_value = _output->info()->data_type() == DataType::QASYMM8 ? utility::clamp<uint8_t>(_output->info()->quantization_info().uniform().offset) : 0;
+ //Filling a value different than 0 works only for QASYMM8 datatype since we are filling 1byte values in a buffer of uint8_ts
+ std::fill_n(_output->buffer(), _output->info()->total_size(), fill_value);
// Create window
Window window_out(window);
@@ -99,3 +102,4 @@ void CPPUpsampleKernel::run(const Window &window, const ThreadInfo &info)
},
in, out);
}
+} // namespace arm_compute \ No newline at end of file
diff --git a/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp b/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp
index 7bbacb139c..581f257581 100644
--- a/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp
@@ -38,8 +38,15 @@ NEDeconvolutionLayer::NEDeconvolutionLayer(std::shared_ptr<IMemoryManager> memor
_conv_f(),
_upsample_f(),
_flip_weights(),
+ _permute_input(),
+ _permute_weights(),
+ _permute_output(),
_scaled_output(),
_weights_flipped(),
+ _permuted_input(),
+ _permuted_weights(),
+ _permuted_output(),
+ _is_nchw(false),
_original_weights(nullptr),
_input(nullptr),
_info(),
@@ -52,18 +59,20 @@ Status NEDeconvolutionLayer::validate(const ITensorInfo *input, const ITensorInf
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32, DataType::F16, DataType::QASYMM8);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(weights, input);
- ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(0) != weights->dimension(1));
- ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(0) < 1);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(weights, input);
+ const unsigned int width_idx = get_data_layout_dimension_index(weights->data_layout(), DataLayoutDimension::WIDTH);
+ const unsigned int height_idx = get_data_layout_dimension_index(weights->data_layout(), DataLayoutDimension::HEIGHT);
+ ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(width_idx) != weights->dimension(height_idx));
+ ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(width_idx) < 1);
ARM_COMPUTE_RETURN_ERROR_ON(!info.padding_is_symmetric());
const unsigned int stride_x = info.stride().first;
const unsigned int stride_y = info.stride().second;
- auto out_dims = deconvolution_output_dimensions(input->dimension(0), input->dimension(1), weights->dimension(0), weights->dimension(1),
+ auto out_dims = deconvolution_output_dimensions(input->dimension(width_idx), input->dimension(height_idx), weights->dimension(width_idx), weights->dimension(height_idx),
info.pad().first, info.pad().second, stride_x, stride_y);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights);
-
if(is_data_type_quantized_asymmetric(input->data_type()))
{
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(bias, 1, DataType::S32);
@@ -90,10 +99,10 @@ Status NEDeconvolutionLayer::validate(const ITensorInfo *input, const ITensorInf
TensorInfo scale_out_info(input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(scale_out_shape));
const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL);
- for(size_t i = 2; i < Coordinates::num_max_dimensions; ++i)
- {
- ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(i) != scale_out_info.dimension(i));
- }
+ const unsigned int batches_idx = get_data_layout_dimension_index(weights->data_layout(), DataLayoutDimension::BATCHES);
+ const unsigned int channel_idx = get_data_layout_dimension_index(weights->data_layout(), DataLayoutDimension::CHANNEL);
+ ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(batches_idx) != scale_out_info.dimension(batches_idx));
+ ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(channel_idx) != scale_out_info.dimension(channel_idx));
ARM_COMPUTE_RETURN_ON_ERROR(NEConvolutionLayer::validate(&scale_out_info, weights, bias, output, conv_info, WeightsInfo()));
@@ -103,21 +112,24 @@ Status NEDeconvolutionLayer::validate(const ITensorInfo *input, const ITensorInf
void NEDeconvolutionLayer::configure(ITensor *input, const ITensor *weights, const ITensor *bias, ITensor *output, const PadStrideInfo &info)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output);
+ ARM_COMPUTE_ERROR_THROW_ON(NEDeconvolutionLayer::validate(input->info(), weights->info(), bias->info(), output->info(), info));
+
+ const DataLayout data_layout = input->info()->data_layout();
_input = input;
_original_weights = weights;
_info = info;
_is_prepared = false;
+ _is_nchw = data_layout == DataLayout::NCHW;
- const DataLayout data_layout = input->info()->data_layout();
- const unsigned int stride_x = info.stride().first;
- const unsigned int stride_y = info.stride().second;
+ const unsigned int stride_x = info.stride().first;
+ const unsigned int stride_y = info.stride().second;
- _weights_flipped.allocator()->init(weights->info()->clone()->set_data_layout(data_layout));
- _flip_weights.configure(weights, &_weights_flipped);
-
- auto out_dims = deconvolution_output_dimensions(input->info()->dimension(0), input->info()->dimension(1), weights->info()->dimension(0), weights->info()->dimension(1),
- info.pad().first, info.pad().second, stride_x, stride_y);
+ const unsigned int width_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
+ const unsigned int height_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
+ auto out_dims = deconvolution_output_dimensions(input->info()->dimension(width_idx), input->info()->dimension(height_idx), weights->info()->dimension(width_idx),
+ weights->info()->dimension(height_idx),
+ info.pad().first, info.pad().second, stride_x, stride_y);
const TensorShape output_shape = compute_deconvolution_output_shape(out_dims, *input->info(), *weights->info());
// Output auto initialization if not yet initialized
@@ -128,20 +140,73 @@ void NEDeconvolutionLayer::configure(ITensor *input, const ITensor *weights, con
_memory_group.manage(&_scaled_output);
- // Find the upsampled dimensions and the padding needed for the convolution with stride 1 in order to match output shape
- unsigned int padx = 0;
- unsigned int pady = 0;
- const TensorShape scale_out_shape = compute_deconvolution_upsampled_shape(*input->info(), *weights->info(), stride_x, stride_y, out_dims, padx, pady);
-
- TensorInfo scale_out_info(scale_out_shape, 1, input->info()->data_type(), input->info()->quantization_info());
- _scaled_output.allocator()->init(scale_out_info);
-
- const PadStrideInfo upsample_info(stride_x, stride_y, padx / 2, pady / 2);
- _upsample_f.configure(input, &_scaled_output, upsample_info);
-
- // setup the function to convolve the upscaled output
- const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL);
- _conv_f.configure(&_scaled_output, &_weights_flipped, bias, output, conv_info);
+ if(!_is_nchw)
+ {
+ _memory_group.manage(&_permuted_input);
+ _memory_group.manage(&_permuted_weights);
+ _memory_group.manage(&_permuted_output);
+
+ // Configure the function to transform the input tensor from NHWC -> NCHW
+ _permuted_input.info()->set_quantization_info(input->info()->quantization_info());
+ _permute_input.configure(input, &_permuted_input, PermutationVector(1U, 2U, 0U));
+ _permuted_input.info()->set_data_layout(DataLayout::NCHW);
+
+ // Configure the function to transform the weights tensor from NHWC -> NCHW
+ _permuted_weights.info()->set_quantization_info(weights->info()->quantization_info());
+ _permute_weights.configure(weights, &_permuted_weights, PermutationVector(1U, 2U, 0U));
+ _permuted_weights.info()->set_data_layout(DataLayout::NCHW);
+
+ // Find the upsampled dimensions and the padding needed for the convolution with stride 1 in order to match output shape
+ unsigned int padx = 0;
+ unsigned int pady = 0;
+ const TensorShape scale_out_shape = compute_deconvolution_upsampled_shape(*_permuted_input.info(), *_permuted_weights.info(), stride_x, stride_y, out_dims, padx,
+ pady);
+
+ TensorInfo scale_out_info(scale_out_shape, 1, _permuted_input.info()->data_type(), _permuted_input.info()->quantization_info());
+ scale_out_info.set_data_layout(DataLayout::NCHW);
+ _scaled_output.allocator()->init(scale_out_info);
+
+ const PadStrideInfo upsample_info(stride_x, stride_y, padx / 2, pady / 2);
+ _upsample_f.configure(&_permuted_input, &_scaled_output, upsample_info);
+
+ _weights_flipped.allocator()->init(*_permuted_weights.info()->clone());
+ _weights_flipped.info()->set_quantization_info(weights->info()->quantization_info());
+ _flip_weights.configure(&_permuted_weights, &_weights_flipped);
+
+ // setup the function to convolve the upscaled output
+ const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL);
+
+ _permuted_output.info()->set_quantization_info(output->info()->quantization_info());
+ _conv_f.configure(&_scaled_output, &_weights_flipped, bias, &_permuted_output, conv_info);
+ _permuted_output.info()->set_data_layout(DataLayout::NCHW);
+
+ // Configure the function to transform the convoluted output to NHWC
+ _permute_output.configure(&_permuted_output, output, PermutationVector(2U, 0U, 1U));
+
+ _permuted_input.allocator()->allocate();
+ _permuted_weights.allocator()->allocate();
+ _permuted_output.allocator()->allocate();
+ }
+ else
+ {
+ // Find the upsampled dimensions and the padding needed for the convolution with stride 1 in order to match output shape
+ unsigned int padx = 0;
+ unsigned int pady = 0;
+ const TensorShape scale_out_shape = compute_deconvolution_upsampled_shape(*input->info(), *weights->info(), stride_x, stride_y, out_dims, padx, pady);
+
+ TensorInfo scale_out_info(scale_out_shape, 1, input->info()->data_type(), input->info()->quantization_info());
+ scale_out_info.set_data_layout(data_layout);
+ _scaled_output.allocator()->init(scale_out_info);
+ const PadStrideInfo upsample_info(stride_x, stride_y, padx / 2, pady / 2);
+ _upsample_f.configure(input, &_scaled_output, upsample_info);
+
+ _weights_flipped.allocator()->init(weights->info()->clone()->set_data_layout(data_layout));
+ _flip_weights.configure(weights, &_weights_flipped);
+
+ // setup the function to convolve the upscaled output
+ const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL);
+ _conv_f.configure(&_scaled_output, &_weights_flipped, bias, output, conv_info);
+ }
_scaled_output.allocator()->allocate();
}
@@ -151,8 +216,20 @@ void NEDeconvolutionLayer::run()
MemoryGroupResourceScope scope_mg(_memory_group);
+ // Permute input
+ if(!_is_nchw)
+ {
+ _permute_input.run();
+ }
+
_upsample_f.run();
_conv_f.run();
+
+ // Permute output
+ if(!_is_nchw)
+ {
+ _permute_output.run();
+ }
}
void NEDeconvolutionLayer::prepare()
@@ -163,6 +240,11 @@ void NEDeconvolutionLayer::prepare()
// Run weights flipping and mark original weights tensor as unused
_weights_flipped.allocator()->allocate();
+ // Permute weights
+ if(!_is_nchw)
+ {
+ _permute_weights.run();
+ }
NEScheduler::get().schedule(&_flip_weights, Window::DimZ);
_original_weights->mark_as_unused();