From 0223a78ba43f0b106347108857d2f8cbfe857198 Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Tue, 12 Dec 2017 11:44:44 +0000 Subject: COMPMID-556: Fix bugs around NEDirectConvolutionLayer Change-Id: Ib4af25cd6dae78ed4ec89f4272cfaa2356359446 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/112867 Tested-by: BSG Visual Compute Jenkins server to access repositories on http://mpd-gerrit.cambridge.arm.com Reviewed-by: Anthony Barbier --- .../NEON/functions/NEDirectConvolutionLayer.h | 5 +- ...EDirectConvolutionLayerBiasAccumulateKernel.cpp | 1 + .../kernels/NEDirectConvolutionLayerKernel.cpp | 16 +++-- src/core/NEON/kernels/NEFillBorderKernel.cpp | 4 +- .../NEON/functions/NEDirectConvolutionLayer.cpp | 72 +++++++++++----------- 5 files changed, 54 insertions(+), 44 deletions(-) diff --git a/arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h index 6d8ce1f6f4..09a54968bb 100644 --- a/arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h +++ b/arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h @@ -62,7 +62,7 @@ public: * Supported sizes: 1x1, 3x3 and 5x5. * The 3rd dimension must be the same as the input's volume 3rd dimension. * Data type supported: Same as @p input. - * @param[in] bias Set of biases. Data type supported: Same as @p input. + * @param[in] bias Set of biases. Can be nullptr. Data type supported: Same as @p input. * @param[out] output Output tensor. * The 3rd dimensions must be equal to the 4th dimension of the @p kernels tensor. Data types supported: Same as @p input. * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. @@ -80,7 +80,7 @@ public: * Supported sizes: 1x1, 3x3 and 5x5. * The 3rd dimension must be the same as the input's volume 3rd dimension. * Data type supported: Same as @p input. - * @param[in] bias Set of biases. Data type supported: Same as @p input. + * @param[in] bias Set of biases. Can be nullptr. 
Data type supported: Same as @p input. * @param[in] output Output tensor. * The 3rd dimensions must be equal to the 4th dimension of the @p kernels tensor. Data types supported: Same as @p input. * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. @@ -98,6 +98,7 @@ private: NEDirectConvolutionLayerKernel _conv_kernel; NEFillBorderKernel _input_border_handler; Tensor _accumulator; + bool _has_bias; }; } #endif /* __ARM_COMPUTE_NEDIRECTCONVOLUTIONLAYER_H__ */ diff --git a/src/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.cpp b/src/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.cpp index a6585ade12..65b7087d7e 100644 --- a/src/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.cpp +++ b/src/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.cpp @@ -244,6 +244,7 @@ void NEDirectConvolutionLayerBiasAccumulateKernel::configure(ITensor *input, con { ARM_COMPUTE_ERROR_ON_NULLPTR(input, bias); + // Auto-initialize output if required if(output != nullptr) { // Output tensor auto initialization if not yet initialized diff --git a/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp b/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp index 1ca213b04a..2ba0ef2e69 100644 --- a/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp +++ b/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp @@ -1048,7 +1048,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *weights, } std::pair validate_and_configure_window(ITensorInfo *input, ITensorInfo *weights, ITensorInfo *output, const PadStrideInfo &conv_info, unsigned int &num_weight_elems_read_per_row, - unsigned int &num_elems_read_per_iteration, unsigned int &num_elems_written_per_iteration) + unsigned int &num_elems_read_per_iteration, unsigned int &num_elems_written_per_iteration, BorderSize &border_size) { // Calculate right and bottom border unsigned int kernel_size = 
weights->dimension(0); @@ -1056,7 +1056,6 @@ std::pair validate_and_configure_window(ITensorInfo *input, ITen const unsigned int conv_pad_y = std::get<1>(conv_info.pad()); const unsigned int conv_stride_x = std::get<0>(conv_info.stride()); const unsigned int conv_stride_y = std::get<1>(conv_info.stride()); - BorderSize border_size = BorderSize(conv_pad_y, conv_pad_x); const int input_width = input->dimension(0); const int input_height = input->dimension(1); @@ -1182,7 +1181,7 @@ void NEDirectConvolutionLayerKernel::configure(const ITensor *input, const ITens // Configure kernel window auto win_config = validate_and_configure_window(input->info(), weights->info(), output->info(), conv_info, _num_weight_elems_read_per_row, - _num_elems_read_per_iteration, _num_elems_written_per_iteration); + _num_elems_read_per_iteration, _num_elems_written_per_iteration, _border_size); ARM_COMPUTE_ERROR_THROW_ON(win_config.first); INEKernel::configure(win_config.second); } @@ -1192,9 +1191,16 @@ Status NEDirectConvolutionLayerKernel::validate(const ITensorInfo *input, const unsigned int num_weight_elems_read_per_row = 0; unsigned int num_elems_read_per_iteration = 0; unsigned int num_elems_written_per_iteration = 0; + BorderSize border_size(conv_info.pad().first, conv_info.pad().second); ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, weights, output, conv_info)); - ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), weights->clone().get(), output->clone().get(), conv_info, num_weight_elems_read_per_row, num_elems_read_per_iteration, - num_elems_written_per_iteration) + ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), + weights->clone().get(), + output->clone().get(), + conv_info, + num_weight_elems_read_per_row, + num_elems_read_per_iteration, + num_elems_written_per_iteration, + border_size) .first); return Status{}; diff --git a/src/core/NEON/kernels/NEFillBorderKernel.cpp 
b/src/core/NEON/kernels/NEFillBorderKernel.cpp index c66e057f23..af04955608 100644 --- a/src/core/NEON/kernels/NEFillBorderKernel.cpp +++ b/src/core/NEON/kernels/NEFillBorderKernel.cpp @@ -47,8 +47,8 @@ inline void fill_constant_value_single_channel_special(ITensor *t float border_value; constant_border_value.get(border_value); uint8_t *const start_valid_region = tensor->ptr_to_element(tensor->info()->valid_region().anchor); - const size_t &width = tensor->info()->valid_region().shape[0]; - const size_t &height = tensor->info()->valid_region().shape[1]; + const size_t width = tensor->info()->valid_region().shape[0]; + const size_t height = tensor->info()->valid_region().shape[1]; const int stridey = tensor->info()->strides_in_bytes()[1]; // Left and right border diff --git a/src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp b/src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp index 2eabe459a5..ef5d987832 100644 --- a/src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp +++ b/src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp @@ -34,7 +34,7 @@ using namespace arm_compute; NEDirectConvolutionLayer::NEDirectConvolutionLayer(std::shared_ptr memory_manager) - : _memory_group(std::move(memory_manager)), _accumulate_bias_kernel(), _conv_kernel(), _input_border_handler(), _accumulator() + : _memory_group(std::move(memory_manager)), _accumulate_bias_kernel(), _conv_kernel(), _input_border_handler(), _accumulator(), _has_bias(false) { } @@ -46,38 +46,29 @@ void NEDirectConvolutionLayer::configure(ITensor *input, const ITensor *weights, _accumulator.allocator()->free(); } + // Check if bias should be added in the convolution result + _has_bias = (bias != nullptr); + // Allocate the intermediate accumulator tensor in case of fixed point input - switch(output->info()->data_type()) + if(is_data_type_fixed_point(input->info()->data_type())) { - case DataType::QS8: - { - _accumulator.allocator()->init(TensorInfo(output->info()->tensor_shape(), 1, DataType::QS16, 
output->info()->fixed_point_position())); - _memory_group.manage(&_accumulator); - _conv_kernel.configure(input, weights, &_accumulator, conv_info); - _accumulate_bias_kernel.configure(&_accumulator, bias, output); - _accumulator.allocator()->allocate(); - break; - } - case DataType::QS16: + const DataType promoted_dt = (input->info()->data_type() == DataType::QS8) ? DataType::QS16 : DataType::QS32; + _accumulator.allocator()->init(TensorInfo(output->info()->tensor_shape(), 1, promoted_dt, output->info()->fixed_point_position())); + _memory_group.manage(&_accumulator); + _conv_kernel.configure(input, weights, &_accumulator, conv_info); + // TODO (COMPMID-746): Fix accumulate biases to just down-cast when no bias is provided + if(_has_bias) { - _accumulator.allocator()->init(TensorInfo(output->info()->tensor_shape(), 1, DataType::QS32, output->info()->fixed_point_position())); - _memory_group.manage(&_accumulator); - _conv_kernel.configure(input, weights, &_accumulator, conv_info); _accumulate_bias_kernel.configure(&_accumulator, bias, output); - _accumulator.allocator()->allocate(); - break; } - case DataType::F16: - case DataType::F32: + _accumulator.allocator()->allocate(); + } + else + { + _conv_kernel.configure(input, weights, output, conv_info); + if(_has_bias) { - _conv_kernel.configure(input, weights, output, conv_info); _accumulate_bias_kernel.configure(output, bias); - break; - } - default: - { - ARM_COMPUTE_ERROR("Data type not supported"); - break; } } @@ -87,7 +78,7 @@ void NEDirectConvolutionLayer::configure(ITensor *input, const ITensor *weights, Status NEDirectConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *bias, const ITensorInfo *output, const PadStrideInfo &conv_info) { - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, bias, output); + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output); DataType data_type = output->data_type(); if(is_data_type_fixed_point(data_type)) @@ -97,14 +88,22 
@@ Status NEDirectConvolutionLayer::validate(const ITensorInfo *input, const ITenso } TensorInfo accumulator(output->clone()->set_is_resizable(true).reset_padding().set_data_type(data_type)); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(weights, bias); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(bias->dimension(0) != weights->dimension(3), - "Biases size and number of input feature maps should match"); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(bias->num_dimensions() > 1, - "Biases should be one dimensional"); - + // Validate Convolution kernel ARM_COMPUTE_RETURN_ON_ERROR(NEDirectConvolutionLayerKernel::validate(input, weights, &accumulator, conv_info)); - ARM_COMPUTE_RETURN_ON_ERROR(NEDirectConvolutionLayerBiasAccumulateKernel::validate(&accumulator, bias, output)); + + // Validate bias + ARM_COMPUTE_RETURN_ERROR_ON_MSG((bias == nullptr) && is_data_type_fixed_point(data_type), + "Biases should be provided for fixed point inputs"); + if(bias != nullptr) + { + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(weights, bias); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(bias->dimension(0) != weights->dimension(3), + "Biases size and number of input feature maps should match"); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(bias->num_dimensions() > 1, "Biases should be one dimensional"); + + // Validate bias kernel + ARM_COMPUTE_RETURN_ON_ERROR(NEDirectConvolutionLayerBiasAccumulateKernel::validate(&accumulator, bias, output)); + } return Status{}; } @@ -116,7 +115,10 @@ void NEDirectConvolutionLayer::run() _memory_group.acquire(); NEScheduler::get().schedule(&_conv_kernel, Window::DimZ); - NEScheduler::get().schedule(&_accumulate_bias_kernel, Window::DimY); + if(_has_bias) + { + NEScheduler::get().schedule(&_accumulate_bias_kernel, Window::DimY); + } _memory_group.release(); } -- cgit v1.2.1