diff options
Diffstat (limited to 'src/core/NEON/kernels')
3 files changed, 14 insertions, 7 deletions
diff --git a/src/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.cpp b/src/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.cpp index a6585ade12..65b7087d7e 100644 --- a/src/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.cpp +++ b/src/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.cpp @@ -244,6 +244,7 @@ void NEDirectConvolutionLayerBiasAccumulateKernel::configure(ITensor *input, con { ARM_COMPUTE_ERROR_ON_NULLPTR(input, bias); + // Auto-initialize output if required if(output != nullptr) { // Output tensor auto initialization if not yet initialized diff --git a/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp b/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp index 1ca213b04a..2ba0ef2e69 100644 --- a/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp +++ b/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp @@ -1048,7 +1048,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *weights, } std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *weights, ITensorInfo *output, const PadStrideInfo &conv_info, unsigned int &num_weight_elems_read_per_row, - unsigned int &num_elems_read_per_iteration, unsigned int &num_elems_written_per_iteration) + unsigned int &num_elems_read_per_iteration, unsigned int &num_elems_written_per_iteration, BorderSize &border_size) { // Calculate right and bottom border unsigned int kernel_size = weights->dimension(0); @@ -1056,7 +1056,6 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen const unsigned int conv_pad_y = std::get<1>(conv_info.pad()); const unsigned int conv_stride_x = std::get<0>(conv_info.stride()); const unsigned int conv_stride_y = std::get<1>(conv_info.stride()); - BorderSize border_size = BorderSize(conv_pad_y, conv_pad_x); const int input_width = input->dimension(0); const int input_height = input->dimension(1); @@ -1182,7 +1181,7 @@ void 
NEDirectConvolutionLayerKernel::configure(const ITensor *input, const ITens // Configure kernel window auto win_config = validate_and_configure_window(input->info(), weights->info(), output->info(), conv_info, _num_weight_elems_read_per_row, - _num_elems_read_per_iteration, _num_elems_written_per_iteration); + _num_elems_read_per_iteration, _num_elems_written_per_iteration, _border_size); ARM_COMPUTE_ERROR_THROW_ON(win_config.first); INEKernel::configure(win_config.second); } @@ -1192,9 +1191,16 @@ Status NEDirectConvolutionLayerKernel::validate(const ITensorInfo *input, const unsigned int num_weight_elems_read_per_row = 0; unsigned int num_elems_read_per_iteration = 0; unsigned int num_elems_written_per_iteration = 0; + BorderSize border_size(conv_info.pad().first, conv_info.pad().second); ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, weights, output, conv_info)); - ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), weights->clone().get(), output->clone().get(), conv_info, num_weight_elems_read_per_row, num_elems_read_per_iteration, - num_elems_written_per_iteration) + ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), + weights->clone().get(), + output->clone().get(), + conv_info, + num_weight_elems_read_per_row, + num_elems_read_per_iteration, + num_elems_written_per_iteration, + border_size) .first); return Status{}; diff --git a/src/core/NEON/kernels/NEFillBorderKernel.cpp b/src/core/NEON/kernels/NEFillBorderKernel.cpp index c66e057f23..af04955608 100644 --- a/src/core/NEON/kernels/NEFillBorderKernel.cpp +++ b/src/core/NEON/kernels/NEFillBorderKernel.cpp @@ -47,8 +47,8 @@ inline void fill_constant_value_single_channel_special<float, 1u, 1u>(ITensor *t float border_value; constant_border_value.get(border_value); uint8_t *const start_valid_region = tensor->ptr_to_element(tensor->info()->valid_region().anchor); - const size_t &width = tensor->info()->valid_region().shape[0]; - const size_t &height 
= tensor->info()->valid_region().shape[1]; + const size_t width = tensor->info()->valid_region().shape[0]; + const size_t height = tensor->info()->valid_region().shape[1]; const int stridey = tensor->info()->strides_in_bytes()[1]; // Left and right border |