about | summary | refs | log | tree | commit | diff
path: root/src/core
diff options
context:
space:
mode:
author Georgios Pinitas <georgios.pinitas@arm.com> 2017-12-12 11:44:44 +0000
committer Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:42:33 +0000
commit 0223a78ba43f0b106347108857d2f8cbfe857198 (patch)
tree e6751b5418534595d890f401929284fe6d198f4b /src/core
parent 1568621e07cef67c5bb01fa4cc827e218302040c (diff)
download ComputeLibrary-0223a78ba43f0b106347108857d2f8cbfe857198.tar.gz
COMPMID-556: Fix bugs around NEDirectConvolutionLayer
Change-Id: Ib4af25cd6dae78ed4ec89f4272cfaa2356359446
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/112867
Tested-by: BSG Visual Compute Jenkins server to access repositories on http://mpd-gerrit.cambridge.arm.com <bsgcomp@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Diffstat (limited to 'src/core')
-rw-r--r-- src/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.cpp 1
-rw-r--r-- src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp 16
-rw-r--r-- src/core/NEON/kernels/NEFillBorderKernel.cpp 4
3 files changed, 14 insertions, 7 deletions
diff --git a/src/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.cpp b/src/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.cpp
index a6585ade12..65b7087d7e 100644
--- a/src/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.cpp
+++ b/src/core/NEON/kernels/NEDirectConvolutionLayerBiasAccumulateKernel.cpp
@@ -244,6 +244,7 @@ void NEDirectConvolutionLayerBiasAccumulateKernel::configure(ITensor *input, con
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, bias);
+ // Auto-initialize output if required
if(output != nullptr)
{
// Output tensor auto initialization if not yet initialized
diff --git a/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp b/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp
index 1ca213b04a..2ba0ef2e69 100644
--- a/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp
@@ -1048,7 +1048,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *weights,
}
std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *weights, ITensorInfo *output, const PadStrideInfo &conv_info, unsigned int &num_weight_elems_read_per_row,
- unsigned int &num_elems_read_per_iteration, unsigned int &num_elems_written_per_iteration)
+ unsigned int &num_elems_read_per_iteration, unsigned int &num_elems_written_per_iteration, BorderSize &border_size)
{
// Calculate right and bottom border
unsigned int kernel_size = weights->dimension(0);
@@ -1056,7 +1056,6 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen
const unsigned int conv_pad_y = std::get<1>(conv_info.pad());
const unsigned int conv_stride_x = std::get<0>(conv_info.stride());
const unsigned int conv_stride_y = std::get<1>(conv_info.stride());
- BorderSize border_size = BorderSize(conv_pad_y, conv_pad_x);
const int input_width = input->dimension(0);
const int input_height = input->dimension(1);
@@ -1182,7 +1181,7 @@ void NEDirectConvolutionLayerKernel::configure(const ITensor *input, const ITens
// Configure kernel window
auto win_config = validate_and_configure_window(input->info(), weights->info(), output->info(), conv_info, _num_weight_elems_read_per_row,
- _num_elems_read_per_iteration, _num_elems_written_per_iteration);
+ _num_elems_read_per_iteration, _num_elems_written_per_iteration, _border_size);
ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
INEKernel::configure(win_config.second);
}
@@ -1192,9 +1191,16 @@ Status NEDirectConvolutionLayerKernel::validate(const ITensorInfo *input, const
unsigned int num_weight_elems_read_per_row = 0;
unsigned int num_elems_read_per_iteration = 0;
unsigned int num_elems_written_per_iteration = 0;
+ BorderSize border_size(conv_info.pad().first, conv_info.pad().second);
ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, weights, output, conv_info));
- ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), weights->clone().get(), output->clone().get(), conv_info, num_weight_elems_read_per_row, num_elems_read_per_iteration,
- num_elems_written_per_iteration)
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(),
+ weights->clone().get(),
+ output->clone().get(),
+ conv_info,
+ num_weight_elems_read_per_row,
+ num_elems_read_per_iteration,
+ num_elems_written_per_iteration,
+ border_size)
.first);
return Status{};
diff --git a/src/core/NEON/kernels/NEFillBorderKernel.cpp b/src/core/NEON/kernels/NEFillBorderKernel.cpp
index c66e057f23..af04955608 100644
--- a/src/core/NEON/kernels/NEFillBorderKernel.cpp
+++ b/src/core/NEON/kernels/NEFillBorderKernel.cpp
@@ -47,8 +47,8 @@ inline void fill_constant_value_single_channel_special<float, 1u, 1u>(ITensor *t
float border_value;
constant_border_value.get(border_value);
uint8_t *const start_valid_region = tensor->ptr_to_element(tensor->info()->valid_region().anchor);
- const size_t &width = tensor->info()->valid_region().shape[0];
- const size_t &height = tensor->info()->valid_region().shape[1];
+ const size_t width = tensor->info()->valid_region().shape[0];
+ const size_t height = tensor->info()->valid_region().shape[1];
const int stridey = tensor->info()->strides_in_bytes()[1];
// Left and right border