aboutsummaryrefslogtreecommitdiff
path: root/src/core/NEON/kernels
diff options
context:
space:
mode:
Diffstat (limited to 'src/core/NEON/kernels')
-rw-r--r--src/core/NEON/kernels/NECropKernel.cpp2
-rw-r--r--src/core/NEON/kernels/NEDilateKernel.cpp5
-rw-r--r--src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp14
-rw-r--r--src/core/NEON/kernels/NEErodeKernel.cpp5
-rw-r--r--src/core/NEON/kernels/NEFillBorderKernel.cpp8
-rw-r--r--src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.cpp4
-rw-r--r--src/core/NEON/kernels/NELKTrackerKernel.cpp12
-rw-r--r--src/core/NEON/kernels/NEPoolingLayerKernel.cpp3
-rw-r--r--src/core/NEON/kernels/NERemapKernel.cpp10
-rw-r--r--src/core/NEON/kernels/NEScaleKernel.cpp5
10 files changed, 37 insertions, 31 deletions
diff --git a/src/core/NEON/kernels/NECropKernel.cpp b/src/core/NEON/kernels/NECropKernel.cpp
index b6fe5819e4..f16eb3e6bd 100644
--- a/src/core/NEON/kernels/NECropKernel.cpp
+++ b/src/core/NEON/kernels/NECropKernel.cpp
@@ -178,7 +178,7 @@ inline void out_of_bounds_crop_window(const ITensor *output, float *output_ptr,
template <bool is_height_flipped, bool has_cols_in_bounds, bool has_cols_out_of_bounds_before, bool has_cols_out_of_bounds_after>
inline void execute_window(const ITensor *input, const ITensor *output, Coordinates input_offset, float extrapolation_value,
- const uint32_t rows_out_of_bounds[], const uint32_t cols_out_of_bounds[], NECropKernel::InBoundsCropFunction *in_bounds_crop_function)
+ const std::array<uint32_t, 2> &rows_out_of_bounds, const std::array<uint32_t, 2> &cols_out_of_bounds, NECropKernel::InBoundsCropFunction *in_bounds_crop_function)
{
// Output is always float.
const int window_step_x = 16 / sizeof(float);
diff --git a/src/core/NEON/kernels/NEDilateKernel.cpp b/src/core/NEON/kernels/NEDilateKernel.cpp
index 3ee00a47d3..e761815f9e 100644
--- a/src/core/NEON/kernels/NEDilateKernel.cpp
+++ b/src/core/NEON/kernels/NEDilateKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -94,7 +94,8 @@ void NEDilateKernel::run(const Window &window, const ThreadInfo &info)
uint8x8_t bot_high_data = vget_high_u8(bot_data);
uint8x8_t bot_low_data = vget_low_u8(bot_data);
- uint8x8_t p0, p1;
+ uint8x8_t p0;
+ uint8x8_t p1;
p0 = top_low_data;
p1 = vext_u8(top_low_data, top_high_data, 1);
diff --git a/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp b/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp
index 162c4b1ace..d557cfa1bd 100644
--- a/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEDirectConvolutionLayerKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -192,12 +192,12 @@ public:
execute_window_loop(window_out, [&](const Coordinates & id)
{
- const uint8_t *input_ptr = in.ptr() - conv_pad_left * input_stride_x - conv_pad_top * input_stride_y;
- uint8_t *out_ptr = out.ptr();
- int ih = 0;
- int oh = 0;
- float32x4_t accum0[small_tensor_size_optim] = { vdupq_n_f32(0), vdupq_n_f32(0), vdupq_n_f32(0), vdupq_n_f32(0), vdupq_n_f32(0), vdupq_n_f32(0), vdupq_n_f32(0), vdupq_n_f32(0) };
- float32x4_t accum1[small_tensor_size_optim] = { vdupq_n_f32(0), vdupq_n_f32(0), vdupq_n_f32(0), vdupq_n_f32(0), vdupq_n_f32(0), vdupq_n_f32(0), vdupq_n_f32(0), vdupq_n_f32(0) };
+ const uint8_t *input_ptr = in.ptr() - conv_pad_left * input_stride_x - conv_pad_top * input_stride_y;
+ uint8_t *out_ptr = out.ptr();
+ int ih = 0;
+ int oh = 0;
+ std::array<float32x4_t, 8> accum0 = { vdupq_n_f32(0), vdupq_n_f32(0), vdupq_n_f32(0), vdupq_n_f32(0), vdupq_n_f32(0), vdupq_n_f32(0), vdupq_n_f32(0), vdupq_n_f32(0) };
+ std::array<float32x4_t, 8> accum1 = { vdupq_n_f32(0), vdupq_n_f32(0), vdupq_n_f32(0), vdupq_n_f32(0), vdupq_n_f32(0), vdupq_n_f32(0), vdupq_n_f32(0), vdupq_n_f32(0) };
for(int oz = 0; oz < range_z; ++oz)
{
accum0[0] = accum0[1] = accum0[2] = accum0[3] = accum0[4] = accum0[5] = accum0[6] = accum0[7] = vdupq_n_f32(0.f);
diff --git a/src/core/NEON/kernels/NEErodeKernel.cpp b/src/core/NEON/kernels/NEErodeKernel.cpp
index 88c20f8174..2a538ecd0f 100644
--- a/src/core/NEON/kernels/NEErodeKernel.cpp
+++ b/src/core/NEON/kernels/NEErodeKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -94,7 +94,8 @@ void NEErodeKernel::run(const Window &window, const ThreadInfo &info)
uint8x8_t bot_high_data = vget_high_u8(bot_data);
uint8x8_t bot_low_data = vget_low_u8(bot_data);
- uint8x8_t p0, p1;
+ uint8x8_t p0;
+ uint8x8_t p1;
p0 = top_low_data;
p1 = vext_u8(top_low_data, top_high_data, 1);
diff --git a/src/core/NEON/kernels/NEFillBorderKernel.cpp b/src/core/NEON/kernels/NEFillBorderKernel.cpp
index f4046e0851..4127dc8fbd 100644
--- a/src/core/NEON/kernels/NEFillBorderKernel.cpp
+++ b/src/core/NEON/kernels/NEFillBorderKernel.cpp
@@ -168,7 +168,7 @@ void NEFillBorderKernel::fill_replicate_single_channel(const Window &window)
Iterator vertical_it(_tensor, vertical);
- execute_window_loop(vertical, [&](const Coordinates & id)
+ execute_window_loop(vertical, [&](const Coordinates &)
{
uint8_t *base_addr = start_valid_region + vertical_it.offset();
// Fill left and right borders
@@ -188,7 +188,7 @@ void NEFillBorderKernel::fill_replicate_single_channel(const Window &window)
Iterator plane_it(_tensor, window);
// Iterate over all XY planes
- execute_window_loop(window, [&](const Coordinates & id)
+ execute_window_loop(window, [&](const Coordinates &)
{
uint8_t *base_addr = start_valid_region + plane_it.offset();
// Top border
@@ -224,7 +224,7 @@ void NEFillBorderKernel::fill_constant_value_single_channel(const Window &window
Iterator vertical_it(_tensor, vertical);
- execute_window_loop(vertical, [&](const Coordinates & id)
+ execute_window_loop(vertical, [&](const Coordinates &)
{
uint8_t *base_addr = start_valid_region + vertical_it.offset();
// Fill left and right borders
@@ -244,7 +244,7 @@ void NEFillBorderKernel::fill_constant_value_single_channel(const Window &window
Iterator plane_it(_tensor, window);
// Iterate over all XY planes
- execute_window_loop(window, [&](const Coordinates & id)
+ execute_window_loop(window, [&](const Coordinates &)
{
uint8_t *base_addr = start_valid_region + plane_it.offset();
// Top border
diff --git a/src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.cpp b/src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.cpp
index 7769d9eb8c..c929983162 100644
--- a/src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.cpp
+++ b/src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.cpp
@@ -126,7 +126,7 @@ void gemm_interleave_16bit_elements(const ITensor *input, ITensor *output, const
win_out.set_dimension_step(Window::DimX, 16);
Iterator out(output, win_out);
- execute_window_loop(window, [&](const Coordinates & id)
+ execute_window_loop(window, [&](const Coordinates &)
{
const uint16x4x4_t data =
{
@@ -154,7 +154,7 @@ void gemm_interleave_32bit_elements(const ITensor *input, ITensor *output, const
win_out.set_dimension_step(Window::DimX, 16);
Iterator out(output, win_out);
- execute_window_loop(window, [&](const Coordinates & id)
+ execute_window_loop(window, [&](const Coordinates &)
{
const uint32x4x4_t data =
{
diff --git a/src/core/NEON/kernels/NELKTrackerKernel.cpp b/src/core/NEON/kernels/NELKTrackerKernel.cpp
index 83593e7f0d..ddf869e303 100644
--- a/src/core/NEON/kernels/NELKTrackerKernel.cpp
+++ b/src/core/NEON/kernels/NELKTrackerKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2018 ARM Limited.
+ * Copyright (c) 2016-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -405,9 +405,9 @@ void NELKTrackerKernel::run(const Window &window, const ThreadInfo &info)
init_keypoints(list_start, list_end);
- const int buffer_size = _window_dimension * _window_dimension;
- int32_t bilinear_ix[buffer_size];
- int32_t bilinear_iy[buffer_size];
+ const int buffer_size = _window_dimension * _window_dimension;
+ std::vector<int32_t> bilinear_ix(buffer_size);
+ std::vector<int32_t> bilinear_iy(buffer_size);
const int half_window = _window_dimension / 2;
@@ -444,7 +444,7 @@ void NELKTrackerKernel::run(const Window &window, const ThreadInfo &info)
int iA12 = 0;
int iA22 = 0;
- std::tie(iA11, iA12, iA22) = compute_spatial_gradient_matrix(old_keypoint, bilinear_ix, bilinear_iy);
+ std::tie(iA11, iA12, iA22) = compute_spatial_gradient_matrix(old_keypoint, bilinear_ix.data(), bilinear_iy.data());
const float A11 = iA11 * FLT_SCALE;
const float A12 = iA12 * FLT_SCALE;
@@ -490,7 +490,7 @@ void NELKTrackerKernel::run(const Window &window, const ThreadInfo &info)
int ib1 = 0;
int ib2 = 0;
- std::tie(ib1, ib2) = compute_image_mismatch_vector(old_keypoint, new_keypoint, bilinear_ix, bilinear_iy);
+ std::tie(ib1, ib2) = compute_image_mismatch_vector(old_keypoint, new_keypoint, bilinear_ix.data(), bilinear_iy.data());
double b1 = ib1 * FLT_SCALE;
double b2 = ib2 * FLT_SCALE;
diff --git a/src/core/NEON/kernels/NEPoolingLayerKernel.cpp b/src/core/NEON/kernels/NEPoolingLayerKernel.cpp
index 0b90d9f290..ac2ffa1988 100644
--- a/src/core/NEON/kernels/NEPoolingLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEPoolingLayerKernel.cpp
@@ -352,7 +352,8 @@ void NEPoolingLayerKernel::configure(const ITensor *input, ITensor *output, cons
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments_pool_info(pool_size.x(), pool_size.y()));
// Check output dimensions
- unsigned int pooled_w, pooled_h;
+ unsigned int pooled_w;
+ unsigned int pooled_h;
std::tie(pooled_w, pooled_h) = scaled_dimensions(input->info()->dimension(idx_width),
input->info()->dimension(idx_height),
pool_size.x(),
diff --git a/src/core/NEON/kernels/NERemapKernel.cpp b/src/core/NEON/kernels/NERemapKernel.cpp
index edb3ffe1df..3c871de73a 100644
--- a/src/core/NEON/kernels/NERemapKernel.cpp
+++ b/src/core/NEON/kernels/NERemapKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -113,8 +113,8 @@ void NERemapKernel::configure(const ITensor *input, const ITensor *map_x, const
AccessWindowStatic input_access(input->info(), -border_size().left, -border_size().top, access_right, input->info()->dimension(1) + border_size().bottom);
AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);
- AccessWindowHorizontal mapx_access(map_x->info(), 0, num_elems_processed_per_iteration);
- AccessWindowHorizontal mapy_access(map_y->info(), 0, num_elems_processed_per_iteration);
+ AccessWindowHorizontal mapx_access(map_x->info(), 0, num_elems_processed_per_iteration);
+ AccessWindowHorizontal mapy_access(map_y->info(), 0, num_elems_processed_per_iteration);
update_window_and_padding(win, input_access, mapx_access, mapy_access, output_access);
@@ -140,7 +140,7 @@ void NERemapKernel::remap_nearest(const Window &window)
const float32x4_t height = vdupq_n_f32(static_cast<float>(_input->info()->dimension(1)));
const int32x4_t in_stride = vdupq_n_s32(static_cast<int32_t>(_input->info()->strides_in_bytes()[1]));
- execute_window_loop(window, [&](const Coordinates & id)
+ execute_window_loop(window, [&](const Coordinates &)
{
const auto mapx_ptr = reinterpret_cast<const float *>(mapx.ptr());
const auto mapy_ptr = reinterpret_cast<const float *>(mapy.ptr());
@@ -190,7 +190,7 @@ void NERemapKernel::remap_bilinear(const Window &window)
const size_t height = _input->info()->dimension(1);
const size_t in_stride = _input->info()->strides_in_bytes()[1];
- execute_window_loop(window, [&](const Coordinates & id)
+ execute_window_loop(window, [&](const Coordinates &)
{
const auto mapx_ptr = reinterpret_cast<float *>(mapx.ptr());
const auto mapy_ptr = reinterpret_cast<float *>(mapy.ptr());
diff --git a/src/core/NEON/kernels/NEScaleKernel.cpp b/src/core/NEON/kernels/NEScaleKernel.cpp
index 8c4a70cdad..33540393e4 100644
--- a/src/core/NEON/kernels/NEScaleKernel.cpp
+++ b/src/core/NEON/kernels/NEScaleKernel.cpp
@@ -249,7 +249,10 @@ inline void scale_bilinear_nhwc_core(const ITensor *input, const ITensor *offset
if(is_valid(offset, -border_size, input_width - 1 + border_size, in_yi, -border_size, input_height - 1 + border_size))
{
- T a00 = 0, a01 = 0, a10 = 0, a11 = 0;
+ T a00 = 0;
+ T a01 = 0;
+ T a10 = 0;
+ T a11 = 0;
if(border_mode == BorderMode::CONSTANT)
{