about | summary | refs | log | tree | commit | diff
path: root/src/core/NEON/kernels
diff options
context:
space:
mode:
Diffstat (limited to 'src/core/NEON/kernels')
-rw-r--r-- src/core/NEON/kernels/NEDepthToSpaceLayerKernel.cpp 7
-rw-r--r-- src/core/NEON/kernels/NEIm2ColKernel.cpp 19
-rw-r--r-- src/core/NEON/kernels/NEPoolingLayerKernel.cpp 22
-rw-r--r-- src/core/NEON/kernels/NEReductionOperationKernel.cpp 38
-rw-r--r-- src/core/NEON/kernels/NEScaleKernel.cpp 4
-rw-r--r-- src/core/NEON/kernels/NESpaceToDepthLayerKernel.cpp 10
6 files changed, 38 insertions, 62 deletions
diff --git a/src/core/NEON/kernels/NEDepthToSpaceLayerKernel.cpp b/src/core/NEON/kernels/NEDepthToSpaceLayerKernel.cpp
index df631c3c03..98b0c106db 100644
--- a/src/core/NEON/kernels/NEDepthToSpaceLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEDepthToSpaceLayerKernel.cpp
@@ -63,7 +63,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, i
} // namespace
NEDepthToSpaceLayerKernel::NEDepthToSpaceLayerKernel()
- : _input(nullptr), _output(nullptr), _block_shape()
+ : _input(nullptr), _output(nullptr), _block_shape(), _data_layout(DataLayout::UNKNOWN)
{
}
@@ -80,6 +80,7 @@ void NEDepthToSpaceLayerKernel::configure(const ITensor *input, ITensor *output,
_input = input;
_output = output;
_block_shape = block_shape;
+ _data_layout = input->info()->data_layout();
// Configure kernel window
Window win = calculate_max_window(*input->info(), Steps());
@@ -99,7 +100,7 @@ void NEDepthToSpaceLayerKernel::run(const Window &window, const ThreadInfo &info
ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICPPKernel::window(), window);
- const int idx_channel = get_data_layout_dimension_index(_input->info()->data_layout(), DataLayoutDimension::CHANNEL);
+ const int idx_channel = get_data_layout_dimension_index(_data_layout, DataLayoutDimension::CHANNEL);
const int depth_size = _input->info()->dimension(idx_channel);
const int r = (depth_size / (_block_shape * _block_shape));
const int element_size = _input->info()->element_size();
@@ -112,7 +113,7 @@ void NEDepthToSpaceLayerKernel::run(const Window &window, const ThreadInfo &info
slice_out.set(Window::DimZ, Window::Dimension(0, 0, 0));
// Main loop for NCHW and NHWC
- if(_input->info()->data_layout() == DataLayout::NCHW)
+ if(_data_layout == DataLayout::NCHW)
{
Window slice_in = window.first_slice_window_2D();
do
diff --git a/src/core/NEON/kernels/NEIm2ColKernel.cpp b/src/core/NEON/kernels/NEIm2ColKernel.cpp
index 0641d6cfa3..27c3d66b4f 100644
--- a/src/core/NEON/kernels/NEIm2ColKernel.cpp
+++ b/src/core/NEON/kernels/NEIm2ColKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -265,10 +265,9 @@ void NEIm2ColKernel::run_im2col(const Window &window)
ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
- const DataLayout data_layout = _input->info()->data_layout();
- const unsigned int width_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
- const unsigned int height_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
- const unsigned int channel_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);
+ const unsigned int width_idx = get_data_layout_dimension_index(_data_layout, DataLayoutDimension::WIDTH);
+ const unsigned int height_idx = get_data_layout_dimension_index(_data_layout, DataLayoutDimension::HEIGHT);
+ const unsigned int channel_idx = get_data_layout_dimension_index(_data_layout, DataLayoutDimension::CHANNEL);
const int input_w = _input->info()->dimension(width_idx);
const int input_h = _input->info()->dimension(height_idx);
@@ -344,7 +343,7 @@ void NEIm2ColKernel::run_im2col(const Window &window)
}
NEIm2ColKernel::NEIm2ColKernel()
- : _func(), _input(nullptr), _output(nullptr), _convolved_dims(), _conv_info(), _kernel_width(0), _kernel_height(0), _has_bias(false), _dilation(1U, 1U)
+ : _func(), _input(nullptr), _output(nullptr), _convolved_dims(), _conv_info(), _kernel_width(0), _kernel_height(0), _has_bias(false), _dilation(1U, 1U), _data_layout(DataLayout::UNKNOWN)
{
}
@@ -355,9 +354,9 @@ void NEIm2ColKernel::configure(const ITensor *input, ITensor *output, const Size
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), kernel_dims, conv_info, has_bias, dilation, num_groups));
ARM_COMPUTE_UNUSED(num_groups);
- const DataLayout data_layout = input->info()->data_layout();
- const unsigned int width_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
- const unsigned int height_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
+ _data_layout = input->info()->data_layout();
+ const unsigned int width_idx = get_data_layout_dimension_index(_data_layout, DataLayoutDimension::WIDTH);
+ const unsigned int height_idx = get_data_layout_dimension_index(_data_layout, DataLayoutDimension::HEIGHT);
_input = input;
_output = output;
@@ -370,7 +369,7 @@ void NEIm2ColKernel::configure(const ITensor *input, ITensor *output, const Size
_conv_info, _dilation);
_has_bias = has_bias;
- if(data_layout == DataLayout::NCHW)
+ if(_data_layout == DataLayout::NCHW)
{
switch(_input->info()->data_type())
{
diff --git a/src/core/NEON/kernels/NEPoolingLayerKernel.cpp b/src/core/NEON/kernels/NEPoolingLayerKernel.cpp
index 62c9ca0d5e..14de4a19d8 100644
--- a/src/core/NEON/kernels/NEPoolingLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEPoolingLayerKernel.cpp
@@ -321,7 +321,7 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen
} // namespace
NEPoolingLayerKernel::NEPoolingLayerKernel()
- : _func(nullptr), _input(nullptr), _output(nullptr), _pool_info(), _num_elems_processed_per_iteration(0), _border_size(0), _is_square(false)
+ : _func(nullptr), _input(nullptr), _output(nullptr), _pool_info(), _data_layout(DataLayout::UNKNOWN), _num_elems_processed_per_iteration(0), _border_size(0), _is_square(false)
{
}
@@ -364,14 +364,15 @@ void NEPoolingLayerKernel::configure(const ITensor *input, ITensor *output, cons
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), pool_info, pooled_w, pooled_h));
// Set instance variables
- _input = input;
- _output = output;
- _pool_info = pool_info;
- _is_square = (pool_size.x() == pool_size.y());
+ _input = input;
+ _output = output;
+ _pool_info = pool_info;
+ _data_layout = input->info()->data_layout();
+ _is_square = (pool_size.x() == pool_size.y());
// Get data type
const DataType data_type = input->info()->data_type();
- const bool is_nchw = data_layout == DataLayout::NCHW;
+ const bool is_nchw = _data_layout == DataLayout::NCHW;
if(data_type == DataType::QASYMM8)
{
@@ -1574,7 +1575,12 @@ void NEPoolingLayerKernel::poolingMxN_f32_nhwc(const Window &window_input, const
// Calculate square-root in case of l2 pooling
if(pooling_type == PoolingType::L2)
{
- vres = vmulq_f32(vres, vinvsqrtq_f32(vres));
+ float32x4_t l2_res = { static_cast<float>(sqrt(vgetq_lane_f32(vres, 0))),
+ static_cast<float>(sqrt(vgetq_lane_f32(vres, 1))),
+ static_cast<float>(sqrt(vgetq_lane_f32(vres, 2))),
+ static_cast<float>(sqrt(vgetq_lane_f32(vres, 3)))
+ };
+ vres = l2_res;
}
// Store result
@@ -1835,7 +1841,7 @@ void NEPoolingLayerKernel::run(const Window &window, const ThreadInfo &info)
const bool exclude_padding = _pool_info.exclude_padding();
Window window_input(window);
- if(_input->info()->data_layout() == DataLayout::NCHW)
+ if(_data_layout == DataLayout::NCHW)
{
// Set step for input in x and y direction for the input
unsigned int window_x_inc = 0;
diff --git a/src/core/NEON/kernels/NEReductionOperationKernel.cpp b/src/core/NEON/kernels/NEReductionOperationKernel.cpp
index ffa4fa3565..16cd6f77b4 100644
--- a/src/core/NEON/kernels/NEReductionOperationKernel.cpp
+++ b/src/core/NEON/kernels/NEReductionOperationKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -742,23 +742,8 @@ struct RedOpYZW
for(unsigned int dim = 0; dim < in_info.dimension(axis); ++dim)
{
- T *in_ptr;
- switch(axis)
- {
- case 1:
- in_ptr = reinterpret_cast<T *>(input.ptr() + in_info.offset_element_in_bytes(Coordinates(0, dim)));
- break;
- case 2:
- in_ptr = reinterpret_cast<T *>(input.ptr() + in_info.offset_element_in_bytes(Coordinates(0, 0, dim)));
- break;
- case 3:
- in_ptr = reinterpret_cast<T *>(input.ptr() + in_info.offset_element_in_bytes(Coordinates(0, 0, 0, dim)));
- break;
- default:
- ARM_COMPUTE_ERROR("Not supported");
- }
+ const T *in_ptr = reinterpret_cast<T *>(input.ptr() + in_info.strides_in_bytes()[axis] * dim);
const auto vec_elements = wrapper::vloadq(in_ptr);
-
switch(op)
{
case ReductionOperation::SUM:
@@ -907,23 +892,8 @@ struct RedOpYZW_qasymm8
for(unsigned int index_dim = 0; index_dim < in_info.dimension(axis); ++index_dim)
{
- uint8_t *in_ptr;
- switch(axis)
- {
- case 1:
- in_ptr = input.ptr() + in_info.offset_element_in_bytes(Coordinates(0, index_dim));
- break;
- case 2:
- in_ptr = input.ptr() + in_info.offset_element_in_bytes(Coordinates(0, 0, index_dim));
- break;
- case 3:
- in_ptr = input.ptr() + in_info.offset_element_in_bytes(Coordinates(0, 0, 0, index_dim));
- break;
- default:
- ARM_COMPUTE_ERROR("Not supported");
- }
- const auto vec_elements = wrapper::vloadq(in_ptr);
-
+ const uint8_t *in_ptr = input.ptr() + in_info.strides_in_bytes()[axis] * index_dim;
+ const auto vec_elements = wrapper::vloadq(in_ptr);
switch(op)
{
case ReductionOperation::SUM:
diff --git a/src/core/NEON/kernels/NEScaleKernel.cpp b/src/core/NEON/kernels/NEScaleKernel.cpp
index a2a44fca18..5b8e196a2c 100644
--- a/src/core/NEON/kernels/NEScaleKernel.cpp
+++ b/src/core/NEON/kernels/NEScaleKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -909,7 +909,7 @@ void NEScaleKernel::scale_area_nchw(const Window &window)
void NEScaleKernel::scale_nhwc(const Window &window)
{
// Get data layout and width/height indices
- const DataLayout data_layout = _input->info()->data_layout();
+ const DataLayout data_layout = DataLayout::NHWC;
const int idx_channels = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);
const int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
const int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
diff --git a/src/core/NEON/kernels/NESpaceToDepthLayerKernel.cpp b/src/core/NEON/kernels/NESpaceToDepthLayerKernel.cpp
index 4803365013..ffd2dc14bf 100644
--- a/src/core/NEON/kernels/NESpaceToDepthLayerKernel.cpp
+++ b/src/core/NEON/kernels/NESpaceToDepthLayerKernel.cpp
@@ -66,7 +66,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, i
} // namespace
NESpaceToDepthLayerKernel::NESpaceToDepthLayerKernel()
- : _input(nullptr), _output(nullptr), _block_shape()
+ : _input(nullptr), _output(nullptr), _block_shape(), _data_layout(DataLayout::UNKNOWN)
{
}
@@ -82,6 +82,7 @@ void NESpaceToDepthLayerKernel::configure(const ITensor *input, ITensor *output,
_input = input;
_block_shape = block_shape;
_output = output;
+ _data_layout = input->info()->data_layout();
// Configure kernel window
Window win = calculate_max_window(*output->info(), Steps());
@@ -100,9 +101,8 @@ void NESpaceToDepthLayerKernel::run(const Window &window, const ThreadInfo &info
ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICPPKernel::window(), window);
- const DataLayout data_layout = _input->info()->data_layout();
- const int channel_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);
- const int element_size = _input->info()->element_size();
+ const int channel_idx = get_data_layout_dimension_index(_data_layout, DataLayoutDimension::CHANNEL);
+ const int element_size = _input->info()->element_size();
const size_t channel_size = _input->info()->dimension(channel_idx);
@@ -111,7 +111,7 @@ void NESpaceToDepthLayerKernel::run(const Window &window, const ThreadInfo &info
int batch_id = 0;
// Main loop for NCHW and NHWC
- if(_output->info()->data_layout() == DataLayout::NCHW)
+ if(_data_layout == DataLayout::NCHW)
{
do
{