diff options
Diffstat (limited to 'src/core/NEON/kernels')
-rw-r--r-- | src/core/NEON/kernels/NEDepthToSpaceLayerKernel.cpp | 7 | ||||
-rw-r--r-- | src/core/NEON/kernels/NEIm2ColKernel.cpp | 19 | ||||
-rw-r--r-- | src/core/NEON/kernels/NEPoolingLayerKernel.cpp | 22 | ||||
-rw-r--r-- | src/core/NEON/kernels/NEReductionOperationKernel.cpp | 38 | ||||
-rw-r--r-- | src/core/NEON/kernels/NEScaleKernel.cpp | 4 | ||||
-rw-r--r-- | src/core/NEON/kernels/NESpaceToDepthLayerKernel.cpp | 10 |
6 files changed, 38 insertions, 62 deletions
diff --git a/src/core/NEON/kernels/NEDepthToSpaceLayerKernel.cpp b/src/core/NEON/kernels/NEDepthToSpaceLayerKernel.cpp index df631c3c03..98b0c106db 100644 --- a/src/core/NEON/kernels/NEDepthToSpaceLayerKernel.cpp +++ b/src/core/NEON/kernels/NEDepthToSpaceLayerKernel.cpp @@ -63,7 +63,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, i } // namespace NEDepthToSpaceLayerKernel::NEDepthToSpaceLayerKernel() - : _input(nullptr), _output(nullptr), _block_shape() + : _input(nullptr), _output(nullptr), _block_shape(), _data_layout(DataLayout::UNKNOWN) { } @@ -80,6 +80,7 @@ void NEDepthToSpaceLayerKernel::configure(const ITensor *input, ITensor *output, _input = input; _output = output; _block_shape = block_shape; + _data_layout = input->info()->data_layout(); // Configure kernel window Window win = calculate_max_window(*input->info(), Steps()); @@ -99,7 +100,7 @@ void NEDepthToSpaceLayerKernel::run(const Window &window, const ThreadInfo &info ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICPPKernel::window(), window); - const int idx_channel = get_data_layout_dimension_index(_input->info()->data_layout(), DataLayoutDimension::CHANNEL); + const int idx_channel = get_data_layout_dimension_index(_data_layout, DataLayoutDimension::CHANNEL); const int depth_size = _input->info()->dimension(idx_channel); const int r = (depth_size / (_block_shape * _block_shape)); const int element_size = _input->info()->element_size(); @@ -112,7 +113,7 @@ void NEDepthToSpaceLayerKernel::run(const Window &window, const ThreadInfo &info slice_out.set(Window::DimZ, Window::Dimension(0, 0, 0)); // Main loop for NCHW and NHWC - if(_input->info()->data_layout() == DataLayout::NCHW) + if(_data_layout == DataLayout::NCHW) { Window slice_in = window.first_slice_window_2D(); do diff --git a/src/core/NEON/kernels/NEIm2ColKernel.cpp b/src/core/NEON/kernels/NEIm2ColKernel.cpp index 0641d6cfa3..27c3d66b4f 100644 --- a/src/core/NEON/kernels/NEIm2ColKernel.cpp +++ b/src/core/NEON/kernels/NEIm2ColKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -265,10 +265,9 @@ void NEIm2ColKernel::run_im2col(const Window &window) ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); - const DataLayout data_layout = _input->info()->data_layout(); - const unsigned int width_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); - const unsigned int height_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); - const unsigned int channel_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL); + const unsigned int width_idx = get_data_layout_dimension_index(_data_layout, DataLayoutDimension::WIDTH); + const unsigned int height_idx = get_data_layout_dimension_index(_data_layout, DataLayoutDimension::HEIGHT); + const unsigned int channel_idx = get_data_layout_dimension_index(_data_layout, DataLayoutDimension::CHANNEL); const int input_w = _input->info()->dimension(width_idx); const int input_h = _input->info()->dimension(height_idx); @@ -344,7 +343,7 @@ void NEIm2ColKernel::run_im2col(const Window &window) } NEIm2ColKernel::NEIm2ColKernel() - : _func(), _input(nullptr), _output(nullptr), _convolved_dims(), _conv_info(), _kernel_width(0), _kernel_height(0), _has_bias(false), _dilation(1U, 1U) + : _func(), _input(nullptr), _output(nullptr), _convolved_dims(), _conv_info(), _kernel_width(0), _kernel_height(0), _has_bias(false), _dilation(1U, 1U), _data_layout(DataLayout::UNKNOWN) { } @@ -355,9 +354,9 @@ void NEIm2ColKernel::configure(const ITensor *input, ITensor *output, const Size ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), kernel_dims, conv_info, has_bias, dilation, num_groups)); ARM_COMPUTE_UNUSED(num_groups); - const DataLayout data_layout = input->info()->data_layout(); - const unsigned int width_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); - const unsigned int height_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); + _data_layout = input->info()->data_layout(); + const unsigned int width_idx = get_data_layout_dimension_index(_data_layout, DataLayoutDimension::WIDTH); + const unsigned int height_idx = get_data_layout_dimension_index(_data_layout, DataLayoutDimension::HEIGHT); _input = input; _output = output; @@ -370,7 +369,7 @@ void NEIm2ColKernel::configure(const ITensor *input, ITensor *output, const Size _conv_info, _dilation); _has_bias = has_bias; - if(data_layout == DataLayout::NCHW) + if(_data_layout == DataLayout::NCHW) { switch(_input->info()->data_type()) { diff --git a/src/core/NEON/kernels/NEPoolingLayerKernel.cpp b/src/core/NEON/kernels/NEPoolingLayerKernel.cpp index 62c9ca0d5e..14de4a19d8 100644 --- a/src/core/NEON/kernels/NEPoolingLayerKernel.cpp +++ b/src/core/NEON/kernels/NEPoolingLayerKernel.cpp @@ -321,7 +321,7 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen } // namespace NEPoolingLayerKernel::NEPoolingLayerKernel() - : _func(nullptr), _input(nullptr), _output(nullptr), _pool_info(), _num_elems_processed_per_iteration(0), _border_size(0), _is_square(false) + : _func(nullptr), _input(nullptr), _output(nullptr), _pool_info(), _data_layout(DataLayout::UNKNOWN), _num_elems_processed_per_iteration(0), _border_size(0), _is_square(false) { } @@ -364,14 +364,15 @@ void NEPoolingLayerKernel::configure(const ITensor *input, ITensor *output, cons ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), pool_info, pooled_w, pooled_h)); // Set instance variables - _input = input; - _output = output; - _pool_info = pool_info; - _is_square = (pool_size.x() == pool_size.y()); + _input = input; + _output = output; + _pool_info = pool_info; + _data_layout = input->info()->data_layout(); + _is_square = (pool_size.x() == pool_size.y()); // Get data type const DataType data_type = input->info()->data_type(); - const bool is_nchw = data_layout == DataLayout::NCHW; + const bool is_nchw = _data_layout == DataLayout::NCHW; if(data_type == DataType::QASYMM8) { @@ -1574,7 +1575,12 @@ void NEPoolingLayerKernel::poolingMxN_f32_nhwc(const Window &window_input, const // Calculate square-root in case of l2 pooling if(pooling_type == PoolingType::L2) { - vres = vmulq_f32(vres, vinvsqrtq_f32(vres)); + float32x4_t l2_res = { static_cast<float>(sqrt(vgetq_lane_f32(vres, 0))), + static_cast<float>(sqrt(vgetq_lane_f32(vres, 1))), + static_cast<float>(sqrt(vgetq_lane_f32(vres, 2))), + static_cast<float>(sqrt(vgetq_lane_f32(vres, 3))) + }; + vres = l2_res; } // Store result @@ -1835,7 +1841,7 @@ void NEPoolingLayerKernel::run(const Window &window, const ThreadInfo &info) const bool exclude_padding = _pool_info.exclude_padding(); Window window_input(window); - if(_input->info()->data_layout() == DataLayout::NCHW) + if(_data_layout == DataLayout::NCHW) { // Set step for input in x and y direction for the input unsigned int window_x_inc = 0; diff --git a/src/core/NEON/kernels/NEReductionOperationKernel.cpp b/src/core/NEON/kernels/NEReductionOperationKernel.cpp index ffa4fa3565..16cd6f77b4 100644 --- a/src/core/NEON/kernels/NEReductionOperationKernel.cpp +++ b/src/core/NEON/kernels/NEReductionOperationKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -742,23 +742,8 @@ struct RedOpYZW for(unsigned int dim = 0; dim < in_info.dimension(axis); ++dim) { - T *in_ptr; - switch(axis) - { - case 1: - in_ptr = reinterpret_cast<T *>(input.ptr() + in_info.offset_element_in_bytes(Coordinates(0, dim))); - break; - case 2: - in_ptr = reinterpret_cast<T *>(input.ptr() + in_info.offset_element_in_bytes(Coordinates(0, 0, dim))); - break; - case 3: - in_ptr = reinterpret_cast<T *>(input.ptr() + in_info.offset_element_in_bytes(Coordinates(0, 0, 0, dim))); - break; - default: - ARM_COMPUTE_ERROR("Not supported"); - } + const T *in_ptr = reinterpret_cast<T *>(input.ptr() + in_info.strides_in_bytes()[axis] * dim); const auto vec_elements = wrapper::vloadq(in_ptr); - switch(op) { case ReductionOperation::SUM: @@ -907,23 +892,8 @@ struct RedOpYZW_qasymm8 for(unsigned int index_dim = 0; index_dim < in_info.dimension(axis); ++index_dim) { - uint8_t *in_ptr; - switch(axis) - { - case 1: - in_ptr = input.ptr() + in_info.offset_element_in_bytes(Coordinates(0, index_dim)); - break; - case 2: - in_ptr = input.ptr() + in_info.offset_element_in_bytes(Coordinates(0, 0, index_dim)); - break; - case 3: - in_ptr = input.ptr() + in_info.offset_element_in_bytes(Coordinates(0, 0, 0, index_dim)); - break; - default: - ARM_COMPUTE_ERROR("Not supported"); - } - const auto vec_elements = wrapper::vloadq(in_ptr); - + const uint8_t *in_ptr = input.ptr() + in_info.strides_in_bytes()[axis] * index_dim; + const auto vec_elements = wrapper::vloadq(in_ptr); switch(op) { case ReductionOperation::SUM: diff --git a/src/core/NEON/kernels/NEScaleKernel.cpp b/src/core/NEON/kernels/NEScaleKernel.cpp index a2a44fca18..5b8e196a2c 100644 --- a/src/core/NEON/kernels/NEScaleKernel.cpp +++ b/src/core/NEON/kernels/NEScaleKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -909,7 +909,7 @@ void NEScaleKernel::scale_area_nchw(const Window &window) void NEScaleKernel::scale_nhwc(const Window &window) { // Get data layout and width/height indices - const DataLayout data_layout = _input->info()->data_layout(); + const DataLayout data_layout = DataLayout::NHWC; const int idx_channels = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL); const int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); const int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); diff --git a/src/core/NEON/kernels/NESpaceToDepthLayerKernel.cpp b/src/core/NEON/kernels/NESpaceToDepthLayerKernel.cpp index 4803365013..ffd2dc14bf 100644 --- a/src/core/NEON/kernels/NESpaceToDepthLayerKernel.cpp +++ b/src/core/NEON/kernels/NESpaceToDepthLayerKernel.cpp @@ -66,7 +66,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, i } // namespace NESpaceToDepthLayerKernel::NESpaceToDepthLayerKernel() - : _input(nullptr), _output(nullptr), _block_shape() + : _input(nullptr), _output(nullptr), _block_shape(), _data_layout(DataLayout::UNKNOWN) { } @@ -82,6 +82,7 @@ void NESpaceToDepthLayerKernel::configure(const ITensor *input, ITensor *output, _input = input; _block_shape = block_shape; _output = output; + _data_layout = input->info()->data_layout(); // Configure kernel window Window win = calculate_max_window(*output->info(), Steps()); @@ -100,9 +101,8 @@ void NESpaceToDepthLayerKernel::run(const Window &window, const ThreadInfo &info ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICPPKernel::window(), window); - const DataLayout data_layout = _input->info()->data_layout(); - const int channel_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL); - const int element_size = _input->info()->element_size(); + const int channel_idx = get_data_layout_dimension_index(_data_layout, DataLayoutDimension::CHANNEL); + const int element_size = _input->info()->element_size(); const size_t channel_size = _input->info()->dimension(channel_idx); @@ -111,7 +111,7 @@ void NESpaceToDepthLayerKernel::run(const Window &window, const ThreadInfo &info int batch_id = 0; // Main loop for NCHW and NHWC - if(_output->info()->data_layout() == DataLayout::NCHW) + if(_data_layout == DataLayout::NCHW) { do { |