diff options
Diffstat (limited to 'src/core/NEON/kernels/NEChannelShuffleLayerKernel.cpp')
-rw-r--r-- | src/core/NEON/kernels/NEChannelShuffleLayerKernel.cpp | 115 |
1 files changed, 61 insertions, 54 deletions
diff --git a/src/core/NEON/kernels/NEChannelShuffleLayerKernel.cpp b/src/core/NEON/kernels/NEChannelShuffleLayerKernel.cpp index 45f6fd99fe..3b53b7055f 100644 --- a/src/core/NEON/kernels/NEChannelShuffleLayerKernel.cpp +++ b/src/core/NEON/kernels/NEChannelShuffleLayerKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 ARM Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,9 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/core/NEON/kernels/NEChannelShuffleLayerKernel.h" +#include "src/core/NEON/kernels/NEChannelShuffleLayerKernel.h" -#include "arm_compute/core/CPP/Validate.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" @@ -32,25 +31,33 @@ #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" +#include "src/core/CPP/Validate.h" +#include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/WindowHelpers.h" + namespace arm_compute { namespace { Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, unsigned int num_groups) { - // Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use NEON FP16 instructions. + // Note: ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input) is not needed here as this kernel doesn't use CPU FP16 instructions. ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::UNKNOWN); ARM_COMPUTE_RETURN_ERROR_ON_DATA_LAYOUT_NOT_IN(input, DataLayout::NCHW, DataLayout::NHWC); - const unsigned int channels = input->dimension(get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::CHANNEL)); + const unsigned int channels = + input->dimension(get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::CHANNEL)); ARM_COMPUTE_RETURN_ERROR_ON_MSG(num_groups < 2, "Channel shuffling with less than 2 groups would be inefficient"); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(num_groups == channels, "Channel shuffling with same number of groups as number of channels would be inefficient"); + ARM_COMPUTE_RETURN_ERROR_ON_MSG( + num_groups == channels, + "Channel shuffling with same number of groups as number of channels would be inefficient"); ARM_COMPUTE_RETURN_ERROR_ON(num_groups > channels); // There cannot be more groups than channels - ARM_COMPUTE_RETURN_ERROR_ON_MSG((channels % num_groups) != 0, "The number of channels must be a multiple of the number of groups"); + ARM_COMPUTE_RETURN_ERROR_ON_MSG((channels % num_groups) != 0, + "The number of channels must be a multiple of the number of groups"); // Checks performed when output is configured - if(output->total_size() != 0) + if (output->total_size() != 0) { ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); @@ -66,24 +73,26 @@ void channel_shuffle_nhwc(const ITensor *input, ITensor *output, unsigned int nu const size_t element_size = input->info()->element_size(); const unsigned int K = input->info()->dimension(channel_idx) / num_groups; - const float rK = 1.f / K; + const double rK = 1.0 / K; Iterator in(input, window); - execute_window_loop(window, [&](const Coordinates & id) - { - // Shuffle channel - const unsigned int curr_channel = id.x(); - const unsigned int group_id = curr_channel * rK; - const unsigned int r = group_id * K; - const unsigned int channel_id = curr_channel - r; - - // Calculate output coordinates - Coordinates out_coords = id; - out_coords.set(Window::DimX, channel_id * num_groups + group_id); - std::copy_n(in.ptr(), element_size, output->ptr_to_element(out_coords)); - }, - in); + execute_window_loop( + window, + [&](const Coordinates &id) + { + // Shuffle channel + const unsigned int curr_channel = id.x(); + const unsigned int group_id = curr_channel * rK; + const unsigned int r = group_id * K; + const unsigned int channel_id = curr_channel - r; + + // Calculate output coordinates + Coordinates out_coords = id; + out_coords.set(Window::DimX, channel_id * num_groups + group_id); + std::copy_n(in.ptr(), element_size, output->ptr_to_element(out_coords)); + }, + in); } void channel_shuffle_nchw(const ITensor *input, ITensor *output, unsigned int num_groups, const Window &window) { @@ -101,38 +110,39 @@ void channel_shuffle_nchw(const ITensor *input, ITensor *output, unsigned int nu const size_t row_size = input->info()->dimension(width_idx) * input->info()->element_size(); const unsigned int K = input->info()->dimension(channel_idx) / num_groups; - const float rK = 1.f / K; + const double rK = 1.0 / K; Iterator in(input, win); - execute_window_loop(win, [&](const Coordinates & id) - { - // Shuffle channel - const unsigned int curr_channel = id.z(); - const unsigned int group_id = curr_channel * rK; - const unsigned int r = group_id * K; - const unsigned int channel_id = curr_channel - r; - - // Calculate output coordinates - Coordinates out_coords = id; - out_coords.set(Window::DimZ, channel_id * num_groups + group_id); - const uint8_t *input_ptr = in.ptr(); - uint8_t *output_ptr = output->ptr_to_element(out_coords); - - // Copy plane - for(unsigned int y = 0; y < height; ++y) + execute_window_loop( + win, + [&](const Coordinates &id) { - std::copy_n(input_ptr, row_size, output_ptr); - input_ptr += input_stride_y; - output_ptr += output_stride_y; - } - }, - in); + // Shuffle channel + const unsigned int curr_channel = id.z(); + const unsigned int group_id = curr_channel * rK; + const unsigned int r = group_id * K; + const unsigned int channel_id = curr_channel - r; + + // Calculate output coordinates + Coordinates out_coords = id; + out_coords.set(Window::DimZ, channel_id * num_groups + group_id); + const uint8_t *input_ptr = in.ptr(); + uint8_t *output_ptr = output->ptr_to_element(out_coords); + + // Copy plane + for (unsigned int y = 0; y < height; ++y) + { + std::copy_n(input_ptr, row_size, output_ptr); + input_ptr += input_stride_y; + output_ptr += output_stride_y; + } + }, + in); } } // namespace -NEChannelShuffleLayerKernel::NEChannelShuffleLayerKernel() - : _input(nullptr), _output(nullptr), _num_groups() +NEChannelShuffleLayerKernel::NEChannelShuffleLayerKernel() : _input(nullptr), _output(nullptr), _num_groups() { } @@ -153,14 +163,11 @@ void NEChannelShuffleLayerKernel::configure(const ITensor *input, ITensor *outpu Window win = calculate_max_window(*input->info(), Steps()); // The NEChannelShuffleLayerKernel doesn't need padding so update_window_and_padding() can be skipped - Coordinates coord; - coord.set_num_dimensions(output->info()->num_dimensions()); - output->info()->set_valid_region(ValidRegion(coord, output->info()->tensor_shape())); - INEKernel::configure(win); } -Status NEChannelShuffleLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int num_groups) +Status +NEChannelShuffleLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output, unsigned int num_groups) { ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, num_groups)); return Status{}; @@ -172,7 +179,7 @@ void NEChannelShuffleLayerKernel::run(const Window &window, const ThreadInfo &in ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); - switch(_input->info()->data_layout()) + switch (_input->info()->data_layout()) { case DataLayout::NHWC: channel_shuffle_nhwc(_input, _output, _num_groups, window); |