From a50e702289af66944e860eafc7f3b32f6c5f30be Mon Sep 17 00:00:00 2001 From: Michalis Spyrou Date: Tue, 9 Apr 2019 14:03:17 +0100 Subject: COMPMID-2012: Remove unnecessary templates from NEON kernels Change-Id: I2deb26188c7de7c6ad10d2f51f83e729fed7e5e2 Signed-off-by: Michalis Spyrou Reviewed-on: https://review.mlplatform.org/c/961 Tested-by: Arm Jenkins Reviewed-by: Georgios Pinitas Comments-Addressed: Arm Jenkins --- src/core/NEON/kernels/NEReorgLayerKernel.cpp | 96 ++++++++++------------------ 1 file changed, 35 insertions(+), 61 deletions(-) (limited to 'src/core/NEON/kernels/NEReorgLayerKernel.cpp') diff --git a/src/core/NEON/kernels/NEReorgLayerKernel.cpp b/src/core/NEON/kernels/NEReorgLayerKernel.cpp index 8baea2b990..ece5aa431c 100644 --- a/src/core/NEON/kernels/NEReorgLayerKernel.cpp +++ b/src/core/NEON/kernels/NEReorgLayerKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 ARM Limited. + * Copyright (c) 2018-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -67,47 +67,8 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, i } } // namespace -template -void NEReorgLayerKernel::run_reorg(const Window &window) -{ - const DataLayout data_layout = _input->info()->data_layout(); - const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); - const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); - const size_t idx_c = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL); - - const unsigned int stride = _stride; - const unsigned int out_c = _output->info()->tensor_shape()[idx_c] / (stride * stride); - const uint8_t *in_ptr = _input->buffer(); - - // Collapse - Window collapsed_window = window.collapse_if_possible(window, 4); - - // Create Iterator - Iterator out(_output, collapsed_window); - - // Perform reorg - execute_window_loop(collapsed_window, [&](const Coordinates & id) - { - // Get spatial coords and channels - const unsigned int w = id[idx_w]; - const unsigned int h = id[idx_h]; - const unsigned int c = id[idx_c]; - - // Calculate mapping - const unsigned int offset = c / out_c; - Coordinates map_coords = id; - map_coords.set(idx_w, w * stride + offset % stride); - map_coords.set(idx_h, h * stride + offset / stride); - map_coords.set(idx_c, c % out_c); - - // Perform mapping - *(reinterpret_cast(out.ptr())) = *(reinterpret_cast(in_ptr + _input->info()->offset_element_in_bytes(map_coords))); - }, - out); -} - NEReorgLayerKernel::NEReorgLayerKernel() - : _func(nullptr), _input(nullptr), _output(nullptr), _stride(1) + : _input(nullptr), _output(nullptr), _stride(1) { } @@ -122,27 +83,10 @@ void NEReorgLayerKernel::configure(const ITensor *input, ITensor *output, int32_ // Perform validation step ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), stride)); - _func = nullptr; _input = input; _output = output; _stride = stride; - switch(input->info()->element_size()) - { - case 1: - _func = &NEReorgLayerKernel::run_reorg; - break; - case 2: - _func = &NEReorgLayerKernel::run_reorg; - break; - case 4: - _func = &NEReorgLayerKernel::run_reorg; - break; - default: - ARM_COMPUTE_ERROR("Element size not supported"); - break; - } - // The NEReorgLayerKernel doesn't need padding so update_window_and_padding() can be skipped output->info()->set_valid_region(ValidRegion(Coordinates(), output->info()->tensor_shape())); @@ -164,9 +108,39 @@ void NEReorgLayerKernel::run(const Window &window, const ThreadInfo &info) ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICPPKernel::window(), window); - if(_func != nullptr) + const DataLayout data_layout = _input->info()->data_layout(); + const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); + const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); + const size_t idx_c = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL); + + const unsigned int stride = _stride; + const unsigned int out_c = _output->info()->tensor_shape()[idx_c] / (stride * stride); + const uint8_t *in_ptr = _input->buffer(); + + // Collapse + Window collapsed_window = window.collapse_if_possible(window, 4); + + // Create Iterator + Iterator out(_output, collapsed_window); + + // Perform reorg + execute_window_loop(collapsed_window, [&](const Coordinates & id) { - (this->*_func)(window); - } + // Get spatial coords and channels + const unsigned int w = id[idx_w]; + const unsigned int h = id[idx_h]; + const unsigned int c = id[idx_c]; + + // Calculate mapping + const unsigned int offset = c / out_c; + Coordinates map_coords = id; + map_coords.set(idx_w, w * stride + offset % stride); + map_coords.set(idx_h, h * stride + offset / stride); + map_coords.set(idx_c, c % out_c); + + // Perform mapping + std::memcpy(out.ptr(), in_ptr + _input->info()->offset_element_in_bytes(map_coords), _input->info()->element_size()); + }, + out); } } // namespace arm_compute -- cgit v1.2.1