From a50e702289af66944e860eafc7f3b32f6c5f30be Mon Sep 17 00:00:00 2001 From: Michalis Spyrou Date: Tue, 9 Apr 2019 14:03:17 +0100 Subject: COMPMID-2012: Remove unnecessary templates from NEON kernels Change-Id: I2deb26188c7de7c6ad10d2f51f83e729fed7e5e2 Signed-off-by: Michalis Spyrou Reviewed-on: https://review.mlplatform.org/c/961 Tested-by: Arm Jenkins Reviewed-by: Georgios Pinitas Comments-Addressed: Arm Jenkins --- arm_compute/core/NEON/kernels/NEReorgLayerKernel.h | 23 +--- arm_compute/core/NEON/kernels/NEStackLayerKernel.h | 14 --- .../core/NEON/kernels/NEWeightsReshapeKernel.h | 11 +- src/core/NEON/kernels/NEReorgLayerKernel.cpp | 96 ++++++---------- src/core/NEON/kernels/NEReverseKernel.cpp | 18 +-- src/core/NEON/kernels/NEStackLayerKernel.cpp | 33 +----- src/core/NEON/kernels/NEWeightsReshapeKernel.cpp | 128 ++++++++------------- 7 files changed, 100 insertions(+), 223 deletions(-) diff --git a/arm_compute/core/NEON/kernels/NEReorgLayerKernel.h b/arm_compute/core/NEON/kernels/NEReorgLayerKernel.h index 7e0fb4350d..076af4fd1c 100644 --- a/arm_compute/core/NEON/kernels/NEReorgLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEReorgLayerKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 ARM Limited. + * Copyright (c) 2018-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -75,24 +75,9 @@ public: void run(const Window &window, const ThreadInfo &info) override; private: - /** Template function to run the reorg - * - * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). - */ - template - void run_reorg(const Window &window); - - /** Common signature for all the specialised reorg functions - * - * @param[in] window Region on which to execute the kernel. - */ - using ReorgFunctionPtr = void (NEReorgLayerKernel::*)(const Window &window); - -private: - ReorgFunctionPtr _func; - const ITensor *_input; - ITensor *_output; - int32_t _stride; + const ITensor *_input; + ITensor *_output; + int32_t _stride; }; } // namespace arm_compute #endif /*__ARM_COMPUTE_NEREORGLAYERKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEStackLayerKernel.h b/arm_compute/core/NEON/kernels/NEStackLayerKernel.h index 3a9e81fa94..42a0539c9f 100644 --- a/arm_compute/core/NEON/kernels/NEStackLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEStackLayerKernel.h @@ -84,24 +84,10 @@ public: void run(const Window &window, const ThreadInfo &info) override; private: - /** Template function to run the stack - * - * @param[in] window Region on which to execute the kernel. (Must be a valid region of the window returned by window()). - */ - template - void run_stack(const Window &window); - - /** Common signature for all the specialised stack functions - * - * @param[in] window Region on which to execute the kernel. - */ - using StackFunctionPtr = void (NEStackLayerKernel::*)(const Window &window); - const ITensor *_input; ITensor *_output; unsigned int _axis; unsigned int _idx_input; - StackFunctionPtr _func; }; } // namespace arm_compute #endif /* __ARM_COMPUTE_NESTACKLAYERKERNEL_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h b/arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h index 21f36f6c2b..bba18a8fa8 100644 --- a/arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h +++ b/arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 ARM Limited. + * Copyright (c) 2017-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -99,12 +99,9 @@ public: void run(const Window &window, const ThreadInfo &info) override; private: - using WeightsReshapeKernel = void(const ITensor *input, const ITensor *bias, ITensor *output, const Window &window); - - WeightsReshapeKernel *_func; - const ITensor *_input; - const ITensor *_bias; - ITensor *_output; + const ITensor *_input; + const ITensor *_bias; + ITensor *_output; }; } // namespace arm_compute #endif /*__ARM_COMPUTE_NEWEIGHTSRESHAPEKERNEL_H__ */ diff --git a/src/core/NEON/kernels/NEReorgLayerKernel.cpp b/src/core/NEON/kernels/NEReorgLayerKernel.cpp index 8baea2b990..ece5aa431c 100644 --- a/src/core/NEON/kernels/NEReorgLayerKernel.cpp +++ b/src/core/NEON/kernels/NEReorgLayerKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 ARM Limited. + * Copyright (c) 2018-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -67,47 +67,8 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, i } } // namespace -template -void NEReorgLayerKernel::run_reorg(const Window &window) -{ - const DataLayout data_layout = _input->info()->data_layout(); - const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); - const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); - const size_t idx_c = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL); - - const unsigned int stride = _stride; - const unsigned int out_c = _output->info()->tensor_shape()[idx_c] / (stride * stride); - const uint8_t *in_ptr = _input->buffer(); - - // Collapse - Window collapsed_window = window.collapse_if_possible(window, 4); - - // Create Iterator - Iterator out(_output, collapsed_window); - - // Perform reorg - execute_window_loop(collapsed_window, [&](const Coordinates & id) - { - // Get spatial coords and channels - const unsigned int w = id[idx_w]; - const unsigned int h = id[idx_h]; - const unsigned int c = id[idx_c]; - - // Calculate mapping - const unsigned int offset = c / out_c; - Coordinates map_coords = id; - map_coords.set(idx_w, w * stride + offset % stride); - map_coords.set(idx_h, h * stride + offset / stride); - map_coords.set(idx_c, c % out_c); - - // Perform mapping - *(reinterpret_cast(out.ptr())) = *(reinterpret_cast(in_ptr + _input->info()->offset_element_in_bytes(map_coords))); - }, - out); -} - NEReorgLayerKernel::NEReorgLayerKernel() - : _func(nullptr), _input(nullptr), _output(nullptr), _stride(1) + : _input(nullptr), _output(nullptr), _stride(1) { } @@ -122,27 +83,10 @@ void NEReorgLayerKernel::configure(const ITensor *input, ITensor *output, int32_ // Perform validation step ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), stride)); - _func = nullptr; _input = input; _output = output; _stride = stride; - switch(input->info()->element_size()) - { - case 1: - _func = &NEReorgLayerKernel::run_reorg; - break; - case 2: - _func = &NEReorgLayerKernel::run_reorg; - break; - case 4: - _func = &NEReorgLayerKernel::run_reorg; - break; - default: - ARM_COMPUTE_ERROR("Element size not supported"); - break; - } - // The NEReorgLayerKernel doesn't need padding so update_window_and_padding() can be skipped output->info()->set_valid_region(ValidRegion(Coordinates(), output->info()->tensor_shape())); @@ -164,9 +108,39 @@ void NEReorgLayerKernel::run(const Window &window, const ThreadInfo &info) ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICPPKernel::window(), window); - if(_func != nullptr) + const DataLayout data_layout = _input->info()->data_layout(); + const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); + const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); + const size_t idx_c = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL); + + const unsigned int stride = _stride; + const unsigned int out_c = _output->info()->tensor_shape()[idx_c] / (stride * stride); + const uint8_t *in_ptr = _input->buffer(); + + // Collapse + Window collapsed_window = window.collapse_if_possible(window, 4); + + // Create Iterator + Iterator out(_output, collapsed_window); + + // Perform reorg + execute_window_loop(collapsed_window, [&](const Coordinates & id) { - (this->*_func)(window); - } + // Get spatial coords and channels + const unsigned int w = id[idx_w]; + const unsigned int h = id[idx_h]; + const unsigned int c = id[idx_c]; + + // Calculate mapping + const unsigned int offset = c / out_c; + Coordinates map_coords = id; + map_coords.set(idx_w, w * stride + offset % stride); + map_coords.set(idx_h, h * stride + offset / stride); + map_coords.set(idx_c, c % out_c); + + // Perform mapping + std::memcpy(out.ptr(), in_ptr + _input->info()->offset_element_in_bytes(map_coords), _input->info()->element_size()); + }, + out); } } // namespace arm_compute diff --git a/src/core/NEON/kernels/NEReverseKernel.cpp b/src/core/NEON/kernels/NEReverseKernel.cpp index 62e48829f6..36398cf89a 100644 --- a/src/core/NEON/kernels/NEReverseKernel.cpp +++ b/src/core/NEON/kernels/NEReverseKernel.cpp @@ -189,31 +189,21 @@ void NEReverseKernel::run(const Window &window, const ThreadInfo &info) switch(_input->info()->data_type()) { case DataType::F32: - run_reverse(window, _input, _axis, _output); + case DataType::U32: + case DataType::S32: + run_reverse(window, _input, _axis, _output); break; #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC case DataType::F16: - run_reverse(window, _input, _axis, _output); - break; #endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - case DataType::U32: - run_reverse(window, _input, _axis, _output); - break; - case DataType::S32: - run_reverse(window, _input, _axis, _output); - break; case DataType::S16: - run_reverse(window, _input, _axis, _output); - break; case DataType::U16: run_reverse(window, _input, _axis, _output); break; case DataType::QASYMM8: case DataType::U8: - run_reverse(window, _input, _axis, _output); - break; case DataType::S8: - run_reverse(window, _input, _axis, _output); + run_reverse(window, _input, _axis, _output); break; default: ARM_COMPUTE_ERROR("Data type not supported"); diff --git a/src/core/NEON/kernels/NEStackLayerKernel.cpp b/src/core/NEON/kernels/NEStackLayerKernel.cpp index 0c33f36983..3447d59bcc 100644 --- a/src/core/NEON/kernels/NEStackLayerKernel.cpp +++ b/src/core/NEON/kernels/NEStackLayerKernel.cpp @@ -87,7 +87,7 @@ inline Coordinates shift_from_axis_and_replace_coordinate(const Coordinates &id, } // namespace NEStackLayerKernel::NEStackLayerKernel() - : _input(nullptr), _output(nullptr), _axis(), _idx_input(), _func(nullptr) + : _input(nullptr), _output(nullptr), _axis(), _idx_input() { } @@ -101,22 +101,6 @@ void NEStackLayerKernel::configure(const ITensor *input, unsigned int axis, unsi _axis = axis; _idx_input = idx_input; - switch(input->info()->element_size()) - { - case 1: - _func = &NEStackLayerKernel::run_stack; - break; - case 2: - _func = &NEStackLayerKernel::run_stack; - break; - case 4: - _func = &NEStackLayerKernel::run_stack; - break; - default: - ARM_COMPUTE_ERROR("Element size not supported"); - break; - } - // Configure kernel window auto win_config = validate_and_configure_window(input->info(), axis, num_tensors, output->info()); @@ -137,15 +121,6 @@ void NEStackLayerKernel::run(const Window &window, const ThreadInfo &info) ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); - if(_func != nullptr) - { - (this->*_func)(window); - } -} - -template -void NEStackLayerKernel::run_stack(const Window &window) -{ Window window_out; window_out.use_tensor_dimensions(_output->info()->tensor_shape()); @@ -160,9 +135,9 @@ void NEStackLayerKernel::run_stack(const Window &window) execute_window_loop(window, [&](const Coordinates & id) { - Coordinates id_out = shift_from_axis_and_replace_coordinate(id, _axis, _idx_input); - const int idx = id_out[0] * stride_x + id_out[1] * stride_y + id_out[2] * stride_z + id_out[3] * stride_w + id_out[4] * stride_k; - *(reinterpret_cast(output.ptr() + idx)) = *(reinterpret_cast(input.ptr())); + Coordinates id_out = shift_from_axis_and_replace_coordinate(id, _axis, _idx_input); + const int idx = id_out[0] * stride_x + id_out[1] * stride_y + id_out[2] * stride_z + id_out[3] * stride_w + id_out[4] * stride_k; + std::memcpy(output.ptr() + idx, input.ptr(), _input->info()->element_size()); }, input); } diff --git a/src/core/NEON/kernels/NEWeightsReshapeKernel.cpp b/src/core/NEON/kernels/NEWeightsReshapeKernel.cpp index 4a0cf27592..624833adfb 100644 --- a/src/core/NEON/kernels/NEWeightsReshapeKernel.cpp +++ b/src/core/NEON/kernels/NEWeightsReshapeKernel.cpp @@ -34,59 +34,6 @@ using namespace arm_compute; namespace { -template -void weights_reshape(const ITensor *input, const ITensor *bias, ITensor *output, const Window &window) -{ - const unsigned int kernel_size_x = input->info()->dimension(0); - const unsigned int kernel_size_y = input->info()->dimension(1); - const unsigned int kernel_depth = input->info()->dimension(2); - const unsigned int input_stride_x = input->info()->strides_in_bytes().x(); - const unsigned int input_stride_y = input->info()->strides_in_bytes().y(); - const unsigned int input_stride_z = input->info()->strides_in_bytes().z(); - const unsigned int output_stride_y = output->info()->strides_in_bytes().y(); - - // Create iterators - Iterator in(input, window); - execute_window_loop(window, [&](const Coordinates & id) - { - // Get column index - const int kernel_idx = id[3]; - const int kernel_idz = id[4]; - - // Setup pointers - const uint8_t *tmp_input_ptr = in.ptr(); - uint8_t *tmp_output_ptr = output->ptr_to_element(Coordinates(kernel_idx, 0, kernel_idz)); - const uint8_t *curr_input_row_ptr = tmp_input_ptr; - const uint8_t *curr_input_depth_ptr = tmp_input_ptr; - - // Linearize volume - for(unsigned int d = 0; d < kernel_depth; ++d) - { - for(unsigned int j = 0; j < kernel_size_y; ++j) - { - for(unsigned int i = 0; i < kernel_size_x; ++i) - { - *(reinterpret_cast(tmp_output_ptr)) = *(reinterpret_cast(tmp_input_ptr)); - tmp_input_ptr += input_stride_x; - tmp_output_ptr += output_stride_y; - } - curr_input_row_ptr += input_stride_y; - tmp_input_ptr = curr_input_row_ptr; - } - curr_input_depth_ptr += input_stride_z; - curr_input_row_ptr = curr_input_depth_ptr; - tmp_input_ptr = curr_input_depth_ptr; - } - - // Add bias - if(bias != nullptr) - { - *(reinterpret_cast(tmp_output_ptr)) = *(reinterpret_cast(bias->ptr_to_element(Coordinates(kernel_idx, kernel_idz)))); - } - }, - in); -} - TensorShape get_output_shape(const ITensorInfo *input, bool has_bias) { TensorShape output_shape{ input->tensor_shape() }; @@ -141,7 +88,7 @@ std::pair validate_and_configure_window(ITensorInfo *input, ITen } // namespace NEWeightsReshapeKernel::NEWeightsReshapeKernel() - : _func(nullptr), _input(nullptr), _bias(nullptr), _output(nullptr) + : _input(nullptr), _bias(nullptr), _output(nullptr) { } @@ -161,30 +108,6 @@ void NEWeightsReshapeKernel::configure(const ITensor *input, const ITensor *bias _bias = bias; _output = output; - switch(_input->info()->element_size()) - { - case 4: - { - _func = &weights_reshape; - break; - } - case 2: - { - _func = &weights_reshape; - break; - } - case 1: - { - _func = &weights_reshape; - break; - } - default: - { - ARM_COMPUTE_ERROR_ON("Element size not supported"); - break; - } - } - // Configure kernel auto win_config = validate_and_configure_window(input->info(), output->info()); ARM_COMPUTE_ERROR_THROW_ON(win_config.first); @@ -205,5 +128,52 @@ void NEWeightsReshapeKernel::run(const Window &window, const ThreadInfo &info) ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); - (*_func)(_input, _bias, _output, window); + const unsigned int kernel_size_x = _input->info()->dimension(0); + const unsigned int kernel_size_y = _input->info()->dimension(1); + const unsigned int kernel_depth = _input->info()->dimension(2); + const unsigned int input_stride_x = _input->info()->strides_in_bytes().x(); + const unsigned int input_stride_y = _input->info()->strides_in_bytes().y(); + const unsigned int input_stride_z = _input->info()->strides_in_bytes().z(); + const unsigned int output_stride_y = _output->info()->strides_in_bytes().y(); + + // Create iterators + Iterator in(_input, window); + execute_window_loop(window, [&](const Coordinates & id) + { + // Get column index + const int kernel_idx = id[3]; + const int kernel_idz = id[4]; + + // Setup pointers + const uint8_t *tmp_input_ptr = in.ptr(); + uint8_t *tmp_output_ptr = _output->ptr_to_element(Coordinates(kernel_idx, 0, kernel_idz)); + const uint8_t *curr_input_row_ptr = tmp_input_ptr; + const uint8_t *curr_input_depth_ptr = tmp_input_ptr; + + // Linearize volume + for(unsigned int d = 0; d < kernel_depth; ++d) + { + for(unsigned int j = 0; j < kernel_size_y; ++j) + { + for(unsigned int i = 0; i < kernel_size_x; ++i) + { + std::memcpy(tmp_output_ptr, tmp_input_ptr, _input->info()->element_size()); + tmp_input_ptr += input_stride_x; + tmp_output_ptr += output_stride_y; + } + curr_input_row_ptr += input_stride_y; + tmp_input_ptr = curr_input_row_ptr; + } + curr_input_depth_ptr += input_stride_z; + curr_input_row_ptr = curr_input_depth_ptr; + tmp_input_ptr = curr_input_depth_ptr; + } + + // Add bias + if(_bias != nullptr) + { + std::memcpy(tmp_output_ptr, _bias->ptr_to_element(Coordinates(kernel_idx, kernel_idz)), _input->info()->element_size()); + } + }, + in); } -- cgit v1.2.1