From a1b8babbb492fa4cd3b392f6376a2dfa85fc854d Mon Sep 17 00:00:00 2001 From: Michalis Spyrou Date: Thu, 7 May 2020 12:13:44 +0100 Subject: COMPMID-3463 Refactor NECropKernel Removed most of the templates used in this kernel. This resulted in a reduction of 35Kb. Change-Id: I0d50aa769b361790d47a8017d795f2c92c6d8a6f Signed-off-by: Michalis Spyrou Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3158 Comments-Addressed: Arm Jenkins Tested-by: Arm Jenkins Reviewed-by: Sang-Hoon Park --- arm_compute/core/NEON/kernels/NECropKernel.h | 13 +--- src/core/NEON/kernels/NECropKernel.cpp | 94 ++++++++++------------------ 2 files changed, 34 insertions(+), 73 deletions(-) diff --git a/arm_compute/core/NEON/kernels/NECropKernel.h b/arm_compute/core/NEON/kernels/NECropKernel.h index 7f2dabf5cd..557a7a8ff3 100644 --- a/arm_compute/core/NEON/kernels/NECropKernel.h +++ b/arm_compute/core/NEON/kernels/NECropKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 ARM Limited. + * Copyright (c) 2019-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -28,9 +28,6 @@ #include "arm_compute/core/Types.h" #include "arm_compute/core/Types.h" -#include -#include - namespace arm_compute { // Forward declarations @@ -94,7 +91,7 @@ public: void run(const Window &window, const ThreadInfo &info) override; /** Function to use for in bounds crop for the particular tensor types passed to configure() */ - using InBoundsCropFunction = void(const ITensor *, const ITensor *, float *, Coordinates, int32_t, int32_t, int32_t); + using InBoundsCropFunction = void(const ITensor *, const ITensor *, float *, Coordinates, int32_t, int32_t, int32_t, bool, bool); private: const ITensor *_input; @@ -111,13 +108,7 @@ private: /** The number of columns out of bounds at the start and end of output. */ std::array _cols_out_of_bounds; - std::pair _in_bounds_crop_functions; NECropKernel::InBoundsCropFunction *_in_bounds_crop_function; - - using CropFunction = void(const ITensor *, const ITensor *, Coordinates, float, const std::array &, const std::array &, - NECropKernel::InBoundsCropFunction *); - - NECropKernel::CropFunction *_crop_function; }; } // namespace arm_compute #endif /*ARM_COMPUTE_NEON_CROP_KERNEL_H */ diff --git a/src/core/NEON/kernels/NECropKernel.cpp b/src/core/NEON/kernels/NECropKernel.cpp index f16eb3e6bd..4257611f0e 100644 --- a/src/core/NEON/kernels/NECropKernel.cpp +++ b/src/core/NEON/kernels/NECropKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 ARM Limited. + * Copyright (c) 2019-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -35,8 +35,6 @@ #include "arm_compute/core/utils/helpers/tensor_transform.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" -#include - namespace arm_compute { namespace @@ -86,9 +84,9 @@ inline float32x4_t load_as_f32(float16_t *ptr) } #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ -template +template inline void in_bounds_crop_window(const ITensor *input, const ITensor *output, float *output_ptr, Coordinates input_offset, - int32_t window_step_x, int32_t output_width_start, int32_t output_width_limit) + int32_t window_step_x, int32_t output_width_start, int32_t output_width_limit, bool input_has_single_channel, bool is_width_flipped) { // Reverse elements if width flipped. if(is_width_flipped) @@ -176,9 +174,9 @@ inline void out_of_bounds_crop_window(const ITensor *output, float *output_ptr, } } -template inline void execute_window(const ITensor *input, const ITensor *output, Coordinates input_offset, float extrapolation_value, - const std::array &rows_out_of_bounds, const std::array &cols_out_of_bounds, NECropKernel::InBoundsCropFunction *in_bounds_crop_function) + const std::array &rows_out_of_bounds, const std::array &cols_out_of_bounds, NECropKernel::InBoundsCropFunction *in_bounds_crop_function, + bool is_height_flipped, bool has_cols_in_bounds, bool has_cols_out_of_bounds_before, bool has_cols_out_of_bounds_after, bool input_has_single_channel, bool is_width_flipped) { // Output is always float. const int window_step_x = 16 / sizeof(float); @@ -214,7 +212,8 @@ inline void execute_window(const ITensor *input, const ITensor *output, Coordina // Copy all elements within the input bounds from the input tensor. if(has_cols_in_bounds) { - (*in_bounds_crop_function)(input, output, output_ptr, input_offset, window_step_x, cols_out_of_bounds[0], output->info()->dimension(1) - cols_out_of_bounds[1]); + (*in_bounds_crop_function)(input, output, output_ptr, input_offset, window_step_x, cols_out_of_bounds[0], + output->info()->dimension(1) - cols_out_of_bounds[1], input_has_single_channel, is_width_flipped); } // Fill all elements after the in bounds elements with the extrapolation value. if(has_cols_out_of_bounds_after) @@ -230,7 +229,7 @@ inline void execute_window(const ITensor *input, const ITensor *output, Coordina NECropKernel::NECropKernel() : _input(nullptr), _crop_boxes(nullptr), _box_ind(nullptr), _output(nullptr), _start(), _end(), _crop_box_ind(0), _extrapolation_value(0), _rows_out_of_bounds(), _cols_out_of_bounds(), - _in_bounds_crop_functions(), _in_bounds_crop_function(nullptr), _crop_function(nullptr) + _in_bounds_crop_function(nullptr) { } @@ -246,29 +245,30 @@ void NECropKernel::configure(const ITensor *input, const ITensor *crop_boxes, co _crop_box_ind = crop_box_ind; _extrapolation_value = extrapolation_value; - const static std::map, std::pair> in_map_function = + switch(input->info()->data_type()) { - { { DataType::F32, false }, { &in_bounds_crop_window, &in_bounds_crop_window } }, - { { DataType::F32, true }, { &in_bounds_crop_window, &in_bounds_crop_window } }, - { { DataType::U16, false }, { &in_bounds_crop_window, &in_bounds_crop_window } }, - { { DataType::U16, true }, { &in_bounds_crop_window, &in_bounds_crop_window } }, - { { DataType::S16, false }, { &in_bounds_crop_window, &in_bounds_crop_window } }, - { { DataType::S16, true }, { &in_bounds_crop_window, &in_bounds_crop_window } }, - { { DataType::U32, false }, { &in_bounds_crop_window, &in_bounds_crop_window } }, - { { DataType::U32, true }, { &in_bounds_crop_window, &in_bounds_crop_window } }, - { { DataType::S32, false }, { &in_bounds_crop_window, &in_bounds_crop_window } }, - { { DataType::S32, true }, { &in_bounds_crop_window, &in_bounds_crop_window } }, + case DataType::F32: + _in_bounds_crop_function = &in_bounds_crop_window; + break; #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - { { DataType::F16, false }, { &in_bounds_crop_window, &in_bounds_crop_window } }, - { { DataType::F16, false }, { &in_bounds_crop_window, &in_bounds_crop_window } } + case DataType::F16: + _in_bounds_crop_function = &in_bounds_crop_window; + break; #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ - }; - - auto in_it = in_map_function.find({ input->info()->data_type(), input->info()->dimension(0) == 1 }); - - if(in_it != in_map_function.end()) - { - _in_bounds_crop_functions = in_it->second; + case DataType::U32: + _in_bounds_crop_function = &in_bounds_crop_window; + break; + case DataType::S32: + _in_bounds_crop_function = &in_bounds_crop_window; + break; + case DataType::U16: + _in_bounds_crop_function = &in_bounds_crop_window; + break; + case DataType::S16: + _in_bounds_crop_function = &in_bounds_crop_window; + break; + default: + ARM_COMPUTE_ERROR("Datatype not supported"); } } @@ -309,8 +309,6 @@ void NECropKernel::configure_output_shape() const TensorShape out_shape(_input->info()->tensor_shape()[0], abs(_end[0] - _start[0]) + 1, abs(_end[1] - _start[1]) + 1); _output->info()->set_tensor_shape(out_shape); - _in_bounds_crop_function = _start[0] <= _end[0] ? _in_bounds_crop_functions.first : _in_bounds_crop_functions.second; - bool is_width_flipped = _end[0] < _start[0]; bool is_height_flipped = _end[1] < _start[1]; if(is_height_flipped) @@ -350,36 +348,6 @@ void NECropKernel::configure_output_shape() 0; } - const static std::map, NECropKernel::CropFunction *> map_function = - { - { std::make_tuple(false, false, false, false), &execute_window }, - { std::make_tuple(false, false, false, true), &execute_window }, - { std::make_tuple(false, false, true, false), &execute_window }, - { std::make_tuple(false, false, true, true), &execute_window }, - { std::make_tuple(false, true, false, false), &execute_window }, - { std::make_tuple(false, true, false, true), &execute_window }, - { std::make_tuple(false, true, true, false), &execute_window }, - { std::make_tuple(false, true, true, true), &execute_window }, - { std::make_tuple(true, false, false, false), &execute_window }, - { std::make_tuple(true, false, false, true), &execute_window }, - { std::make_tuple(true, false, true, false), &execute_window }, - { std::make_tuple(true, false, true, true), &execute_window }, - { std::make_tuple(true, true, false, false), &execute_window }, - { std::make_tuple(true, true, false, true), &execute_window }, - { std::make_tuple(true, true, true, false), &execute_window }, - { std::make_tuple(true, true, true, true), &execute_window }, - }; - - auto it = map_function.find(std::make_tuple(is_height_flipped, - _cols_out_of_bounds[0] + _cols_out_of_bounds[1] < _output->info()->dimension(1), - _cols_out_of_bounds[0] > 0, - _cols_out_of_bounds[1] > 0)); - - if(it != map_function.end()) - { - _crop_function = it->second; - } - INEKernel::configure(calculate_max_window(*_output->info())); } @@ -395,6 +363,8 @@ void NECropKernel::run(const Window &window, const ThreadInfo &info) uint32_t batch_index = *(reinterpret_cast(_box_ind->ptr_to_element(Coordinates(_crop_box_ind)))); Coordinates input_offset(0, _end[0] < _start[0] ? _start[0] - _cols_out_of_bounds[0] : _start[0] + _cols_out_of_bounds[0], _end[1] < _start[1] ? _start[1] - _rows_out_of_bounds[0] : _start[1] + _rows_out_of_bounds[0], batch_index); - (*_crop_function)(_input, _output, input_offset, _extrapolation_value, _rows_out_of_bounds, _cols_out_of_bounds, _in_bounds_crop_function); + execute_window(_input, _output, input_offset, _extrapolation_value, _rows_out_of_bounds, _cols_out_of_bounds, _in_bounds_crop_function, _end[1] < _start[1], + _cols_out_of_bounds[0] + _cols_out_of_bounds[1] < _output->info()->dimension(1), _cols_out_of_bounds[0] > 0, _cols_out_of_bounds[1] > 0, + _start[0] <= _end[0], _end[0] < _start[0]); } } // namespace arm_compute -- cgit v1.2.1