From bb1ad0c29fb941c93708ff4b2d211af3af51d213 Mon Sep 17 00:00:00 2001 From: Michalis Spyrou Date: Fri, 28 Feb 2020 17:31:27 +0000 Subject: COMPMID-3165: Remove padding from NEYOLOLayerKernel Change-Id: I74338c3b0a173337cd1c8d89cb98217b7c943d5b Signed-off-by: Michalis Spyrou Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/2814 Reviewed-by: Georgios Pinitas Comments-Addressed: Arm Jenkins Tested-by: Arm Jenkins --- arm_compute/core/NEON/kernels/NEYOLOLayerKernel.h | 24 +--- src/core/NEON/kernels/NEYOLOLayerKernel.cpp | 163 ++++++++-------------- 2 files changed, 67 insertions(+), 120 deletions(-) diff --git a/arm_compute/core/NEON/kernels/NEYOLOLayerKernel.h b/arm_compute/core/NEON/kernels/NEYOLOLayerKernel.h index b88fcf4567..0fd3f8ce67 100644 --- a/arm_compute/core/NEON/kernels/NEYOLOLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEYOLOLayerKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 ARM Limited. + * Copyright (c) 2018-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -77,28 +77,18 @@ public: void run(const Window &window, const ThreadInfo &info) override; private: -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - /** Function to run YOLO layer on fp16 + /** Function to run YOLO layer * * @param[in] window Region on which to execute the kernel. */ - void yolo_layer_fp16_nchw(const Window &window); - /** Function to run batch normalization on fp16 on tensors with NHWC format + template + void yolo_layer_nchw(const Window &window); + /** Function to run YOLO layer on tensors with NHWC format * * @param[in] window Region on which to execute the kernel. */ - void yolo_layer_fp16_nhwc(const Window &window); -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ - /** Function to run YOLO layer on fp32 - * - * @param[in] window Region on which to execute the kernel. - */ - void yolo_layer_fp32_nchw(const Window &window); - /** Function to run YOLO layer on fp32 on tensors with NHWC format - * - * @param[in] window Region on which to execute the kernel. - */ - void yolo_layer_fp32_nhwc(const Window &window); + template + void yolo_layer_nhwc(const Window &window); /** Common signature for all the yolo layer functions * * @param[in] window Region on which to execute the kernel. diff --git a/src/core/NEON/kernels/NEYOLOLayerKernel.cpp b/src/core/NEON/kernels/NEYOLOLayerKernel.cpp index 383c2b8b99..595611890b 100644 --- a/src/core/NEON/kernels/NEYOLOLayerKernel.cpp +++ b/src/core/NEON/kernels/NEYOLOLayerKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 ARM Limited. + * Copyright (c) 2018-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -37,7 +37,8 @@ #include -using namespace arm_compute; +namespace arm_compute +{ namespace { Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info, int32_t num_classes) @@ -61,38 +62,6 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, c return Status{}; } - -std::pair validate_and_configure_window(ITensorInfo *input, ITensorInfo *output) -{ - if(output != nullptr) - { - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - - // Output auto inizialitation if not yet initialized - auto_init_if_empty(*output, *input); - } - - const bool is_nchw = input->data_layout() == DataLayout::NCHW; - const unsigned int num_elems_processed_per_iteration = is_nchw ? 16 / input->element_size() : 1; - - Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration)); - bool window_changed = false; - - if(output != nullptr) - { - AccessWindowHorizontal input_access(input, 0, num_elems_processed_per_iteration); - AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration); - window_changed = update_window_and_padding(win, input_access, output_access); - output_access.set_valid_region(win, input->valid_region()); - } - else - { - window_changed = update_window_and_padding(win, AccessWindowHorizontal(input, 0, num_elems_processed_per_iteration)); - } - - Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{}; - return std::make_pair(err, win); -} } // namespace NEYOLOLayerKernel::NEYOLOLayerKernel() @@ -100,89 +69,67 @@ NEYOLOLayerKernel::NEYOLOLayerKernel() { } -void NEYOLOLayerKernel::yolo_layer_fp32_nchw(const Window &window) +template +void NEYOLOLayerKernel::yolo_layer_nchw(const Window &window) { - Iterator input(_input, window); - Iterator output(_output, window); - - execute_window_loop(window, [&](const Coordinates & id) - { - float32x4_t res = vld1q_f32(reinterpret_cast(input.ptr())); + const auto window_start_x = static_cast(window.x().start()); + const auto window_end_x = static_cast(window.x().end()); + const int window_step_x = S; - const int box_ch_id = id.z() % (_num_classes + 5); - const bool activate = box_ch_id != 2 && box_ch_id != 3; - - // Perform activation - if(activate) - { - auto activation = ::detail::logistic(_act_info); - activation(res); - } - - // Store results - vst1q_f32(reinterpret_cast(output.ptr()), res); - }, - input, output); -} - -void NEYOLOLayerKernel::yolo_layer_fp32_nhwc(const Window &window) -{ - Iterator input(_input, window); - Iterator output(_output, window); + Window win{ window }; + win.set(Window::DimX, Window::Dimension(0, 1, 1)); + Iterator input(_input, win); + Iterator output(_output, win); - execute_window_loop(window, [&](const Coordinates & id) + execute_window_loop(win, [&](const Coordinates & id) { - float res = *(reinterpret_cast(input.ptr())); + const auto input_ptr = reinterpret_cast(input.ptr()); + const auto output_ptr = reinterpret_cast(output.ptr()); + int x = window_start_x; + const int box_ch_id = id.z() % (_num_classes + 5); + const bool activate = box_ch_id != 2 && box_ch_id != 3; - const int box_ch_id = id.x() % (_num_classes + 5); - const bool activate = box_ch_id != 2 && box_ch_id != 3; - - // Perform activation - if(activate) + for(; x <= (window_end_x - window_step_x); x += window_step_x) { - res = 1.f / (1.f + std::exp(-res)); - } + auto res = wrapper::vloadq(input_ptr + x); - // Store result - *(reinterpret_cast(output.ptr())) = res; - }, - input, output); -} + // Perform activation + if(activate) + { + auto activation = detail::logistic(_act_info); + activation(res); + } -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -void NEYOLOLayerKernel::yolo_layer_fp16_nchw(const Window &window) -{ - Iterator input(_input, window); - Iterator output(_output, window); + // Store results + wrapper::vstore(output_ptr + x, res); + } - execute_window_loop(window, [&](const Coordinates & id) - { - float16x8_t res = vld1q_f16(reinterpret_cast(input.ptr())); + // Compute left-over elements + for(; x < window_end_x; ++x) + { + auto res = *(input_ptr + x); - const int box_ch_id = id.z() % (_num_classes + 5); - const bool activate = box_ch_id != 2 && box_ch_id != 3; + // Perform activation + if(activate) + { + res = 1.f / (1.f + std::exp(-res)); + } - // Perform activation - if(activate) - { - auto activation = ::detail::logistic(_act_info); - activation(res); + *(output_ptr + x) = res; } - - // Store results - vst1q_f16(reinterpret_cast(output.ptr()), res); }, input, output); } -void NEYOLOLayerKernel::yolo_layer_fp16_nhwc(const Window &window) +template +void NEYOLOLayerKernel::yolo_layer_nhwc(const Window &window) { Iterator input(_input, window); Iterator output(_output, window); execute_window_loop(window, [&](const Coordinates & id) { - float16_t res = *(reinterpret_cast(input.ptr())); + auto res = *(reinterpret_cast(input.ptr())); const int box_ch_id = id.x() % (_num_classes + 5); const bool activate = box_ch_id != 2 && box_ch_id != 3; @@ -194,11 +141,10 @@ void NEYOLOLayerKernel::yolo_layer_fp16_nhwc(const Window &window) } // Store result - *(reinterpret_cast(output.ptr())) = res; + *(reinterpret_cast(output.ptr())) = res; }, input, output); } -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ void NEYOLOLayerKernel::configure(ITensor *input, ITensor *output, const ActivationLayerInfo &act_info, int32_t num_classes) { @@ -214,27 +160,37 @@ void NEYOLOLayerKernel::configure(ITensor *input, ITensor *output, const Activat { #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC case DataType::F16: - _func = (_input->info()->data_layout() == DataLayout::NHWC) ? &NEYOLOLayerKernel::yolo_layer_fp16_nhwc : &NEYOLOLayerKernel::yolo_layer_fp16_nchw; + _func = (_input->info()->data_layout() == DataLayout::NHWC) ? &NEYOLOLayerKernel::yolo_layer_nhwc : &NEYOLOLayerKernel::yolo_layer_nchw; break; #endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC case DataType::F32: - _func = (_input->info()->data_layout() == DataLayout::NHWC) ? &NEYOLOLayerKernel::yolo_layer_fp32_nhwc : &NEYOLOLayerKernel::yolo_layer_fp32_nchw; + _func = (_input->info()->data_layout() == DataLayout::NHWC) ? &NEYOLOLayerKernel::yolo_layer_nhwc : &NEYOLOLayerKernel::yolo_layer_nchw; break; default: ARM_COMPUTE_ERROR("Element size not supported"); break; } + Window win = calculate_max_window(*input->info(), Steps()); + // Configure kernel window - auto win_config = validate_and_configure_window(input->info(), (output == nullptr) ? nullptr : output->info()); - ARM_COMPUTE_ERROR_THROW_ON(win_config.first); - ICPPKernel::configure(win_config.second); + if(output != nullptr) + { + // Output auto inizialitation if not yet initialized + auto_init_if_empty(*output->info(), *input->info()); + + Coordinates coord; + coord.set_num_dimensions(output->info()->num_dimensions()); + + output->info()->set_valid_region(ValidRegion(coord, output->info()->tensor_shape())); + } + + ICPPKernel::configure(win); } Status NEYOLOLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info, int32_t num_classes) { ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, act_info, num_classes)); - ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), (output == nullptr) ? nullptr : output->clone().get()).first); return Status{}; } @@ -248,3 +204,4 @@ void NEYOLOLayerKernel::run(const Window &window, const ThreadInfo &info) (this->*_func)(window); } +} // namespace arm_compute -- cgit v1.2.1