From cd5b4a30d4208f9eadce7e9942526e000f72ee97 Mon Sep 17 00:00:00 2001 From: Michalis Spyrou Date: Thu, 27 Feb 2020 15:35:53 +0000 Subject: COMPMID-3146: Remove padding from NERangeKernel Change-Id: I9115c0c6f373bb00dbd058e76a391fe05108de69 Signed-off-by: Michalis Spyrou Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/2801 Tested-by: Arm Jenkins Reviewed-by: Michele Di Giorgio --- src/core/NEON/kernels/NERangeKernel.cpp | 67 ++++++++++++++++++--------------- 1 file changed, 37 insertions(+), 30 deletions(-) (limited to 'src/core/NEON/kernels/NERangeKernel.cpp') diff --git a/src/core/NEON/kernels/NERangeKernel.cpp b/src/core/NEON/kernels/NERangeKernel.cpp index 189e77f0fc..28107226df 100644 --- a/src/core/NEON/kernels/NERangeKernel.cpp +++ b/src/core/NEON/kernels/NERangeKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 ARM Limited. + * Copyright (c) 2018-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -41,7 +41,6 @@ namespace template void range_function(ITensor *output, float start, float step, const Window &window) { - const unsigned int num_elems_processed_per_iteration = 16 / sizeof(T); /** NEON vector tag type. */ using ExactTagType = typename wrapper::traits::neon_bitvector::tag_type; @@ -49,17 +48,37 @@ void range_function(ITensor *output, float start, float step, const Window &wind const auto start_vec = wrapper::vdup_n(static_cast(start), ExactTagType{}); auto id_vec = wrapper::vdup_n(static_cast(0.f), ExactTagType{}); - Iterator output_it(output, window); - execute_window_loop(window, [&](const Coordinates & id) + const auto window_start_x = static_cast(window.x().start()); + const auto window_end_x = static_cast(window.x().end()); + const int window_step_x = 16 / sizeof(T); + + Window win{ window }; + win.set(Window::DimX, Window::Dimension(0, 1, 1)); + Iterator output_it(output, win); + + execute_window_loop(win, [&](const Coordinates &) { - for(unsigned int count = 0; count < num_elems_processed_per_iteration; ++count) + int x = window_start_x; + const auto out_ptr = reinterpret_cast(output_it.ptr()); + for(; x <= (window_end_x - window_step_x); x += window_step_x) { - id_vec = wrapper::vsetlane(static_cast(id.x() + count), id_vec, count); + for(unsigned int count = 0; count < window_step_x; ++count) + { + id_vec = wrapper::vsetlane(static_cast(x + count), id_vec, count); + } + + // start + step * id + const auto res_vec = wrapper::vmla(start_vec, id_vec, step_vec); + wrapper::vstore(out_ptr + x, res_vec); } - // start + step * id - const auto res_vec = wrapper::vmla(start_vec, id_vec, step_vec); - const auto out_ptr = reinterpret_cast(output_it.ptr()); - wrapper::vstore(out_ptr, res_vec); + + // Compute left-over elements + for(; x < window_end_x; ++x) + { + const auto res = start + x * step; + *(out_ptr + x) = res; + } + }, output_it); } @@ -88,22 +107,6 @@ Status validate_arguments(const ITensorInfo &output, const float start, const fl return Status{}; } - -std::pair validate_and_configure_window(ITensorInfo &output, const float start, const float end, const float step) -{ - const unsigned int num_elems_processed_per_iteration = 16 / output.element_size(); - - // Auto initialize output if not initialized - auto_init_if_empty(output, TensorShape(num_of_elements_in_range(start, end, step)), 1, output.data_type(), output.quantization_info()); - - // Configure kernel window - Window win = calculate_max_window(output, Steps(num_elems_processed_per_iteration)); - AccessWindowHorizontal output_access(&output, 0, num_elems_processed_per_iteration); - bool window_changed = update_window_and_padding(win, output_access); - output_access.set_valid_region(win, ValidRegion(Coordinates(), TensorShape(num_of_elements_in_range(start, end, step)))); - Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{}; - return std::make_pair(err, win); -} } // namespace NERangeKernel::NERangeKernel() @@ -117,9 +120,14 @@ void NERangeKernel::configure(ITensor *output, float start, float end, float ste ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(*(output->info()), start, end, step)); + // Auto initialize output if not initialized + auto_init_if_empty(*output->info(), TensorShape(num_of_elements_in_range(start, end, step)), 1, output->info()->data_type(), output->info()->quantization_info()); + // Configure kernel window - auto win_config = validate_and_configure_window(*(output->info()), start, end, step); - ARM_COMPUTE_ERROR_THROW_ON(win_config.first); + Window win = calculate_max_window(*output->info(), Steps()); + Coordinates coord; + coord.set_num_dimensions(output->info()->num_dimensions()); + output->info()->set_valid_region(ValidRegion(coord, output->info()->tensor_shape())); _start = start; _end = end; @@ -158,7 +166,7 @@ void NERangeKernel::configure(ITensor *output, float start, float end, float ste break; } - INEKernel::configure(win_config.second); + INEKernel::configure(win); } Status NERangeKernel::validate(const ITensorInfo *output, float start, float end, float step) @@ -166,7 +174,6 @@ Status NERangeKernel::validate(const ITensorInfo *output, float start, float end ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output); ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(*output, start, end, step)); - ARM_COMPUTE_RETURN_ON_ERROR((validate_and_configure_window(*(output->clone()), start, end, step)).first); return Status{}; } -- cgit v1.2.1