From 2dc7e407141a1b213a31b9fa78a958d6652d4889 Mon Sep 17 00:00:00 2001
From: Michalis Spyrou <michalis.spyrou@arm.com>
Date: Fri, 28 Feb 2020 14:41:35 +0000
Subject: COMPMID-3171: Remove padding from NESoftmaxLayerKernel

Change-Id: Ia01ad8cda34c42e681b006f570e8d150d97fb208
Signed-off-by: Michalis Spyrou <michalis.spyrou@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/2809
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
---
 src/core/NEON/kernels/NESoftmaxLayerKernel.cpp | 135 ++++++++++---------------
 tests/validation/NEON/SoftmaxLayer.cpp         |  43 +--------
 2 files changed, 54 insertions(+), 124 deletions(-)

diff --git a/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp b/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp
index 95cbdf582b..790c8bacc5 100644
--- a/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp
+++ b/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -96,70 +96,52 @@ Status validate_arguments_logits_1d_max(const ITensorInfo &input, const ITensorI
     return Status{};
 }
 
-std::pair<Status, Window> validate_and_configure_window_logits_1d_max(ITensorInfo &input, ITensorInfo &output)
-{
-    // Softmax across the x dimension
-    const TensorShape output_shape = TensorShape(input.tensor_shape()).set(0, 1);
-    // Output auto initialization if not yet initialized
-    auto_init_if_empty(output, output_shape, 1, input.data_type(), input.quantization_info());
-
-    // Configure kernel window
-    const int input_width                       = input.valid_region().shape.x();
-    const int num_elems_processed_per_iteration = 16U / data_size_from_type(input.data_type());
-    const int num_elems_read_per_iteration      = ceil_to_multiple(input_width, num_elems_processed_per_iteration);
-
-    const ValidRegion out_valid_region(ValidRegion(input.valid_region()).set(0, 0, 1));
-    output.set_valid_region(out_valid_region);
-
-    Window win = calculate_max_window(output);
-
-    AccessWindowHorizontal input_access(&input, input.valid_region().anchor.x(), num_elems_read_per_iteration);
-    AccessWindowHorizontal output_access(&output, 0, 1);
-
-    const bool window_changed = update_window_and_padding(win, input_access, output_access);
-
-    const Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{};
-    return std::make_pair(err, win);
-}
-
 template <typename T>
 void logits_1d_max(const ITensor &in, ITensor &out, const Window &window)
 {
-    const auto   start_x     = in.info()->valid_region().anchor.x();
-    const size_t input_width = in.info()->valid_region().shape.x();
-
     /** NEON vector tag type. */
     using ExactTagType = typename wrapper::traits::neon_bitvector_tag_t<T, wrapper::traits::BitWidth::W128>;
 
-    Iterator input(&in, window);
-    Iterator output(&out, window);
+    constexpr int window_step_x  = 16 / sizeof(T);
+    const auto    window_start_x = static_cast<int>(window.x().start());
+    const auto    window_end_x   = static_cast<int>(window.x().end());
 
-    constexpr int window_step_x = 16 / sizeof(T);
-    const int     sum_stages    = log2(window_step_x / 2);
-    execute_window_loop(window, [&](const Coordinates &)
+    Window win{ window };
+    win.set(Window::DimX, Window::Dimension(0, 1, 1));
+    Iterator input(&in, win);
+    Iterator output(&out, win);
+
+    const int sum_stages = log2(window_step_x / 2);
+    execute_window_loop(win, [&](const Coordinates &)
     {
         // Get pointers
-        const auto in_ptr  = reinterpret_cast<const T *>(input.ptr()) + start_x;
+        const auto in_ptr  = reinterpret_cast<const T *>(input.ptr());
         const auto out_ptr = reinterpret_cast<T *>(output.ptr());
 
         // Init max value
         auto vec_max = wrapper::vdup_n(support::cpp11::lowest<T>(), ExactTagType{});
+        int  x       = window_start_x;
 
-        // Loop over input row
-        for(const T *it = in_ptr; it < (in_ptr + input_width); it += window_step_x)
+        for(; x <= (window_end_x - window_step_x); x += window_step_x)
         {
-            const auto current_value = wrapper::vloadq(it);
+            const auto current_value = wrapper::vloadq(in_ptr + x);
             vec_max                  = wrapper::vmax(vec_max, current_value);
         }
-
         auto carry_max = wrapper::vpmax(wrapper::vgethigh(vec_max), wrapper::vgetlow(vec_max));
 
         for(int i = 0; i < sum_stages; ++i)
         {
             carry_max = wrapper::vpmax(carry_max, carry_max);
         }
 
-        const T max_val = wrapper::vgetlane(carry_max, 0);
-        *out_ptr        = max_val;
+        T max_val = wrapper::vgetlane(carry_max, 0);
+
+        // Compute left-over elements
+        for(; x < window_end_x; ++x)
+        {
+            max_val = *(in_ptr + x) > max_val ? *(in_ptr + x) : max_val;
+        }
+
+        *out_ptr = max_val;
     },
     input, output);
 }
@@ -182,8 +164,16 @@ void NELogits1DMaxKernel::configure(const ITensor *input, ITensor *output)
     // Perform validation step
     ARM_COMPUTE_ERROR_THROW_ON(validate_arguments_logits_1d_max(*input->info(), *output->info()));
 
     // Configure kernel window
-    auto win_config = validate_and_configure_window_logits_1d_max(*input->info(), *output->info());
-    ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
+
+    // Softmax across the x dimension
+    const TensorShape output_shape = TensorShape(input->info()->tensor_shape()).set(0, 1);
+    // Output auto initialization if not yet initialized
+    auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type(), input->info()->quantization_info());
+
+    Window win = calculate_max_window(*input->info(), Steps());
+    Coordinates coord;
+    coord.set_num_dimensions(output->info()->num_dimensions());
+    output->info()->set_valid_region(ValidRegion(coord, output->info()->tensor_shape()));
 
     switch(input->info()->data_type())
     {
@@ -214,15 +204,13 @@ void NELogits1DMaxKernel::configure(const ITensor *input, ITensor *output)
 
     _border_size = BorderSize(0, num_elems_read_per_iteration - input_width, 0, 0);
 
-    INEKernel::configure(win_config.second);
+    INEKernel::configure(win);
 }
 
 Status NELogits1DMaxKernel::validate(const ITensorInfo *input, const ITensorInfo *output)
 {
     ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
-
     ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments_logits_1d_max(*input, *output));
-    ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window_logits_1d_max(*input->clone(), *output->clone()).first);
 
     return Status{};
 }
@@ -275,37 +263,6 @@ Status validate_arguments_logits_softmax(const ITensorInfo &input, const ITensor
 
     return Status{};
 }
-
-std::pair<Status, Window> validate_and_configure_window_logits_softmax(ITensorInfo &input, ITensorInfo &max,
-                                                                       ITensorInfo &output, ITensorInfo &tmp, bool is_log)
-{
-    const bool is_quantized_asymmetric = is_data_type_quantized_asymmetric(input.data_type());
-
-    // Output auto initialization if not yet initialized
-    const QuantizationInfo output_quantization = is_quantized_asymmetric ? arm_compute::get_softmax_output_quantization_info(input.data_type(), is_log) : output.quantization_info();
-    auto_init_if_empty(output, TensorInfo(input).set_quantization_info(output_quantization).reset_padding());
-
-    // Tmp auto initialization if not yet initialized
-    const DataType tmp_data_type = is_quantized_asymmetric ? DataType::F32 : input.data_type();
-    auto_init_if_empty(tmp, TensorInfo(input).set_data_type(tmp_data_type).reset_padding());
-
-    const int input_width = input.valid_region().shape.x();
-
-    Window win = calculate_max_window(max);
-
-    AccessWindowHorizontal input_access(&input, input.valid_region().anchor.x(), input_width);
-    AccessWindowHorizontal max_access(&input, 0, 1);
-    AccessWindowHorizontal output_access(&output, input.valid_region().anchor.x(), input_width);
-    AccessWindowHorizontal tmp_access(&tmp, input.valid_region().anchor.x(), input_width);
-
-    const bool window_changed = update_window_and_padding(win, input_access, max_access, output_access, tmp_access);
-
-    output.set_valid_region(input.valid_region());
-
-    const Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{};
-    return std::make_pair(err, win);
-}
-
 template <typename T, bool is_log>
 void logits_1d_softmax_qasymm8(const ITensor &in, const ITensor &max, void *const tmp, ITensor &out, const float beta, const Window &window)
 {
@@ -610,9 +567,23 @@ void NELogits1DSoftmaxKernel<IS_LOG>::configure(const ITensor *input, const ITen
     ARM_COMPUTE_ERROR_ON_NULLPTR(input->info(), max->info(), output->info(), tmp->info());
     // Perform validation step
     ARM_COMPUTE_ERROR_THROW_ON(validate_arguments_logits_softmax(*input->info(), *max->info(), *output->info(), beta, *tmp->info(), IS_LOG));
+
+    // Configure kernel window
+    const bool is_quantized_asymmetric = is_data_type_quantized_asymmetric(input->info()->data_type());
+
+    // Output auto initialization if not yet initialized
+    const QuantizationInfo output_quantization = is_quantized_asymmetric ? arm_compute::get_softmax_output_quantization_info(input->info()->data_type(), IS_LOG) : output->info()->quantization_info();
+    auto_init_if_empty(*output->info(), TensorInfo(*input->info()).set_quantization_info(output_quantization).reset_padding());
+
+    // Tmp auto initialization if not yet initialized
+    const DataType tmp_data_type = is_quantized_asymmetric ? DataType::F32 : input->info()->data_type();
+    auto_init_if_empty(*tmp->info(), TensorInfo(*input->info()).set_data_type(tmp_data_type).reset_padding());
+
     // Configure kernel window
-    auto win_config = validate_and_configure_window_logits_softmax(*input->info(), *max->info(), *output->info(), *tmp->info(), IS_LOG);
-    ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
+    Window win = calculate_max_window(*max->info(), Steps());
+    Coordinates coord;
+    coord.set_num_dimensions(output->info()->num_dimensions());
+    output->info()->set_valid_region(ValidRegion(coord, output->info()->tensor_shape()));
 
     switch(input->info()->data_type())
     {
@@ -641,7 +612,7 @@ void NELogits1DSoftmaxKernel<IS_LOG>::configure(const ITensor *input, const ITen
     _beta = beta;
     _tmp  = tmp;
 
-    INEKernel::configure(win_config.second);
+    INEKernel::configure(win);
 }
 
 template <bool IS_LOG>
@@ -649,9 +620,7 @@ Status NELogits1DSoftmaxKernel<IS_LOG>::validate(const ITensorInfo *input, const
                                                  const ITensorInfo *output, const float beta, const ITensorInfo *tmp)
 {
     ARM_COMPUTE_ERROR_ON_NULLPTR(input, max, output, tmp);
-
     ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments_logits_softmax(*input, *max, *output, beta, *tmp, IS_LOG));
-    ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window_logits_softmax(*input->clone(), *max->clone(), *output->clone(), *tmp->clone(), IS_LOG).first);
 
     return Status{};
 }
diff --git a/tests/validation/NEON/SoftmaxLayer.cpp b/tests/validation/NEON/SoftmaxLayer.cpp
index cbf7729bc3..c429782e60 100644
--- a/tests/validation/NEON/SoftmaxLayer.cpp
+++ b/tests/validation/NEON/SoftmaxLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -63,37 +63,6 @@ const auto CNNDataTypes = framework::dataset::make("DataType",
 TEST_SUITE(NEON)
 TEST_SUITE(SoftmaxLayer)
 
-DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(concat(datasets::Small2DShapes(), datasets::Medium2DShapes()), CNNDataTypes), shape, data_type)
-{
-    const QuantizationInfo quantization_info = is_data_type_quantized_asymmetric(data_type) ? QuantizationInfo(1.f / 255.f, 0) : QuantizationInfo();
-
-    // Create tensors
-    Tensor src = create_tensor<Tensor>(shape, data_type, 1, quantization_info);
-    Tensor dst = create_tensor<Tensor>(shape, data_type, 1, QuantizationInfo(1.f / 256.f, 0));
-
-    ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
-    ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
-
-    // Create and configure function
-    NESoftmaxLayer smx_layer;
-    smx_layer.configure(&src, &dst);
-
-    // Validate valid region
-    const ValidRegion valid_region = shape_to_valid_region(shape);
-    validate(src.info()->valid_region(), valid_region);
-    validate(dst.info()->valid_region(), valid_region);
-
-    // NESoftmaxLayer configures the paddings only in the 2D case
-    if(shape.num_dimensions() <= 2)
-    {
-        // Validate padding
-        const int         step    = 16 / data_size_from_type(data_type);
-        const PaddingSize padding = PaddingCalculator(shape.x(), step).required_padding();
-        validate(src.info()->padding(), padding);
-        validate(dst.info()->padding(), PaddingSize());
-    }
-}
-
 // *INDENT-OFF*
 // clang-format off
 DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(
@@ -101,8 +70,6 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(
                                             TensorInfo(TensorShape(27U, 13U), 1, DataType::F32),     // Mismatching shapes
                                             TensorInfo(TensorShape(27U, 13U), 1, DataType::QASYMM8,  // Invalid output quantization info
                                                        QuantizationInfo(1.f/256, 12)),
-                                            TensorInfo(TensorShape(27U, 13U), 1, DataType::F32),     // Window shrink
-                                            TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Invalid input dimensionality
                                             TensorInfo(TensorShape(32U, 13U), 1, DataType::F32),
                                             TensorInfo(TensorShape(32U, 13U), 1, DataType::QASYMM8,
                                                        QuantizationInfo(1.f/256, 12)),
@@ -113,8 +80,6 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(
                                             TensorInfo(TensorShape(27U, 11U), 1, DataType::F32),
                                             TensorInfo(TensorShape(27U, 13U), 1, DataType::QASYMM8,
                                                        QuantizationInfo(1.f/256, 12)),
-                                            TensorInfo(TensorShape(27U, 13U), 1, DataType::F32),
-                                            TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),
                                             TensorInfo(TensorShape(32U, 13U), 1, DataType::F32),
                                             TensorInfo(TensorShape(32U, 13U), 1, DataType::QASYMM8,
                                                        QuantizationInfo(1.f/256, 0)),
@@ -128,19 +93,15 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(
                                        1.0,
                                        2.0,
                                        1.0,
-                                       2.0,
-                                       1.0,
                                       })),
        framework::dataset::make("axis", { 1,
                                           1,
                                           1,
                                           1,
                                           1,
-                                          1,
-                                          1,
                                           0,
                                          })),
-       framework::dataset::make("Expected", { false, false, false, false, false, true, true, false })),
+       framework::dataset::make("Expected", { false, false, false, true, true, false })),
        input_info, output_info, beta, axis, expected)
 {
     ARM_COMPUTE_EXPECT(bool(NESoftmaxLayer::validate(&input_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), beta, axis)) == expected, framework::LogLevel::ERRORS);
--
cgit v1.2.1
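The essence of the change above: instead of padding each row out to a multiple of the
vector width (and failing at configure time with "Insufficient Padding!" when the
AccessWindow could not be satisfied), logits_1d_max now collapses the window's x
dimension to a single step and walks each row itself — a full-vector main loop followed
by a scalar loop over the left-over elements — so it never reads out of bounds and
needs no padding at all. Below is a minimal, self-contained sketch of that loop
structure; plain C++ stands in for the NEON wrapper calls (wrapper::vloadq / vmax /
vpmax), and row_max with its per-lane loop is an illustrative helper, not
ComputeLibrary API.

#include <algorithm>
#include <array>
#include <iostream>
#include <limits>
#include <vector>

template <typename T>
T row_max(const T *in_ptr, int window_end_x)
{
    // Elements per 128-bit vector, as in the kernel: 16 / sizeof(T).
    constexpr int window_step_x = 16 / sizeof(T);

    // Stand-in for the NEON register: one max accumulator per lane.
    std::array<T, window_step_x> vec_max;
    vec_max.fill(std::numeric_limits<T>::lowest());

    int x = 0;
    // Main loop: whole vectors only, so it never reads past the end of the row.
    for(; x <= window_end_x - window_step_x; x += window_step_x)
    {
        for(int lane = 0; lane < window_step_x; ++lane)
        {
            vec_max[lane] = std::max(vec_max[lane], in_ptr[x + lane]); // wrapper::vmax
        }
    }

    // Horizontal reduction across lanes (the wrapper::vpmax pairwise stages).
    T max_val = *std::max_element(vec_max.begin(), vec_max.end());

    // Left-over elements that do not fill a whole vector.
    for(; x < window_end_x; ++x)
    {
        max_val = std::max(max_val, in_ptr[x]);
    }
    return max_val;
}

int main()
{
    // 9 floats: two full 4-lane vectors plus one left-over element.
    const std::vector<float> row{ 0.5f, -1.f, 3.f, 2.f, 7.f, 1.f, 0.f, 4.f, 6.5f };
    std::cout << row_max(row.data(), static_cast<int>(row.size())) << '\n'; // prints 7
}

The same trade shows up in both configure() methods: calculate_max_window(..., Steps())
with no AccessWindowHorizontal means no update_window_and_padding, no window shrinking,
and nothing window-related left for validate() to re-check — which is why the
validate_and_configure_window helpers, their calls in validate(), and the padding checks
in the Configuration test case could all simply be deleted.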