From afa5d817b1d083837cd7ea30d32f845d82620c12 Mon Sep 17 00:00:00 2001 From: Michalis Spyrou Date: Thu, 30 Nov 2017 14:25:57 +0000 Subject: COMPMID-617 Add validation methods to Kernels - NEActivationLayer - NESoftmax - NEDirectConvolutionLayer - NENormalizationLayer - NEPoolingLayer Change-Id: Ib279f1c1b7f9247679b0d6593aed7393da8fe87b Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/111335 Tested-by: BSG Visual Compute Jenkins server to access repositories on http://mpd-gerrit.cambridge.arm.com Reviewed-by: Georgios Pinitas --- src/core/NEON/kernels/NESoftmaxLayerKernel.cpp | 230 +++++++++++++++++++------ 1 file changed, 180 insertions(+), 50 deletions(-) (limited to 'src/core/NEON/kernels/NESoftmaxLayerKernel.cpp') diff --git a/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp b/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp index a8a0f59a41..b13fb0e87c 100644 --- a/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp +++ b/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp @@ -42,6 +42,149 @@ using namespace arm_compute; namespace { +Status validate_arguments_logits_1d_max(const ITensorInfo *input, const ITensorInfo *output) +{ + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32); + + // Checks performed when output is configured + if(output->total_size() != 0) + { + // Softmax across the x dimension + TensorShape output_shape{ input->tensor_shape() }; + output_shape.set(0, 1); + + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT_POSITION(input, output); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(output->tensor_shape(), output_shape); + } + + return Status{}; +} + +std::pair validate_and_configure_window_logits_1d_max(ITensorInfo *input, ITensorInfo *output) +{ + // Configure kernel window + constexpr unsigned int num_elems_written_per_row = 1; + const int input_width = input->valid_region().shape.x(); + + unsigned int num_elems_processed_per_iteration = 16 / data_size_from_type(input->data_type()); + Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration)); + AccessWindowHorizontal input_access(input, 0, num_elems_processed_per_iteration); + bool window_changed = false; + + if(output->total_size() != 0) + { + AccessWindowHorizontal output_access(output, 0, num_elems_written_per_row, 1.f / input_width); + window_changed = update_window_and_padding(win, input_access, output_access); + output_access.set_valid_region(win, ValidRegion(Coordinates(), output->tensor_shape())); + } + else + { + window_changed = update_window_and_padding(win, input_access); + } + + Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{}; + return std::make_pair(err, win); +} + +Status validate_arguments_logits_1d_shift_exp_sum(const ITensorInfo *input, const ITensorInfo *max, const ITensorInfo *output, const ITensorInfo *sum, float beta) +{ + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, max, sum, output); + ARM_COMPUTE_RETURN_ERROR_ON((beta != 1.0f) && is_data_type_fixed_point(input->data_type())); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32); + + // Checks performed when output is configured + if(output->total_size() != 0) + { + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT_POSITION(input, output); + } + + // Checks performed when sum is configured + if(sum->total_size() != 0) + { + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, max, sum); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(max, sum); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT_POSITION(input, max, sum); + } + + return Status{}; +} + +std::pair validate_and_configure_window_logits_1d_shift_exp_sum(ITensorInfo *input, ITensorInfo *max, ITensorInfo *output, ITensorInfo *sum) +{ + unsigned int num_elems_processed_per_iteration = input->valid_region().shape.x(); + + // Configure kernel window + Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration)); + AccessWindowHorizontal input_access(input, 0, num_elems_processed_per_iteration); + AccessWindowHorizontal max_access(max, 0, 1); + AccessWindowHorizontal sum_access(sum, 0, 1); + bool window_changed = false; + + if(output->total_size() != 0) + { + AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration); + window_changed = update_window_and_padding(win, input_access, max_access, output_access, sum_access); + output_access.set_valid_region(win, input->valid_region()); + } + else + { + window_changed = update_window_and_padding(win, input_access, max_access, sum_access); + } + + sum_access.set_valid_region(win, ValidRegion(Coordinates(), sum->tensor_shape())); + + Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{}; + return std::make_pair(err, win); +} + +Status validate_arguments_logits_1d_norm(const ITensorInfo *input, const ITensorInfo *sum, const ITensorInfo *output) +{ + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, sum, output); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::S32, DataType::F16, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, sum); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT_POSITION(input, sum); + + // Checks performed when output is configured + if(output->total_size() != 0) + { + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT_POSITION(input, output); + } + + return Status{}; +} + +std::pair validate_and_configure_window_logits_1d_norm(ITensorInfo *input, ITensorInfo *sum, ITensorInfo *output) +{ + // Configure kernel window + unsigned int num_elems_processed_per_iteration = 16 / data_size_from_type(input->data_type()); + Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration)); + + AccessWindowHorizontal input_access(input, 0, num_elems_processed_per_iteration); + AccessWindowStatic sum_access(sum, 0, 0, 1, sum->dimension(1)); + bool window_changed = false; + + if(output->total_size() != 0) + { + AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration); + + window_changed = update_window_and_padding(win, input_access, sum_access, output_access); + + output_access.set_valid_region(win, input->valid_region()); + } + else + { + window_changed = update_window_and_padding(win, input_access, sum_access); + } + Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{}; + return std::make_pair(err, win); +} + void logits_1d_max_qs8(const ITensor *in, ITensor *out, const Window &window) { Window in_slice = window.first_slice_window_1D(); @@ -184,8 +327,7 @@ BorderSize NELogits1DMaxKernel::border_size() const void NELogits1DMaxKernel::configure(const ITensor *input, ITensor *output) { - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32); - ARM_COMPUTE_ERROR_ON_NULLPTR(output); + ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); // Softmax across the x dimension TensorShape output_shape{ input->info()->tensor_shape() }; @@ -194,9 +336,8 @@ void NELogits1DMaxKernel::configure(const ITensor *input, ITensor *output) // Output auto initialization if not yet initialized auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type(), input->info()->fixed_point_position()); - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); - ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT_POSITION(input, output); - ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(output->info()->tensor_shape(), output_shape); + // Perform validation step + ARM_COMPUTE_ERROR_THROW_ON(validate_arguments_logits_1d_max(input->info(), output->info())); const int input_width = input->info()->valid_region().shape.x(); unsigned int num_elems_processed_per_iteration = 16 / data_size_from_type(input->info()->data_type()); @@ -226,17 +367,17 @@ void NELogits1DMaxKernel::configure(const ITensor *input, ITensor *output) _border_size = BorderSize(0, num_elems_processed_per_iteration - (input_width % num_elems_processed_per_iteration), 0, 0); // Configure kernel window - constexpr unsigned int num_elems_written_per_row = 1; - - Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration)); - AccessWindowHorizontal input_access(input->info(), 0, num_elems_processed_per_iteration); - AccessWindowHorizontal output_access(output->info(), 0, num_elems_written_per_row, 1.f / input_width); - - update_window_and_padding(win, input_access, output_access); + auto win_config = validate_and_configure_window_logits_1d_max(input->info(), output->info()); + ARM_COMPUTE_ERROR_THROW_ON(win_config.first); + INEKernel::configure(win_config.second); +} - output_access.set_valid_region(win, ValidRegion(Coordinates(), output->info()->tensor_shape())); +Status NELogits1DMaxKernel::validate(const ITensorInfo *input, const ITensorInfo *output) +{ + ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments_logits_1d_max(input, output)); + ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window_logits_1d_max(input->clone().get(), output->clone().get()).first); - INEKernel::configure(win); + return Status{}; } void NELogits1DMaxKernel::run(const Window &window, const ThreadInfo &info) @@ -512,20 +653,14 @@ NELogits1DShiftExpSumKernel::NELogits1DShiftExpSumKernel() void NELogits1DShiftExpSumKernel::configure(const ITensor *input, const ITensor *max, ITensor *output, ITensor *sum, float beta) { - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32); - ARM_COMPUTE_ERROR_ON_NULLPTR(max, sum, output); - ARM_COMPUTE_ERROR_ON((beta != 1.0f) && is_data_type_fixed_point(input->info()->data_type())); + ARM_COMPUTE_ERROR_ON_NULLPTR(input, max, sum, output); // Output auto initialization if not yet initialized auto_init_if_empty(*sum->info(), max->info()->tensor_shape(), 1, input->info()->data_type(), input->info()->fixed_point_position()); auto_init_if_empty(*output->info(), input->info()->tensor_shape(), 1, input->info()->data_type(), input->info()->fixed_point_position()); - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output, max, sum); - ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT_POSITION(input, output, max, sum); - ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, output); - ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(max, sum); - - unsigned int num_elems_processed_per_iteration = input->info()->valid_region().shape.x(); + // Perform validation step + ARM_COMPUTE_ERROR_THROW_ON(validate_arguments_logits_1d_shift_exp_sum(input->info(), max->info(), output->info(), sum->info(), beta)); switch(input->info()->data_type()) { @@ -555,18 +690,17 @@ void NELogits1DShiftExpSumKernel::configure(const ITensor *input, const ITensor _beta = beta; // Configure kernel window - Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration)); - AccessWindowHorizontal input_access(input->info(), 0, num_elems_processed_per_iteration); - AccessWindowHorizontal max_access(max->info(), 0, 1); - AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration); - AccessWindowHorizontal sum_access(sum->info(), 0, 1); - - update_window_and_padding(win, input_access, max_access, output_access, sum_access); + auto win_config = validate_and_configure_window_logits_1d_shift_exp_sum(input->info(), max->info(), output->info(), sum->info()); + ARM_COMPUTE_ERROR_THROW_ON(win_config.first); + INEKernel::configure(win_config.second); +} - output_access.set_valid_region(win, input->info()->valid_region()); - sum_access.set_valid_region(win, ValidRegion(Coordinates(), sum->info()->tensor_shape())); +Status NELogits1DShiftExpSumKernel::validate(const ITensorInfo *input, const ITensorInfo *max, const ITensorInfo *output, const ITensorInfo *sum, float beta) +{ + ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments_logits_1d_shift_exp_sum(input, max, output, sum, beta)); + ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window_logits_1d_shift_exp_sum(input->clone().get(), max->clone().get(), output->clone().get(), sum->clone().get()).first); - INEKernel::configure(win); + return Status{}; } void NELogits1DShiftExpSumKernel::run(const Window &window, const ThreadInfo &info) @@ -717,23 +851,18 @@ NELogits1DNormKernel::NELogits1DNormKernel() void NELogits1DNormKernel::configure(const ITensor *input, const ITensor *sum, ITensor *output) { - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32); - ARM_COMPUTE_ERROR_ON_NULLPTR(sum, output); + ARM_COMPUTE_ERROR_ON_NULLPTR(input, sum, output); // Output auto initialization if not yet initialized auto_init_if_empty(*output->info(), input->info()->tensor_shape(), 1, input->info()->data_type(), input->info()->fixed_point_position()); - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, sum, output); - ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT_POSITION(input, sum, output); - ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, output); + // Perform validation step + ARM_COMPUTE_ERROR_THROW_ON(validate_arguments_logits_1d_norm(input->info(), sum->info(), output->info())); _input = input; _sum = sum; _output = output; - // Configure kernel window - unsigned int num_elems_processed_per_iteration = 16 / data_size_from_type(input->info()->data_type()); - switch(input->info()->data_type()) { case DataType::QS8: @@ -755,17 +884,18 @@ void NELogits1DNormKernel::configure(const ITensor *input, const ITensor *sum, I break; } - Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration)); - - AccessWindowHorizontal input_access(input->info(), 0, num_elems_processed_per_iteration); - AccessWindowStatic sum_access(sum->info(), 0, 0, 1, sum->info()->dimension(1)); - AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration); - - update_window_and_padding(win, input_access, sum_access, output_access); + // Configure kernel window + auto win_config = validate_and_configure_window_logits_1d_norm(input->info(), sum->info(), output->info()); + ARM_COMPUTE_ERROR_THROW_ON(win_config.first); + INEKernel::configure(win_config.second); +} - output_access.set_valid_region(win, input->info()->valid_region()); +Status NELogits1DNormKernel::validate(const ITensorInfo *input, const ITensorInfo *sum, const ITensorInfo *output) +{ + ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments_logits_1d_norm(input, sum, output)); + ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window_logits_1d_norm(input->clone().get(), sum->clone().get(), output->clone().get()).first); - INEKernel::configure(win); + return Status{}; } void NELogits1DNormKernel::run(const Window &window, const ThreadInfo &info) -- cgit v1.2.1