From b4e3e1c371d8091e86ee1c6e704057559bbe1554 Mon Sep 17 00:00:00 2001
From: Ioan-Cristian Szabo
Date: Thu, 30 Nov 2017 17:17:17 +0000
Subject: COMPMID-617: Add validate support for NEON FullyConnectedLayer

Change-Id: I08987022c8d4cc335c00b8af27bd3edb8fe64d3b
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/111596
Tested-by: Jenkins
Reviewed-by: Alexander Gilday
Reviewed-by: Anthony Barbier
---
 .../kernels/NEGEMMMatrixAccumulateBiasesKernel.cpp | 69 +++++++++++++-----
 .../NEON/kernels/NEGEMMMatrixMultiplyKernel.cpp    | 85 ++++++++++++++++++----
 src/core/NEON/kernels/NEIm2ColKernel.cpp           | 40 ++++++++--
 3 files changed, 154 insertions(+), 40 deletions(-)

(limited to 'src/core/NEON/kernels')

diff --git a/src/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.cpp b/src/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.cpp
index 3dd59bddd6..cab3c7a58f 100644
--- a/src/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.cpp
+++ b/src/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -39,6 +39,42 @@
 using namespace arm_compute;
 
+namespace
+{
+inline Status validate_arguments(const ITensorInfo *accum, const ITensorInfo *biases)
+{
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(accum, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32);
+    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(biases, accum);
+    ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT_POSITION(biases, accum);
+    ARM_COMPUTE_RETURN_ERROR_ON(biases->num_dimensions() > 1);
+    ARM_COMPUTE_RETURN_ERROR_ON(biases->dimension(0) != accum->dimension(0));
+
+    return Status{};
+}
+
+inline std::pair<Status, Window> validate_and_configure_window(ITensorInfo *accum, ITensorInfo *biases)
+{
+    constexpr unsigned int num_elems_processed_per_iteration = 16;
+
+    // Configure kernel window
+    Window win = calculate_max_window(*accum, Steps(num_elems_processed_per_iteration));
+
+    bool window_changed = update_window_and_padding(win,
+                                                    AccessWindowHorizontal(accum, 0, num_elems_processed_per_iteration),
+                                                    AccessWindowStatic(biases, 0, 0, ceil_to_multiple(biases->dimension(0), num_elems_processed_per_iteration), biases->tensor_shape().y()));
+
+    AccessWindowHorizontal output_access(accum, 0, num_elems_processed_per_iteration);
+
+    // Set the valid region for the accum tensor
+    Coordinates coord;
+    coord.set_num_dimensions(accum->num_dimensions());
+    output_access.set_valid_region(win, ValidRegion(coord, accum->tensor_shape()));
+
+    Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{};
+    return std::make_pair(err, win);
+}
+} // namespace
+
 NEGEMMMatrixAccumulateBiasesKernel::NEGEMMMatrixAccumulateBiasesKernel()
     : _accum(nullptr), _biases(nullptr)
 {
@@ -46,31 +82,26 @@ NEGEMMMatrixAccumulateBiasesKernel::NEGEMMMatrixAccumulateBiasesKernel()
 
 void NEGEMMMatrixAccumulateBiasesKernel::configure(ITensor *accum, const ITensor *biases)
 {
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(accum, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32);
-    ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(biases, accum);
-    ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT_POSITION(biases, accum);
-    ARM_COMPUTE_ERROR_ON(biases->info()->num_dimensions() > 1);
+    ARM_COMPUTE_ERROR_ON_NULLPTR(accum, biases);
+
+    // Perform validate step
+    ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(accum->info(), biases->info()));
 
     _biases = biases;
     _accum  = accum;
 
-    constexpr unsigned int num_elems_processed_per_iteration = 16;
-
     // Configure kernel window
-    Window win = calculate_max_window(*accum->info(), Steps(num_elems_processed_per_iteration));
-
-    update_window_and_padding(win,
-                              AccessWindowHorizontal(accum->info(), 0, num_elems_processed_per_iteration),
-                              AccessWindowStatic(biases->info(), 0, 0, ceil_to_multiple(biases->info()->dimension(0), num_elems_processed_per_iteration), biases->info()->tensor_shape().y()));
-
-    AccessWindowHorizontal output_access(accum->info(), 0, num_elems_processed_per_iteration);
+    auto win_config = validate_and_configure_window(accum->info(), biases->info());
+    ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
+    INEKernel::configure(win_config.second);
+}
 
-    // Set the valid region for the accum tensor
-    Coordinates coord;
-    coord.set_num_dimensions(accum->info()->num_dimensions());
-    output_access.set_valid_region(win, ValidRegion(coord, accum->info()->tensor_shape()));
+Status NEGEMMMatrixAccumulateBiasesKernel::validate(const ITensorInfo *accum, const ITensorInfo *biases)
+{
+    ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(accum, biases));
+    ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(accum->clone().get(), biases->clone().get()).first);
 
-    INEKernel::configure(win);
+    return Status{};
 }
 
 void NEGEMMMatrixAccumulateBiasesKernel::run(const Window &window, const ThreadInfo &info)
diff --git a/src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.cpp b/src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.cpp
index aa5e2dd0dd..69b052a9bd 100644
--- a/src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.cpp
+++ b/src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -36,6 +36,8 @@
 #include "arm_compute/core/Validate.h"
 #include "arm_compute/core/Window.h"
 
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+
 #include 
 #include 
 #include 
@@ -1409,27 +1411,73 @@ void matrix_matrix_multiply_qs16(const ITensor *input0, const ITensor *input1, I
                       ina, inb, out);
 }
 
-Status validate_arguments(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output)
+inline Status validate_arguments(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output, float alpha, bool is_interleaved, const GEMMReshapeInfo &reshape_info)
 {
+    ARM_COMPUTE_UNUSED(alpha);
+
     ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input0, 1, DataType::F16, DataType::F32, DataType::QS8, DataType::QS16);
     ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input0, input1, output);
     ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input0, input1, output);
-    ARM_COMPUTE_UNUSED(input0);
-    ARM_COMPUTE_UNUSED(input1);
-    ARM_COMPUTE_UNUSED(output);
 
-    if(output->dimension(1) == 1)
+    if(!is_interleaved)
     {
         ARM_COMPUTE_RETURN_ERROR_ON(input0->dimension(0) != input1->dimension(1));
+
+        if(output->total_size() != 0)
+        {
+            ARM_COMPUTE_RETURN_ERROR_ON(input1->dimension(0) != output->dimension(0));
+            ARM_COMPUTE_RETURN_ERROR_ON(input0->dimension(1) != output->dimension(1));
+            ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input0, output);
+            ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input0, output);
+        }
+    }
+    else
+    {
+        const int m                         = reshape_info.m();
+        const int n                         = reshape_info.n();
+        const int k                         = reshape_info.k();
+        const int mult_transpose1xW_width   = reshape_info.mult_transpose1xW_width();
+        const int mult_interleave4x4_height = reshape_info.mult_interleave4x4_height();
+
+        /* Interleave */
+        TensorShape tensor_shape0{ input0->tensor_shape() };
+        tensor_shape0.set(0, k);
+        tensor_shape0.set(1, m);
+
+        const TensorInfo tensor_info0          = input0->clone()->set_tensor_shape(tensor_shape0);
+        const TensorInfo tensor_info_reshaped0 = input0->clone()->set_tensor_shape(misc::shape_calculator::compute_interleaved_shape(tensor_info0, mult_interleave4x4_height));
+        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input0, &tensor_info_reshaped0);
+
+        if(n != 0) /* Transpose */
+        {
+            TensorShape tensor_shape1{ input1->tensor_shape() };
+            tensor_shape1.set(0, n);
+            tensor_shape1.set(1, k);
+
+            const TensorInfo tensor_info1          = input1->clone()->set_tensor_shape(tensor_shape1);
+            const TensorInfo tensor_info_reshaped1 = input1->clone()->set_tensor_shape(misc::shape_calculator::compute_transpose1xW_with_element_size_shape(tensor_info1, mult_transpose1xW_width));
+            ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input1, &tensor_info_reshaped1);
+        }
+
+        if(output->total_size() != 0)
+        {
+            if(n != 0)
+            {
+                ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(0) != static_cast<size_t>(n));
+            }
+            ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(1) != static_cast<size_t>(m));
+            ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input0, output);
+            ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input0, output);
+        }
     }
 
     return Status{};
 }
 
-std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input0, ITensorInfo *input1, ITensorInfo *output)
+inline std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input0, ITensorInfo *input1, ITensorInfo *output)
 {
-    Window win = Window();
-    bool window_changed = false;
+    bool   window_changed{};
+    Window win{};
 
     unsigned int       num_elems_processed_per_iteration_x = 0;
     const unsigned int num_elems_processed_per_iteration_y = 4;
 
@@ -1538,11 +1586,19 @@ NEGEMMMatrixMultiplyKernel::NEGEMMMatrixMultiplyKernel()
 {
 }
 
-void NEGEMMMatrixMultiplyKernel::configure(const ITensor *input0, const ITensor *input1, ITensor *output, float alpha)
+void NEGEMMMatrixMultiplyKernel::configure(const ITensor *input0, const ITensor *input1, ITensor *output, float alpha, bool is_interleaved, const GEMMReshapeInfo &reshape_info)
 {
-    // Perform validate step
     ARM_COMPUTE_ERROR_ON_NULLPTR(input0, input1, output);
-    ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input0->info(), input1->info(), output->info()));
+
+    // Output tensor auto initialization if not yet initialized
+    TensorShape tensor_shape{ input0->info()->tensor_shape() };
+    tensor_shape.set(0, is_interleaved ? reshape_info.n() : input1->info()->dimension(0));
+    tensor_shape.set(1, is_interleaved ? reshape_info.m() : input0->info()->dimension(1));
+
+    auto_init_if_empty(*output->info(), input0->info()->clone()->set_tensor_shape(tensor_shape));
+
+    // Perform validate step
+    ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input0->info(), input1->info(), output->info(), alpha, is_interleaved, reshape_info));
 
     _input0 = input0;
     _input1 = input1;
@@ -1555,9 +1611,10 @@ void NEGEMMMatrixMultiplyKernel::configure(const ITensor *input0, const ITensor
     INEKernel::configure(win_config.second);
 }
 
-Status NEGEMMMatrixMultiplyKernel::validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output)
+Status NEGEMMMatrixMultiplyKernel::validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output, float alpha, bool is_interleaved,
+                                            const GEMMReshapeInfo &reshape_info)
 {
-    ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input0, input1, output));
+    ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input0, input1, output, alpha, is_interleaved, reshape_info));
     ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input0->clone().get(), input1->clone().get(), output->clone().get()).first);
 
     return Status{};
diff --git a/src/core/NEON/kernels/NEIm2ColKernel.cpp b/src/core/NEON/kernels/NEIm2ColKernel.cpp
index 633f78de4b..4fa329bf44 100644
--- a/src/core/NEON/kernels/NEIm2ColKernel.cpp
+++ b/src/core/NEON/kernels/NEIm2ColKernel.cpp
@@ -32,6 +32,8 @@
 #include "arm_compute/core/Types.h"
 #include "arm_compute/core/Validate.h"
 
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+
 #include 
 #include 
 #include 
@@ -42,14 +44,34 @@
 using namespace arm_compute;
 
 namespace
 {
-Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias)
+Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info,
+                          bool has_bias, bool is_fully_connected, bool is_flatten)
 {
     ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QASYMM8, DataType::QS16, DataType::F16, DataType::F32);
     ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
     ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT_POSITION(input, output);
     ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::QASYMM8 && has_bias);
-    ARM_COMPUTE_UNUSED(kernel_dims);
-    ARM_COMPUTE_UNUSED(conv_info);
+
+    if(is_flatten) /* Called by FlattenLayer */
+    {
+        size_t flatten_shape = input->tensor_shape().x() * input->tensor_shape().y() * input->tensor_shape().z();
+        ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(0) != flatten_shape);
+    }
+    else if(!is_fully_connected) /* Called by ConvolutionLayer */
+    {
+        std::pair<unsigned int, unsigned int> out_dims = scaled_dimensions(input->dimension(0), input->dimension(1), kernel_dims.width, kernel_dims.height, conv_info);
+        ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(0) != (input->dimension(2) * kernel_dims.area() + (has_bias ? 1 : 0)));
+        ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(1) != (out_dims.first * out_dims.second));
+        ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(2) != 1);
+    }
+    else /* Called by FullyConnectedLayer */
+    {
+        const int num_batch_dimensions = std::max(0, static_cast<int>(output->tensor_shape().num_dimensions()) - 1);
+        const int num_input_dimensions = input->tensor_shape().num_dimensions() - num_batch_dimensions;
+
+        TensorInfo expected_output = output->clone()->set_tensor_shape(misc::shape_calculator::compute_im2col_shape(input, num_input_dimensions));
+        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(&expected_output, output);
+    }
 
     return Status{};
 }
@@ -291,12 +313,15 @@ NEIm2ColKernel::NEIm2ColKernel()
 {
 }
 
-void NEIm2ColKernel::configure(const ITensor *input, ITensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias)
+void NEIm2ColKernel::configure(const ITensor *input, ITensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info,
+                               bool has_bias, bool is_fully_connected, bool is_flatten)
 {
     ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
 
     // Perform validation step
-    ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), kernel_dims, conv_info, has_bias));
+    ARM_COMPUTE_UNUSED(is_fully_connected);
+    ARM_COMPUTE_UNUSED(is_flatten);
+    ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), kernel_dims, conv_info, has_bias, is_fully_connected, is_flatten));
 
     _input  = input;
     _output = output;
@@ -382,9 +407,10 @@ void NEIm2ColKernel::configure(const ITensor *input, ITensor *output, const Size
     IKernel::configure(window);
 }
 
-Status NEIm2ColKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias)
+Status NEIm2ColKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info,
+                                bool has_bias, bool is_fully_connected, bool is_flatten)
 {
-    ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, kernel_dims, conv_info, has_bias));
+    ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, kernel_dims, conv_info, has_bias, is_fully_connected, is_flatten));
 
     return Status{};
 }
-- 
cgit v1.2.1
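
Usage note (not part of the patch above): the point of the change is to let callers query the kernels' new static validate() entry points before any tensor memory is allocated. The sketch below is illustrative only; the tensor shapes, the helper function name and the use of error_code() to inspect the returned Status are assumptions, while the validate() signatures themselves are the ones introduced by this commit.

#include "arm_compute/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.h"
#include "arm_compute/core/NEON/kernels/NEIm2ColKernel.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"

using namespace arm_compute;

// Illustrative helper: checks whether the im2col and bias-accumulation stages of a
// hypothetical fully connected layer (128 inputs, 64 outputs, batch of 4) are consistent.
bool fully_connected_kernels_are_valid()
{
    const TensorInfo input(TensorShape(128U, 4U), 1, DataType::F32);      // FC input
    const TensorInfo im2col_out(TensorShape(128U, 4U), 1, DataType::F32); // im2col output
    const TensorInfo accum(TensorShape(64U, 4U), 1, DataType::F32);       // GEMM result
    const TensorInfo biases(TensorShape(64U), 1, DataType::F32);          // per-output biases

    // Im2Col queried in "fully connected" mode; kernel_dims/conv_info are not used on that path.
    const Status im2col_status = NEIm2ColKernel::validate(&input, &im2col_out, Size2D(1, 1), PadStrideInfo(),
                                                          false /* has_bias */, true /* is_fully_connected */, false /* is_flatten */);

    // Bias accumulation onto the GEMM output.
    const Status bias_status = NEGEMMMatrixAccumulateBiasesKernel::validate(&accum, &biases);

    return im2col_status.error_code() == ErrorCode::OK && bias_status.error_code() == ErrorCode::OK;
}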