From b4e3e1c371d8091e86ee1c6e704057559bbe1554 Mon Sep 17 00:00:00 2001 From: Ioan-Cristian Szabo Date: Thu, 30 Nov 2017 17:17:17 +0000 Subject: COMPMID-617: Add validate support for NEON FullyConnectedLayer Change-Id: I08987022c8d4cc335c00b8af27bd3edb8fe64d3b Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/111596 Tested-by: Jenkins Reviewed-by: Alexander Gilday Reviewed-by: Anthony Barbier --- .../kernels/NEGEMMMatrixAccumulateBiasesKernel.h | 8 + .../core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h | 26 ++- arm_compute/core/NEON/kernels/NEIm2ColKernel.h | 38 ++-- arm_compute/core/utils/misc/ShapeCalculator.h | 37 ++++ .../runtime/NEON/functions/NEFullyConnectedLayer.h | 24 ++- .../NEON/functions/NEGEMMConvolutionLayer.h | 14 +- .../runtime/NEON/functions/NEGEMMTranspose1xW.h | 10 +- arm_compute/runtime/NEON/functions/NEIm2Col.h | 36 ++-- .../kernels/NEGEMMMatrixAccumulateBiasesKernel.cpp | 69 +++++-- .../NEON/kernels/NEGEMMMatrixMultiplyKernel.cpp | 85 +++++++-- src/core/NEON/kernels/NEIm2ColKernel.cpp | 40 +++- src/graph/nodes/FullyConnectedLayer.cpp | 2 +- src/runtime/CL/functions/CLFullyConnectedLayer.cpp | 4 +- src/runtime/NEON/functions/NEFlattenLayer.cpp | 4 +- .../NEON/functions/NEFullyConnectedLayer.cpp | 212 ++++++++++++++------- src/runtime/NEON/functions/NEGEMM.cpp | 8 +- .../NEON/functions/NEGEMMConvolutionLayer.cpp | 48 ++--- src/runtime/NEON/functions/NEGEMMTranspose1xW.cpp | 6 +- src/runtime/NEON/functions/NEIm2Col.cpp | 10 +- tests/validation/NEON/FullyConnectedLayer.cpp | 46 +++++ tests/validation/NEON/Im2Col.cpp | 10 +- 21 files changed, 535 insertions(+), 202 deletions(-) diff --git a/arm_compute/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.h b/arm_compute/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.h index 051b8b7d4f..e48a9a77e4 100644 --- a/arm_compute/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.h +++ b/arm_compute/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.h @@ -55,6 +55,14 @@ public: * @param[in] biases The shared biases tensor to append. It must be 1D Tensor. Data type supported: Same as @p input */ void configure(ITensor *accum, const ITensor *biases); + /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMMatrixAccumulateBiasesKernel + * + * @param[in] accum The accumulate tensor to convert. Data type supported: QS8/QS16/F32 + * @param[in] biases The shared biases tensor to append. It must be 1D Tensor. Data type supported: Same as @p input + * + * @return a status + */ + static Status validate(const ITensorInfo *accum, const ITensorInfo *biases); // Inherited methods overridden: void run(const Window &window, const ThreadInfo &info) override; diff --git a/arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h b/arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h index 4598e15b8e..d54522c678 100644 --- a/arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h +++ b/arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h @@ -58,22 +58,28 @@ public: * @note If the output tensor is a matrix, the input matrices @p input0 and @p input1 should be the output of the kernels: @ref NEGEMMInterleave4x4Kernel and @ref NEGEMMTranspose1xWKernel * These two kernels change the layout of the original matrices to be more cache-friendly. * - * @param[in] input0 Input tensor containing the interleaved Matrix A or the vector A. Data types supported: QS8/QS16/F16/F32 - * @param[in] input1 Input tensor containing the transposed Matrix B if the first input tensor A is not a vector. 
- * If the output tensor is a vector, input1 must contain the matrix B not reshaped. Data type supported: same as @p input0 - * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0. - * @param[in] alpha Weight of the matrix product + * @param[in] input0 Input tensor containing the interleaved Matrix A or the vector A. Data types supported: QS8/QS16/F16/F32 + * @param[in] input1 Input tensor containing the transposed Matrix B if the first input tensor A is not a vector. + * If the output tensor is a vector, input1 must contain the matrix B not reshaped. Data type supported: same as @p input0 + * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0. + * @param[in] alpha Weight of the matrix product + * @param[in] is_interleaved (Optional) True if input0 and input1 have been reshaped respectively using @ref NEGEMMInterleave4x4Kernel and @ref NEGEMMTranspose1xWKernel + * @param[in] reshape_info (Optional) GEMM reshape info. If is_interleaved_transposed = true, this object must contain the information to understand how the matrix A and matrix B have been reshaped */ - void configure(const ITensor *input0, const ITensor *input1, ITensor *output, float alpha); + void configure(const ITensor *input0, const ITensor *input1, ITensor *output, float alpha, bool is_interleaved, const GEMMReshapeInfo &reshape_info = GEMMReshapeInfo()); /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMMatrixMultiplyKernel * - * @param[in] input0 Input tensor containing the Matrix A. Data types supported: QS8/QS16/F16/F32 - * @param[in] input1 Input tensor containing the Matrix B. Data type supported: same as @p input0 - * @param[in] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0 + * @param[in] input0 Input tensor containing the interleaved Matrix A or the vector A. Data types supported: QS8/QS16/F16/F32 + * @param[in] input1 Input tensor containing the transposed Matrix B if the first input tensor A is not a vector. + * If the output tensor is a vector, input1 must contain the matrix B not reshaped. Data type supported: same as @p input0 + * @param[in] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0. + * @param[in] alpha Weight of the matrix product + * @param[in] is_interleaved (Optional) True if input0 and input1 have been reshaped respectively using @ref NEGEMMInterleave4x4Kernel and @ref NEGEMMTranspose1xWKernel + * @param[in] reshape_info (Optional) GEMM reshape info. If is_interleaved_transposed = true, this object must contain the information to understand how the matrix A and matrix B have been reshaped * * @return a status */ - static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output); + static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output, float alpha, bool is_interleaved, const GEMMReshapeInfo &reshape_info); // Inherited methods overridden: void run(const Window &window, const ThreadInfo &info) override; diff --git a/arm_compute/core/NEON/kernels/NEIm2ColKernel.h b/arm_compute/core/NEON/kernels/NEIm2ColKernel.h index 6d11fbee11..1659b725bb 100644 --- a/arm_compute/core/NEON/kernels/NEIm2ColKernel.h +++ b/arm_compute/core/NEON/kernels/NEIm2ColKernel.h @@ -76,28 +76,34 @@ public: /** Set the input and output of the kernel. 
* - * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM], - * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QS8/QS16/QASYMM8/F16/F32 - * Note: QASYMM8 works only for has_bias = false - * @param[out] output The output tensor. Data types supported: Same as @p input - * @param[in] kernel_dims The kernel dimensions (width and height). - * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. - * @param[in] has_bias In case biases are provided expands the matrix with 1. + * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM], + * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QS8/QS16/QASYMM8/F16/F32 + * Note: QASYMM8 works only for has_bias = false + * @param[out] output The output tensor. Data types supported: Same as @p input + * @param[in] kernel_dims The kernel dimensions (width and height). + * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. + * @param[in] has_bias In case biases are provided expands the matrix with 1. + * @param[in] is_fully_connected Determines whether this kernel will be called by @ref NEFullyConnectedLayer in order to validate the arguments + * @param[in] is_flatten (Optional) Determines whether this kernel will be called by @ref NEFlattenLayer in order to validate the arguments */ - void configure(const ITensor *input, ITensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias); + void configure(const ITensor *input, ITensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, + bool has_bias, bool is_fully_connected = false, bool is_flatten = false); /** Static function to check if given info will lead to a valid configuration of @ref NEIm2ColKernel * - * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM], - * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QS8/QS16/QASYMM8/F16/F32 - * Note: QASYMM8 works only for has_bias = false - * @param[in] output The output tensor. Data types supported: Same as @p input - * @param[in] kernel_dims The kernel dimensions (width and height). - * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. - * @param[in] has_bias In case biases are provided expands the matrix with 1. + * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM], + * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QS8/QS16/QASYMM8/F16/F32 + * Note: QASYMM8 works only for has_bias = false + * @param[in] output The output tensor. Data types supported: Same as @p input + * @param[in] kernel_dims The kernel dimensions (width and height). + * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. + * @param[in] has_bias In case biases are provided expands the matrix with 1. 
+ * @param[in] is_fully_connected Determines whether this kernel will be called by @ref NEFullyConnectedLayer in order to validate the arguments + * @param[in] is_flatten (Optional) Determines whether this kernel will be called by @ref NEFlattenLayer in order to validate the arguments * * @return a status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias); + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, + bool has_bias, bool is_fully_connected, bool is_flatten = false); // Inherited methods overridden: void run(const Window &window, const ThreadInfo &info) override; diff --git a/arm_compute/core/utils/misc/ShapeCalculator.h b/arm_compute/core/utils/misc/ShapeCalculator.h index c53ac4c71f..e21e5cd0d6 100644 --- a/arm_compute/core/utils/misc/ShapeCalculator.h +++ b/arm_compute/core/utils/misc/ShapeCalculator.h @@ -157,6 +157,43 @@ inline TensorShape compute_deconvolution_shape(const ITensorInfo &input, unsigne return scale_out_shape; } +inline TensorShape compute_im2col_shape(const ITensorInfo *input, const int num_input_dimensions = 3) +{ + TensorShape output_shape{ input->tensor_shape() }; + + output_shape.collapse(num_input_dimensions); + + return output_shape; +} +inline TensorShape compute_interleave_custom_shape(const TensorShape &input, const int x_interleave, const int y_interleave) +{ + TensorShape output_shape{ input }; + + output_shape.set(0, output_shape.x() * x_interleave); + output_shape.set(1, std::ceil(output_shape.y() / static_cast(y_interleave))); + + return output_shape; +} + +inline TensorShape compute_fully_connected_reshaped_weights_shape(const ITensorInfo *input, bool transpose_weights, bool is_batched_fc_layer, const int interleave) +{ + TensorShape output_shape{ input->tensor_shape() }; + + // Transpose weights if the user hasn't done it + if(transpose_weights) + { + output_shape = compute_transposed_shape(*input); + } + + // If the we run multiple batches we need 1xW transpose, too. + if(is_batched_fc_layer) + { + output_shape = compute_transposed_shape(input->clone()->set_tensor_shape(output_shape)); + output_shape = compute_interleave_custom_shape(output_shape, interleave, interleave); + } + + return output_shape; +} } // namespace shape_calculator } // namespace misc } // namespace arm_compute diff --git a/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h b/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h index 463a7d53e3..9bc8d21fc4 100644 --- a/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h +++ b/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -57,6 +57,16 @@ public: * @param[in] is_batched_fc_layer True if it is a batched fully connected layer */ void configure(const ITensor *input, ITensor *output, bool transpose_weights, bool is_batched_fc_layer); + /** Static function to check if given info will lead to a valid configuration of @ref CLFullyConnectedLayerReshapeWeights + * + * @param[in] input Weights tensor info. The weights must be 2 dimensional. Data types supported: QS8/QS16/F32. + * @param[in] output Destination tensor info. Data type supported: Same as @p input. + * @param[in] transpose_weights True if the weights must be transposed. Data types supported: Same as @p weights. 
+ * @param[in] is_batched_fc_layer True if it is a batched fully connected layer + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, bool transpose_weights, bool is_batched_fc_layer); // Inherited methods overridden: void run() override; @@ -94,6 +104,18 @@ public: * @param[in] are_weights_reshaped (Optional) Reshape the weights tensor if false. Defaults to false. */ void configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, bool transpose_weights = true, bool are_weights_reshaped = false); + /** Static function to check if given info will lead to a valid configuration of @ref CLFullyConnectedLayer + * + * @param[in] input Source tensor info. Data type supported: QS8/QS16/F16/F32. + * @param[in] weights Weights tensor info. The weights must be 2 dimensional. Data type supported: Same as @p input + * @param[in] biases Bias tensor info. It can be nullptr. Data type supported:Same as @p input. + * @param[in] output Destination tensor info. Data type supported: Same as @p input. + * @param[in] transpose_weights (Optional) Transpose weights if true. Defaults to true. + * @param[in] are_weights_reshaped (Optional) Reshape the weights tensor if false. Defaults to false. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, bool transpose_weights = true, bool are_weights_reshaped = false); //Inherited methods override void run() override; diff --git a/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h index c3c7f825a9..ac5f4caa78 100644 --- a/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h +++ b/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h @@ -141,12 +141,14 @@ public: private: /** Configures the appropriate matrix multiply routine * - * @param[in] input Input tensor. Data types supported: QS8/QASYMM8/QS16/F16/F32. - * @param[in] weights Weights tensor. Data type supported: Same as @p input. - * @param[out] output Output tensor. Data types supported: Same as @p input, - * except for input of QASYMM8 type where output should be of S32 type. + * @param[in] input Input tensor. Data types supported: QS8/QASYMM8/QS16/F16/F32. + * @param[in] weights Weights tensor. Data type supported: Same as @p input. + * @param[out] output Output tensor. Data types supported: Same as @p input, + * except for input of QASYMM8 type where output should be of S32 type. + * @param[in] is_interleaved (Optional) True if input0 and input1 have been reshaped respectively using @ref CLGEMMInterleave4x4Kernel and @ref CLGEMMTranspose1xWKernel + * @param[in] reshape_info (Optional) GEMM reshape info. 
If is_interleaved_transposed = true, this object must contain the information to understand how the matrix A and matrix B have been reshaped */ - void configure_mm(const ITensor *input, const ITensor *weights, ITensor *output); + void configure_mm(const ITensor *input, const ITensor *weights, ITensor *output, bool is_interleaved, const GEMMReshapeInfo &reshape_info = GEMMReshapeInfo()); /** Prepare the appropriate assembly optimized kernel * * @param[in] ci CPU information @@ -178,7 +180,7 @@ private: bool _is_fully_connected_convolution; bool _are_weights_reshaped; bool _is_quantized; - bool _is_interleaved_transposed; + bool _is_interleaved; }; } #endif /* __ARM_COMPUTE_NECONVOLUTIONGEMMLAYER_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEGEMMTranspose1xW.h b/arm_compute/runtime/NEON/functions/NEGEMMTranspose1xW.h index 447b8c9c70..8b9ad136b4 100644 --- a/arm_compute/runtime/NEON/functions/NEGEMMTranspose1xW.h +++ b/arm_compute/runtime/NEON/functions/NEGEMMTranspose1xW.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -42,6 +42,14 @@ public: * @param[out] output Output tensor. Data type supported: same as @p input */ void configure(const ITensor *input, ITensor *output); + /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMTranspose1xW + * + * @param[in] input First input tensor. Data type supported: U8/S8/QS8/U16/S16/F16/U32/S32/F32/ + * @param[in] output Output tensor. Data type supported: same as @p input + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output); }; } #endif /*__ARM_COMPUTE_NEGEMMTRANSPOSE1XW_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEIm2Col.h b/arm_compute/runtime/NEON/functions/NEIm2Col.h index cb08f5cd09..cf4999b5af 100644 --- a/arm_compute/runtime/NEON/functions/NEIm2Col.h +++ b/arm_compute/runtime/NEON/functions/NEIm2Col.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -39,28 +39,30 @@ class NEIm2Col : public INESimpleFunction public: /** Configure the im2col NEON kernel * - * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM], - * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QS8/QS16/QASYMM8/F16/F32 - * Note: QASYMM8 works only for has_bias = false - * @param[out] output The output tensor. Data types supported: Same as @p input - * @param[in] kernel_dims The kernel dimensions (width and height). - * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. - * @param[in] has_bias In case biases are provided expands the matrix with 1. + * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM], + * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QS8/QS16/QASYMM8/F16/F32 + * Note: QASYMM8 works only for has_bias = false + * @param[out] output The output tensor. Data types supported: Same as @p input + * @param[in] kernel_dims The kernel dimensions (width and height). + * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. + * @param[in] has_bias In case biases are provided expands the matrix with 1. 
+ * @param[in] is_fully_connected Determines whether this kernel will be called by @ref NEFullyConnectedLayer in order to validate the arguments */ - void configure(const ITensor *input, ITensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias); + void configure(const ITensor *input, ITensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, bool is_fully_connected = false); /** Static function to check if given info will lead to a valid configuration of @ref NEIm2Col * - * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM], - * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QS8/QS16/QASYMM8/F16/F32 - * Note: QASYMM8 works only for has_bias = false - * @param[in] output The output tensor. Data types supported: Same as @p input - * @param[in] kernel_dims The kernel dimensions (width and height). - * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. - * @param[in] has_bias In case biases are provided expands the matrix with 1. + * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM], + * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QS8/QS16/QASYMM8/F16/F32 + * Note: QASYMM8 works only for has_bias = false + * @param[in] output The output tensor. Data types supported: Same as @p input + * @param[in] kernel_dims The kernel dimensions (width and height). + * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. + * @param[in] has_bias In case biases are provided expands the matrix with 1. + * @param[in] is_fully_connected Determines whether this kernel will be called by @ref NEFullyConnectedLayer in order to validate the arguments * * @return a status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias); + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, bool is_fully_connected); }; } #endif /* __ARM_COMPUTE_NEIM2COL_H__ */ diff --git a/src/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.cpp b/src/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.cpp index 3dd59bddd6..cab3c7a58f 100644 --- a/src/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.cpp +++ b/src/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -39,6 +39,42 @@ using namespace arm_compute; +namespace +{ +inline Status validate_arguments(const ITensorInfo *accum, const ITensorInfo *biases) +{ + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(accum, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(biases, accum); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT_POSITION(biases, accum); + ARM_COMPUTE_RETURN_ERROR_ON(biases->num_dimensions() > 1); + ARM_COMPUTE_RETURN_ERROR_ON(biases->dimension(0) != accum->dimension(0)); + + return Status{}; +} + +inline std::pair validate_and_configure_window(ITensorInfo *accum, ITensorInfo *biases) +{ + constexpr unsigned int num_elems_processed_per_iteration = 16; + + // Configure kernel window + Window win = calculate_max_window(*accum, Steps(num_elems_processed_per_iteration)); + + bool window_changed = update_window_and_padding(win, + AccessWindowHorizontal(accum, 0, num_elems_processed_per_iteration), + AccessWindowStatic(biases, 0, 0, ceil_to_multiple(biases->dimension(0), num_elems_processed_per_iteration), biases->tensor_shape().y())); + + AccessWindowHorizontal output_access(accum, 0, num_elems_processed_per_iteration); + + // Set the valid region for the accum tensor + Coordinates coord; + coord.set_num_dimensions(accum->num_dimensions()); + output_access.set_valid_region(win, ValidRegion(coord, accum->tensor_shape())); + + Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{}; + return std::make_pair(err, win); +} +} // namespace + NEGEMMMatrixAccumulateBiasesKernel::NEGEMMMatrixAccumulateBiasesKernel() : _accum(nullptr), _biases(nullptr) { @@ -46,31 +82,26 @@ NEGEMMMatrixAccumulateBiasesKernel::NEGEMMMatrixAccumulateBiasesKernel() void NEGEMMMatrixAccumulateBiasesKernel::configure(ITensor *accum, const ITensor *biases) { - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(accum, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32); - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(biases, accum); - ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT_POSITION(biases, accum); - ARM_COMPUTE_ERROR_ON(biases->info()->num_dimensions() > 1); + ARM_COMPUTE_ERROR_ON_NULLPTR(accum, biases); + + // Perform validate step + ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(accum->info(), biases->info())); _biases = biases; _accum = accum; - constexpr unsigned int num_elems_processed_per_iteration = 16; - // Configure kernel window - Window win = calculate_max_window(*accum->info(), Steps(num_elems_processed_per_iteration)); - - update_window_and_padding(win, - AccessWindowHorizontal(accum->info(), 0, num_elems_processed_per_iteration), - AccessWindowStatic(biases->info(), 0, 0, ceil_to_multiple(biases->info()->dimension(0), num_elems_processed_per_iteration), biases->info()->tensor_shape().y())); - - AccessWindowHorizontal output_access(accum->info(), 0, num_elems_processed_per_iteration); + auto win_config = validate_and_configure_window(accum->info(), biases->info()); + ARM_COMPUTE_ERROR_THROW_ON(win_config.first); + INEKernel::configure(win_config.second); +} - // Set the valid region for the accum tensor - Coordinates coord; - coord.set_num_dimensions(accum->info()->num_dimensions()); - output_access.set_valid_region(win, ValidRegion(coord, accum->info()->tensor_shape())); +Status NEGEMMMatrixAccumulateBiasesKernel::validate(const ITensorInfo *accum, const ITensorInfo *biases) +{ + 
ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(accum, biases)); + ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(accum->clone().get(), biases->clone().get()).first); - INEKernel::configure(win); + return Status{}; } void NEGEMMMatrixAccumulateBiasesKernel::run(const Window &window, const ThreadInfo &info) diff --git a/src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.cpp b/src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.cpp index aa5e2dd0dd..69b052a9bd 100644 --- a/src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.cpp +++ b/src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -36,6 +36,8 @@ #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" +#include "arm_compute/core/utils/misc/ShapeCalculator.h" + #include #include #include @@ -1409,27 +1411,73 @@ void matrix_matrix_multiply_qs16(const ITensor *input0, const ITensor *input1, I ina, inb, out); } -Status validate_arguments(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output) +inline Status validate_arguments(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output, float alpha, bool is_interleaved, const GEMMReshapeInfo &reshape_info) { + ARM_COMPUTE_UNUSED(alpha); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input0, 1, DataType::F16, DataType::F32, DataType::QS8, DataType::QS16); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input0, input1, output); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input0, input1, output); - ARM_COMPUTE_UNUSED(input0); - ARM_COMPUTE_UNUSED(input1); - ARM_COMPUTE_UNUSED(output); - if(output->dimension(1) == 1) + if(!is_interleaved) { ARM_COMPUTE_RETURN_ERROR_ON(input0->dimension(0) != input1->dimension(1)); + + if(output->total_size() != 0) + { + ARM_COMPUTE_RETURN_ERROR_ON(input1->dimension(0) != output->dimension(0)); + ARM_COMPUTE_RETURN_ERROR_ON(input0->dimension(1) != output->dimension(1)); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input0, output); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input0, output); + } + } + else + { + const int m = reshape_info.m(); + const int n = reshape_info.n(); + const int k = reshape_info.k(); + const int mult_transpose1xW_width = reshape_info.mult_transpose1xW_width(); + const int mult_interleave4x4_height = reshape_info.mult_interleave4x4_height(); + + /* Interleave */ + TensorShape tensor_shape0{ input0->tensor_shape() }; + tensor_shape0.set(0, k); + tensor_shape0.set(1, m); + + const TensorInfo tensor_info0 = input0->clone()->set_tensor_shape(tensor_shape0); + const TensorInfo tensor_info_reshaped0 = input0->clone()->set_tensor_shape(misc::shape_calculator::compute_interleaved_shape(tensor_info0, mult_interleave4x4_height)); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input0, &tensor_info_reshaped0); + + if(n != 0) /* Transpose */ + { + TensorShape tensor_shape1{ input1->tensor_shape() }; + tensor_shape1.set(0, n); + tensor_shape1.set(1, k); + + const TensorInfo tensor_info1 = input1->clone()->set_tensor_shape(tensor_shape1); + const TensorInfo tensor_info_reshaped1 = input1->clone()->set_tensor_shape(misc::shape_calculator::compute_transpose1xW_with_element_size_shape(tensor_info1, mult_transpose1xW_width)); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input1, &tensor_info_reshaped1); + } + + if(output->total_size() != 0) + { + if(n != 0) + { + ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(0) != 
static_cast(n)); + } + ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(1) != static_cast(m)); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input0, output); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input0, output); + } } return Status{}; } -std::pair validate_and_configure_window(ITensorInfo *input0, ITensorInfo *input1, ITensorInfo *output) +inline std::pair validate_and_configure_window(ITensorInfo *input0, ITensorInfo *input1, ITensorInfo *output) { - Window win = Window(); - bool window_changed = false; + bool window_changed{}; + Window win{}; unsigned int num_elems_processed_per_iteration_x = 0; const unsigned int num_elems_processed_per_iteration_y = 4; @@ -1538,11 +1586,19 @@ NEGEMMMatrixMultiplyKernel::NEGEMMMatrixMultiplyKernel() { } -void NEGEMMMatrixMultiplyKernel::configure(const ITensor *input0, const ITensor *input1, ITensor *output, float alpha) +void NEGEMMMatrixMultiplyKernel::configure(const ITensor *input0, const ITensor *input1, ITensor *output, float alpha, bool is_interleaved, const GEMMReshapeInfo &reshape_info) { - // Perform validate step ARM_COMPUTE_ERROR_ON_NULLPTR(input0, input1, output); - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input0->info(), input1->info(), output->info())); + + // Output tensor auto inizialitation if not yet initialized + TensorShape tensor_shape{ input0->info()->tensor_shape() }; + tensor_shape.set(0, is_interleaved ? reshape_info.n() : input1->info()->dimension(0)); + tensor_shape.set(1, is_interleaved ? reshape_info.m() : input0->info()->dimension(1)); + + auto_init_if_empty(*output->info(), input0->info()->clone()->set_tensor_shape(tensor_shape)); + + // Perform validate step + ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input0->info(), input1->info(), output->info(), alpha, is_interleaved, reshape_info)); _input0 = input0; _input1 = input1; @@ -1555,9 +1611,10 @@ void NEGEMMMatrixMultiplyKernel::configure(const ITensor *input0, const ITensor INEKernel::configure(win_config.second); } -Status NEGEMMMatrixMultiplyKernel::validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output) +Status NEGEMMMatrixMultiplyKernel::validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output, float alpha, bool is_interleaved, + const GEMMReshapeInfo &reshape_info) { - ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input0, input1, output)); + ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input0, input1, output, alpha, is_interleaved, reshape_info)); ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input0->clone().get(), input1->clone().get(), output->clone().get()).first); return Status{}; diff --git a/src/core/NEON/kernels/NEIm2ColKernel.cpp b/src/core/NEON/kernels/NEIm2ColKernel.cpp index 633f78de4b..4fa329bf44 100644 --- a/src/core/NEON/kernels/NEIm2ColKernel.cpp +++ b/src/core/NEON/kernels/NEIm2ColKernel.cpp @@ -32,6 +32,8 @@ #include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" +#include "arm_compute/core/utils/misc/ShapeCalculator.h" + #include #include #include @@ -42,14 +44,34 @@ using namespace arm_compute; namespace { -Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias) +Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, + bool has_bias, bool is_fully_connected, bool is_flatten) { ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, 
DataType::QS8, DataType::QASYMM8, DataType::QS16, DataType::F16, DataType::F32); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT_POSITION(input, output); ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::QASYMM8 && has_bias); - ARM_COMPUTE_UNUSED(kernel_dims); - ARM_COMPUTE_UNUSED(conv_info); + + if(is_flatten) /* Called by FlattenLayer */ + { + size_t flatten_shape = input->tensor_shape().x() * input->tensor_shape().y() * input->tensor_shape().z(); + ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(0) != flatten_shape); + } + else if(!is_fully_connected) /* Called by ConvolutionLayer */ + { + std::pair out_dims = scaled_dimensions(input->dimension(0), input->dimension(1), kernel_dims.width, kernel_dims.height, conv_info); + ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(0) != (input->dimension(2) * kernel_dims.area() + (has_bias ? 1 : 0))); + ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(1) != (out_dims.first * out_dims.second)); + ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(2) != 1); + } + else /* Called by FullyConnectedLayer */ + { + const int num_batch_dimensions = std::max(0, static_cast(output->tensor_shape().num_dimensions()) - 1); + const int num_input_dimensions = input->tensor_shape().num_dimensions() - num_batch_dimensions; + + TensorInfo expected_output = output->clone()->set_tensor_shape(misc::shape_calculator::compute_im2col_shape(input, num_input_dimensions)); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(&expected_output, output); + } return Status{}; } @@ -291,12 +313,15 @@ NEIm2ColKernel::NEIm2ColKernel() { } -void NEIm2ColKernel::configure(const ITensor *input, ITensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias) +void NEIm2ColKernel::configure(const ITensor *input, ITensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, + bool has_bias, bool is_fully_connected, bool is_flatten) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); // Perform validation step - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), kernel_dims, conv_info, has_bias)); + ARM_COMPUTE_UNUSED(is_fully_connected); + ARM_COMPUTE_UNUSED(is_flatten); + ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), kernel_dims, conv_info, has_bias, is_fully_connected, is_flatten)); _input = input; _output = output; @@ -382,9 +407,10 @@ void NEIm2ColKernel::configure(const ITensor *input, ITensor *output, const Size IKernel::configure(window); } -Status NEIm2ColKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias) +Status NEIm2ColKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, + bool has_bias, bool is_fully_connected, bool is_flatten) { - ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, kernel_dims, conv_info, has_bias)); + ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, kernel_dims, conv_info, has_bias, is_fully_connected, is_flatten)); return Status{}; } diff --git a/src/graph/nodes/FullyConnectedLayer.cpp b/src/graph/nodes/FullyConnectedLayer.cpp index 219e0f9a93..3742150d37 100644 --- a/src/graph/nodes/FullyConnectedLayer.cpp +++ b/src/graph/nodes/FullyConnectedLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. 
* * SPDX-License-Identifier: MIT * diff --git a/src/runtime/CL/functions/CLFullyConnectedLayer.cpp b/src/runtime/CL/functions/CLFullyConnectedLayer.cpp index e9d14db96e..2b4670b98c 100644 --- a/src/runtime/CL/functions/CLFullyConnectedLayer.cpp +++ b/src/runtime/CL/functions/CLFullyConnectedLayer.cpp @@ -114,7 +114,7 @@ void CLFullyConnectedLayer::configure_conv_fc(const ICLTensor *input, const ICLT // If the fully connected layer is called after a convolution layer, the input tensor must be linearized // Initialize output tensor for im2col - TensorShape shape_im2col = compute_im2col_shape(*input->info()); + TensorShape shape_im2col = compute_im2col_shape(input->info()); _im2col_output.allocator()->init(input->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(shape_im2col)); // Configure im2col kernel @@ -243,7 +243,7 @@ Status CLFullyConnectedLayer::validate(const ITensorInfo *input, const ITensorIn bool is_quantized = is_data_type_quantized_asymmetric(input->data_type()); const GPUTarget gpu_target = CLScheduler::get().target(); - const ITensorInfo &im2col_input = TensorInfo(input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(compute_im2col_shape(*input))); + const ITensorInfo &im2col_input = TensorInfo(input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(compute_im2col_shape(input))); const ITensorInfo &reshaped_weights = TensorInfo(weights->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(compute_transposed_shape(*weights))); const ITensorInfo &gemmlowp_output = TensorInfo(output->clone()->set_is_resizable(true).reset_padding().set_data_type(DataType::S32)); diff --git a/src/runtime/NEON/functions/NEFlattenLayer.cpp b/src/runtime/NEON/functions/NEFlattenLayer.cpp index 408eff5746..32edf93b63 100644 --- a/src/runtime/NEON/functions/NEFlattenLayer.cpp +++ b/src/runtime/NEON/functions/NEFlattenLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -32,6 +32,6 @@ using namespace arm_compute; void NEFlattenLayer::configure(const ITensor *input, ITensor *output) { auto k = arm_compute::support::cpp14::make_unique(); - k->configure(input, output, Size2D(1, 1), PadStrideInfo(1, 1, 0, 0), false); + k->configure(input, output, Size2D(1, 1), PadStrideInfo(1, 1, 0, 0), false, false, true); _kernel = std::move(k); } \ No newline at end of file diff --git a/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp b/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp index fc04e28972..26b7271710 100644 --- a/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp +++ b/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -23,15 +23,18 @@ */ #include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h" +#include "arm_compute/core/Helpers.h" #include "arm_compute/core/Size2D.h" #include "arm_compute/core/Validate.h" +#include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/NEON/NEScheduler.h" #include #include -namespace arm_compute -{ +using namespace arm_compute; +using namespace arm_compute::misc::shape_calculator; + NEFullyConnectedLayerReshapeWeights::NEFullyConnectedLayerReshapeWeights(std::shared_ptr memory_manager) : _memory_group(std::move(memory_manager)), _transpose_kernel(), _transpose1xW_kernel(), _transpose_output(), _transpose_weights(false), _is_batched_fc_layer(false) { @@ -39,13 +42,10 @@ NEFullyConnectedLayerReshapeWeights::NEFullyConnectedLayerReshapeWeights(std::sh void NEFullyConnectedLayerReshapeWeights::configure(const ITensor *input, ITensor *output, bool transpose_weights, bool is_batched_fc_layer) { - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32); - ARM_COMPUTE_ERROR_ON(input->info()->num_dimensions() > 2); - ARM_COMPUTE_ERROR_ON(output == nullptr); - ARM_COMPUTE_ERROR_ON(!transpose_weights && !is_batched_fc_layer); + ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - const DataType data_type = input->info()->data_type(); - const int fixed_point_position = input->info()->fixed_point_position(); + // Perform validate step + ARM_COMPUTE_ERROR_THROW_ON(NEFullyConnectedLayerReshapeWeights::validate(input->info(), output->info(), transpose_weights, is_batched_fc_layer)); _transpose_weights = transpose_weights; _is_batched_fc_layer = is_batched_fc_layer; @@ -56,8 +56,7 @@ void NEFullyConnectedLayerReshapeWeights::configure(const ITensor *input, ITenso if(_is_batched_fc_layer) { // Initialize the output tensor for transpose - TensorShape shape_transposed(input->info()->dimension(1), input->info()->dimension(0)); - _transpose_output.allocator()->init(TensorInfo(shape_transposed, 1, data_type, fixed_point_position)); + _transpose_output.allocator()->init(input->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(compute_transposed_shape(*input->info()))); _memory_group.manage(&_transpose_output); _transpose_kernel.configure(input, &_transpose_output); @@ -79,11 +78,39 @@ void NEFullyConnectedLayerReshapeWeights::configure(const ITensor *input, ITenso // Configure transpose 1xW kernel _transpose1xW_kernel.configure(input, output); } + } +} + +Status NEFullyConnectedLayerReshapeWeights::validate(const ITensorInfo *input, const ITensorInfo *output, bool transpose_weights, bool is_batched_fc_layer) +{ + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON(input->num_dimensions() > 2); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(!transpose_weights && !is_batched_fc_layer, "Configuration transpose_weights=false & is_batched_fc_layer=false not supported"); + + if(transpose_weights) + { + if(is_batched_fc_layer) + { + std::unique_ptr use_output = output->clone(); + use_output->set_is_resizable(true).reset_padding().set_tensor_shape(compute_transposed_shape(*input)); + + ARM_COMPUTE_RETURN_ON_ERROR(NETransposeKernel::validate(input, use_output.get())); + ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMTranspose1xWKernel::validate(use_output.get(), output)); + } else { - ARM_COMPUTE_ERROR("Configuration transpose_weights=false & is_batched_fc_layer=false not 
supported"); + ARM_COMPUTE_RETURN_ON_ERROR(NETransposeKernel::validate(input, output)); + } + } + else + { + if(is_batched_fc_layer) + { + ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMTranspose1xWKernel::validate(input, output)); } } + + return Status{}; } void NEFullyConnectedLayerReshapeWeights::run() @@ -122,26 +149,25 @@ void NEFullyConnectedLayer::configure(const ITensor *input, const ITensor *weigh // Weights: flat(In) x Out // Biases: Out // Output: Out x B (B can be multi-dimensional) + ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32); - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights, output); - ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT_POSITION(input, weights, output); + // Perform validate step + ARM_COMPUTE_ERROR_THROW_ON(NEFullyConnectedLayer::validate(input->info(), + weights->info(), + biases != nullptr ? biases->info() : nullptr, + output->info(), + transpose_weights, + are_weights_reshaped)); - const DataType data_type = input->info()->data_type(); - const int fixed_point_position = input->info()->fixed_point_position(); - const int num_batch_dimensions = std::max(0, static_cast(output->info()->tensor_shape().num_dimensions()) - 1); - const int num_input_dimensions = input->info()->tensor_shape().num_dimensions() - num_batch_dimensions; - const size_t linear_input_size = input->info()->tensor_shape().total_size_lower(num_input_dimensions); + const int num_batch_dimensions = std::max(0, static_cast(output->info()->tensor_shape().num_dimensions()) - 1); + const int num_input_dimensions = input->info()->tensor_shape().num_dimensions() - num_batch_dimensions; + const size_t linear_input_size = input->info()->tensor_shape().total_size_lower(num_input_dimensions); _linearize_input = (input->info()->tensor_shape().x() != linear_input_size) || (num_input_dimensions > 1 && linear_input_size == 1); _are_weights_reshaped = are_weights_reshaped; _accumulate_biases = biases != nullptr; _is_batched_fc_layer = num_batch_dimensions > 0; - // Check if number of batches match - ARM_COMPUTE_ERROR_ON(input->info()->tensor_shape().total_size_upper(num_input_dimensions) != output->info()->tensor_shape().total_size_upper(1)); - ARM_COMPUTE_ERROR_ON(weights->info()->num_dimensions() > 2); - const size_t interleave_width = 16 / input->info()->element_size(); const ITensor *weights_to_use = weights; @@ -149,65 +175,33 @@ void NEFullyConnectedLayer::configure(const ITensor *input, const ITensor *weigh { weights_to_use = &_reshape_weights_output; - TensorShape reshaped_weights_shape(weights->info()->tensor_shape()); - - // Transpose weights if the user hasn't done it - if(transpose_weights) - { - const size_t shape_x = reshaped_weights_shape.x(); - reshaped_weights_shape.set(0, reshaped_weights_shape.y()); - reshaped_weights_shape.set(1, shape_x); - } - - // If the we run multiple batches we need 1xW transpose, too. 
- if(_is_batched_fc_layer) - { - const float shape_x = reshaped_weights_shape.x(); - reshaped_weights_shape.set(0, reshaped_weights_shape.y() * interleave_width); - reshaped_weights_shape.set(1, static_cast(std::ceil(shape_x / interleave_width))); - } - - _reshape_weights_output.allocator()->init(TensorInfo(reshaped_weights_shape, 1, data_type, fixed_point_position)); + _reshape_weights_output.allocator()->init(input->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(compute_fully_connected_reshaped_weights_shape(weights->info(), + transpose_weights, + _is_batched_fc_layer, interleave_width))); // Reshape the weights _reshape_weights_kernel.configure(weights, &_reshape_weights_output, transpose_weights, _is_batched_fc_layer); } - // Check correct shape of weights - if(_is_batched_fc_layer) - { - // Transpose + Transpose1xW - ARM_COMPUTE_ERROR_ON(weights_to_use->info()->tensor_shape().x() != linear_input_size * interleave_width); - ARM_COMPUTE_ERROR_ON(weights_to_use->info()->tensor_shape().y() != static_cast(std::ceil(static_cast(output->info()->tensor_shape().x()) / interleave_width))); - } - else - { - // Transpose - ARM_COMPUTE_ERROR_ON(weights_to_use->info()->tensor_shape().x() != output->info()->tensor_shape().x()); - ARM_COMPUTE_ERROR_ON(weights_to_use->info()->tensor_shape().y() != linear_input_size); - } - const ITensor *multiply_input = input; if(_linearize_input) { - TensorShape shape_im2col(input->info()->tensor_shape()); - shape_im2col.collapse(num_input_dimensions); - _im2col_output.allocator()->init(TensorInfo(shape_im2col, 1, data_type, fixed_point_position)); + _im2col_output.allocator()->init(input->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(compute_im2col_shape(input->info(), num_input_dimensions))); // Configure im2col kernel _memory_group.manage(&_im2col_output); - _im2col_kernel.configure(input, &_im2col_output, Size2D(1, 1), PadStrideInfo(1, 1, 0, 0), false); + _im2col_kernel.configure(input, &_im2col_output, Size2D(1, 1), PadStrideInfo(1, 1, 0, 0), false, true); multiply_input = &_im2col_output; } + int m = multiply_input->info()->dimension(1); + int k = multiply_input->info()->dimension(0); + if(_is_batched_fc_layer) { - TensorShape shape_interleaved(multiply_input->info()->tensor_shape()); - shape_interleaved.set(0, shape_interleaved.x() * 4); - shape_interleaved.set(1, std::ceil(shape_interleaved.y() / 4.f)); - _interleave4x4_output.allocator()->init(TensorInfo(shape_interleaved, 1, data_type, fixed_point_position)); + _interleave4x4_output.allocator()->init(input->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(compute_interleaved_shape(*multiply_input->info()))); // Configure interleave4x4 kernel _memory_group.manage(&_interleave4x4_output); @@ -217,13 +211,10 @@ void NEFullyConnectedLayer::configure(const ITensor *input, const ITensor *weigh } // Configure matrix multiply kernel - _mm_kernel.configure(multiply_input, weights_to_use, output, 1.0f); + _mm_kernel.configure(multiply_input, weights_to_use, output, 1.0f, _is_batched_fc_layer, GEMMReshapeInfo(m, 0 /* no transpose */, k)); if(_accumulate_biases) { - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, biases); - ARM_COMPUTE_ERROR_ON(biases->info()->tensor_shape().x() != output->info()->tensor_shape().x()); - // Configure accumulate biases kernel _accumulate_biases_kernel.configure(output, biases); } @@ -246,6 +237,88 @@ void NEFullyConnectedLayer::configure(const ITensor *input, const ITensor *weigh } } +Status 
NEFullyConnectedLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, bool transpose_weights, bool are_weights_reshaped) +{ + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights, output); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT_POSITION(input, weights, output); + + const int num_batch_dimensions = std::max(0, static_cast(output->tensor_shape().num_dimensions()) - 1); + const int num_input_dimensions = input->tensor_shape().num_dimensions() - num_batch_dimensions; + const size_t linear_input_size = input->tensor_shape().total_size_lower(num_input_dimensions); + + const bool linearize_input = (input->tensor_shape().x() != linear_input_size) || (num_input_dimensions > 1 && linear_input_size == 1); + const bool accumulate_biases = biases != nullptr; + const bool is_batched_fc_layer = num_batch_dimensions > 0; + + ARM_COMPUTE_RETURN_ERROR_ON(input->tensor_shape().total_size_upper(num_input_dimensions) != output->tensor_shape().total_size_upper(1)); + ARM_COMPUTE_RETURN_ERROR_ON(weights->num_dimensions() > 2); + + const size_t interleave_width = 16 / input->element_size(); + const ITensorInfo *weights_to_use = weights; + std::unique_ptr reshape_weights_output = input->clone(); + + if(!are_weights_reshaped && (transpose_weights || is_batched_fc_layer)) + { + reshape_weights_output->set_tensor_shape(compute_fully_connected_reshaped_weights_shape(weights, transpose_weights, is_batched_fc_layer, interleave_width)); + + ARM_COMPUTE_RETURN_ON_ERROR(NEFullyConnectedLayerReshapeWeights::validate(weights, reshape_weights_output.get(), transpose_weights, is_batched_fc_layer)); + + weights_to_use = reshape_weights_output.get(); + } + + // Check correct shape of weights + if(is_batched_fc_layer) + { + // Transpose + Transpose1xW + ARM_COMPUTE_RETURN_ERROR_ON(weights_to_use->tensor_shape().x() != linear_input_size * interleave_width); + ARM_COMPUTE_RETURN_ERROR_ON(weights_to_use->tensor_shape().y() != static_cast(std::ceil(static_cast(output->tensor_shape().x()) / interleave_width))); + } + else + { + // Transpose + ARM_COMPUTE_RETURN_ERROR_ON(weights_to_use->tensor_shape().x() != output->tensor_shape().x()); + ARM_COMPUTE_RETURN_ERROR_ON(weights_to_use->tensor_shape().y() != linear_input_size); + } + + const ITensorInfo *multiply_input = input; + std::unique_ptr im2col_output = input->clone(); + std::unique_ptr interleave4x4_output = input->clone(); + + if(linearize_input) + { + im2col_output->set_tensor_shape(compute_im2col_shape(input, num_input_dimensions)); + + ARM_COMPUTE_RETURN_ON_ERROR(NEIm2ColKernel::validate(input, im2col_output.get(), Size2D(1, 1), PadStrideInfo(1, 1, 0, 0), false, true)); + + multiply_input = im2col_output.get(); + } + + int m = multiply_input->dimension(1); + int k = multiply_input->dimension(0); + + if(is_batched_fc_layer) + { + interleave4x4_output->set_tensor_shape(compute_interleaved_shape(*multiply_input)); + + ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMInterleave4x4Kernel::validate(multiply_input, interleave4x4_output.get())); + + multiply_input = interleave4x4_output.get(); + } + + ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMMatrixMultiplyKernel::validate(multiply_input, weights_to_use, output, 1.0f, is_batched_fc_layer, GEMMReshapeInfo(m, 0 /* no transpose */, k))); + + if(accumulate_biases) + { + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, biases); + 
ARM_COMPUTE_RETURN_ERROR_ON(biases->tensor_shape().x() != output->tensor_shape().x()); + + ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMMatrixAccumulateBiasesKernel::validate(output, biases)); + } + + return Status{}; +} + void NEFullyConnectedLayer::run() { // Reshape of the weights (happens only once) @@ -280,4 +353,3 @@ void NEFullyConnectedLayer::run() _memory_group.release(); } -} // namespace arm_compute diff --git a/src/runtime/NEON/functions/NEGEMM.cpp b/src/runtime/NEON/functions/NEGEMM.cpp index 48a0d2af1c..05907bab07 100644 --- a/src/runtime/NEON/functions/NEGEMM.cpp +++ b/src/runtime/NEON/functions/NEGEMM.cpp @@ -120,7 +120,7 @@ void NEGEMM::configure(const ITensor *a, const ITensor *b, const ITensor *c, ITe #endif /* defined(__aarch64__) */ { // Configure the matrix multiply kernel - _mm_kernel.configure(a, b, d, alpha); + _mm_kernel.configure(a, b, d, alpha, false); } // Configure matrix addition kernel @@ -212,6 +212,10 @@ void NEGEMM::configure(const ITensor *a, const ITensor *b, const ITensor *c, ITe _memory_group.manage(&_tmp_a); _memory_group.manage(&_tmp_b); + int m = a->info()->dimension(1); + int n = b->info()->dimension(0); + int k = a->info()->dimension(0); + // Configure interleave kernel _interleave_kernel.configure(a, &_tmp_a); @@ -219,7 +223,7 @@ void NEGEMM::configure(const ITensor *a, const ITensor *b, const ITensor *c, ITe _transpose_kernel.configure(b, &_tmp_b); // Configure matrix multiplication kernel - _mm_kernel.configure(&_tmp_a, &_tmp_b, d, alpha); + _mm_kernel.configure(&_tmp_a, &_tmp_b, d, alpha, true, GEMMReshapeInfo(m, n, k)); // Allocate once the all configure methods have been called _tmp_a.allocator()->allocate(); diff --git a/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp b/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp index d0a16ef40d..a85078cf71 100644 --- a/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp +++ b/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp @@ -178,7 +178,7 @@ TensorShape get_reshaped_weights_shape_conv(const ITensorInfo *weights, bool app Status validate_and_initialize_values(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const PadStrideInfo &conv_info, const WeightsInfo &weights_info, DataType &dt, bool &append_bias, bool &are_weights_reshaped, unsigned int &kernel_width, unsigned int &kernel_height, - bool &is_fully_connected_convolution, bool &is_interleaved_transposed, bool &is_quantized, + bool &is_fully_connected_convolution, bool &is_interleaved, bool &is_quantized, unsigned int &mat_weights_cols, unsigned int &mat_weights_rows, unsigned int &conv_w, unsigned int &conv_h) { @@ -219,7 +219,7 @@ Status validate_and_initialize_values(const ITensorInfo *input, const ITensorInf // Check if its a "fully connected" convolution is_fully_connected_convolution = ((conv_w == 1) && (conv_h == 1)); - is_interleaved_transposed = (!is_fully_connected_convolution && !is_quantized); + is_interleaved = (!is_fully_connected_convolution && !is_quantized); return Status{}; } @@ -228,11 +228,11 @@ Status validate_and_initialize_values(const ITensorInfo *input, const ITensorInf NEGEMMConvolutionLayer::NEGEMMConvolutionLayer(const std::shared_ptr &memory_manager) : _memory_group(memory_manager), _input_im2col_kernel(), _input_interleave_kernel(), _reshape_weights(), _mm_kernel(), _mm_optimised_kernel(nullptr), _mm_gemmlowp(memory_manager), _gemmlowp_output_stage(), _output_col2im_kernel(), _input_im2col_reshaped(), _input_interleaved_reshaped(), _weights_reshaped(), _gemm_output(), 
-      _is_fully_connected_convolution(false), _are_weights_reshaped(false), _is_quantized(false), _is_interleaved_transposed(false)
+      _is_fully_connected_convolution(false), _are_weights_reshaped(false), _is_quantized(false), _is_interleaved(false)
 {
 }
 
-void NEGEMMConvolutionLayer::configure_mm(const ITensor *input, const ITensor *weights, ITensor *output)
+void NEGEMMConvolutionLayer::configure_mm(const ITensor *input, const ITensor *weights, ITensor *output, bool is_interleaved, const GEMMReshapeInfo &reshape_info)
 {
     if(_is_quantized)
     {
@@ -252,7 +252,7 @@ void NEGEMMConvolutionLayer::configure_mm(const ITensor *input, const ITensor *w
     }
     else
     {
-        _mm_kernel.configure(input, weights, output, 1.f);
+        _mm_kernel.configure(input, weights, output, 1.f, is_interleaved, reshape_info);
     }
 }
 
@@ -290,7 +290,7 @@ void NEGEMMConvolutionLayer::configure(const ITensor *input, const ITensor *weig
 
     Status status = validate_and_initialize_values(input->info(), weights->info(), (biases == nullptr) ? nullptr : biases->info(), conv_info, weights_info, dt, _append_bias, _are_weights_reshaped, kernel_width, kernel_height,
-                                                   _is_fully_connected_convolution, _is_interleaved_transposed, _is_quantized,
+                                                   _is_fully_connected_convolution, _is_interleaved, _is_quantized,
                                                    mat_weights_cols, mat_weights_rows, conv_w, conv_h);
 
     ARM_COMPUTE_ERROR_THROW_ON(status);
 
@@ -339,9 +339,8 @@ void NEGEMMConvolutionLayer::configure(const ITensor *input, const ITensor *weig
         }
         else
        {
-            const unsigned int transpose_width = 16 / input->info()->element_size();
-            mat_weights_cols = weights_info.num_kernels();
-            mat_weights_rows = weights->info()->dimension(0) / transpose_width + (_append_bias ? 1 : 0);
+            mat_weights_cols = weights_info.num_kernels();
+            mat_weights_rows = weights_info.kernel_size().first * weights_info.kernel_size().second * input->info()->dimension(2) + (_append_bias ? 1 : 0);
         }
     }
     else
@@ -362,7 +361,7 @@ void NEGEMMConvolutionLayer::configure(const ITensor *input, const ITensor *weig
 
         // Create tensor to store the reshaped weights
         _weights_reshaped.allocator()->init(TensorInfo(reshaped_weights_shape, 1, dt, fixed_point_position));
-        _reshape_weights.configure(weights, biases_to_use, &_weights_reshaped, _is_interleaved_transposed /* 1xW transpose */);
+        _reshape_weights.configure(weights, biases_to_use, &_weights_reshaped, _is_interleaved /* 1xW transpose */);
         weights = &_weights_reshaped;
     }
 }
@@ -430,18 +429,19 @@ void NEGEMMConvolutionLayer::configure(const ITensor *input, const ITensor *weig
     }
     else
     {
-        if(_is_interleaved_transposed)
+        if(_is_interleaved)
         {
             // Configure GEMMInterleave4x4. _input_interleaved_reshaped will be auto configured in the kernel
             _input_interleave_kernel.configure(&_input_im2col_reshaped, &_input_interleaved_reshaped);
 
             // Configure GEMM
-            configure_mm(&_input_interleaved_reshaped, weights, &_gemm_output);
+            configure_mm(&_input_interleaved_reshaped, weights, &_gemm_output, _is_interleaved, GEMMReshapeInfo(_input_im2col_reshaped.info()->dimension(1), 0 /* no transpose */,
+                                                                                                               _input_im2col_reshaped.info()->dimension(0)));
 
             _input_interleaved_reshaped.allocator()->allocate();
         }
         else
         {
-            configure_mm(&_input_im2col_reshaped, weights, &_gemm_output);
+            configure_mm(&_input_im2col_reshaped, weights, &_gemm_output, _is_interleaved);
         }
     }
 
@@ -479,11 +479,13 @@ void NEGEMMConvolutionLayer::configure(const ITensor *input, const ITensor *weig
 Status NEGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info)
 {
+    ARM_COMPUTE_UNUSED(output);
+
     DataType     dt{};
     bool         append_bias{};
     bool         are_weights_reshaped{};
     bool         is_fully_connected_convolution{};
-    bool         is_interleaved_transposed{};
+    bool         is_interleaved{};
     bool         is_quantized{};
     unsigned int kernel_width  = 0;
     unsigned int kernel_height = 0;
@@ -493,9 +495,11 @@ Status NEGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI
     unsigned int conv_h = 0;
 
     Status status = validate_and_initialize_values(input, weights, biases, conv_info, weights_info, dt, append_bias, are_weights_reshaped, kernel_width, kernel_height,
-                                                   is_fully_connected_convolution, is_interleaved_transposed, is_quantized, mat_weights_cols, mat_weights_rows,
+                                                   is_fully_connected_convolution, is_interleaved, is_quantized, mat_weights_cols, mat_weights_rows,
                                                    conv_w, conv_h);
 
+    const Size2D kernel_weights = Size2D(kernel_width, kernel_height);
+
     ARM_COMPUTE_RETURN_ON_ERROR(status);
 
     std::unique_ptr<ITensorInfo> reshaped_weights = weights->clone();
 
@@ -570,7 +574,7 @@ Status NEGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI
     shape_im2col.set(1, mat_input_rows);
     shape_im2col.set(2, 1);
     TensorInfo im2_col_info = input->clone()->set_tensor_shape(shape_im2col);
-    ARM_COMPUTE_RETURN_ON_ERROR(NEIm2ColKernel::validate(input, &im2_col_info, Size2D(weights->dimension(0), weights->dimension(1)), conv_info, append_bias));
+    ARM_COMPUTE_RETURN_ON_ERROR(NEIm2ColKernel::validate(input, &im2_col_info, kernel_weights, conv_info, append_bias, false));
 
     // Create GEMM output tensor
     TensorShape shape_gemm(im2_col_info.tensor_shape());
@@ -579,24 +583,20 @@ Status NEGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI
     TensorInfo gemm_output_info = input->clone()->set_tensor_shape(shape_gemm);
 
     // Validate GEMM interleave and multiply
-    if(is_interleaved_transposed)
+    if(is_interleaved)
     {
         TensorShape shape_interleaved = shape_im2col;
         shape_interleaved.set(0, shape_interleaved.x() * 4);
         shape_interleaved.set(1, std::ceil(shape_interleaved.y() / 4.f));
         TensorInfo input_interleaved_info = input->clone()->set_tensor_shape(shape_interleaved);
         ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMInterleave4x4Kernel::validate(&im2_col_info, &input_interleaved_info));
-        ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMMatrixMultiplyKernel::validate(&input_interleaved_info, weights, &gemm_output_info));
+        ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMMatrixMultiplyKernel::validate(&input_interleaved_info, weights, &gemm_output_info, 1.f, is_interleaved, GEMMReshapeInfo()));
     }
     else
     {
-        ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMMatrixMultiplyKernel::validate(&im2_col_info, weights, &gemm_output_info));
+        ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMMatrixMultiplyKernel::validate(&im2_col_info, weights, &gemm_output_info, 1.f, is_interleaved, GEMMReshapeInfo()));
     }
 
-    ARM_COMPUTE_RETURN_ON_ERROR(NECol2ImKernel::validate(&gemm_output_info, output, Size2D(conv_w, conv_h)));
-
-    ARM_COMPUTE_RETURN_ERROR_ON_MSG((output->dimension(0) != conv_w) || (output->dimension(1) != conv_h), "Output shape does not match the expected one");
-
     return Status{};
 }
@@ -621,7 +621,7 @@ void NEGEMMConvolutionLayer::run()
     }
     else
     {
-        if(_is_interleaved_transposed)
+        if(_is_interleaved)
         {
             // Run interleave
             NEScheduler::get().schedule(&_input_interleave_kernel, Window::DimY);
diff --git a/src/runtime/NEON/functions/NEGEMMTranspose1xW.cpp b/src/runtime/NEON/functions/NEGEMMTranspose1xW.cpp
index 571bf2bc74..802b94650e 100644
--- a/src/runtime/NEON/functions/NEGEMMTranspose1xW.cpp
+++ b/src/runtime/NEON/functions/NEGEMMTranspose1xW.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -38,3 +38,7 @@ void NEGEMMTranspose1xW::configure(const ITensor *input, ITensor *output)
     k->configure(input, output);
     _kernel = std::move(k);
 }
+Status NEGEMMTranspose1xW::validate(const ITensorInfo *input, const ITensorInfo *output)
+{
+    return NEGEMMTranspose1xWKernel::validate(input, output);
+}
diff --git a/src/runtime/NEON/functions/NEIm2Col.cpp b/src/runtime/NEON/functions/NEIm2Col.cpp
index 8e90e66dcc..b962db9144 100644
--- a/src/runtime/NEON/functions/NEIm2Col.cpp
+++ b/src/runtime/NEON/functions/NEIm2Col.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -28,14 +28,14 @@
 using namespace arm_compute;
 
-void NEIm2Col::configure(const ITensor *input, ITensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias)
+void NEIm2Col::configure(const ITensor *input, ITensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, bool is_fully_connected)
 {
     auto k = arm_compute::support::cpp14::make_unique<NEIm2ColKernel>();
-    k->configure(input, output, kernel_dims, conv_info, has_bias);
+    k->configure(input, output, kernel_dims, conv_info, has_bias, is_fully_connected);
     _kernel = std::move(k);
 }
 
-Status NEIm2Col::validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias)
+Status NEIm2Col::validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, bool is_fully_connected)
 {
-    return NEIm2ColKernel::validate(input, output, kernel_dims, conv_info, has_bias);
+    return NEIm2ColKernel::validate(input, output, kernel_dims, conv_info, has_bias, is_fully_connected);
 }
diff --git a/tests/validation/NEON/FullyConnectedLayer.cpp b/tests/validation/NEON/FullyConnectedLayer.cpp
index fc5342d304..ed0edcd3be 100644
--- a/tests/validation/NEON/FullyConnectedLayer.cpp
+++ b/tests/validation/NEON/FullyConnectedLayer.cpp
@@ -115,6 +115,52 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(frame
     validate(dst.info()->valid_region(), dst_valid_region);
 }
 
+// *INDENT-OFF*
+// clang-format off
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(
+    framework::dataset::make("InputInfo", { TensorInfo(TensorShape(9U, 5U, 7U, 3U), 1, DataType::F32),    // Mismatching data types
+                                            TensorInfo(TensorShape(9U, 5U, 7U, 3U), 1, DataType::QS8, 2), // Mismatching fixed point position
+                                            TensorInfo(TensorShape(8U, 4U, 6U, 4U), 1, DataType::F32),
+                                            TensorInfo(TensorShape(8U, 4U, 6U, 4U), 1, DataType::F32),
+                                            TensorInfo(TensorShape(9U, 5U, 7U, 3U), 1, DataType::F32),    // Invalid weights dimensions
+                                            TensorInfo(TensorShape(9U, 5U, 7U, 3U), 1, DataType::F32),    // Wrongly reshaped weights
+                                            TensorInfo(TensorShape(8U, 4U, 6U, 4U), 1, DataType::F32),
+                                          }),
+    framework::dataset::make("WeightsInfo",{ TensorInfo(TensorShape(315U, 271U), 1, DataType::F16),
+                                             TensorInfo(TensorShape(315U, 271U), 1, DataType::QS8, 3),
+                                             TensorInfo(TensorShape(192U, 192U), 1, DataType::F32),
+                                             TensorInfo(TensorShape(192U, 192U), 1, DataType::F32),
+                                             TensorInfo(TensorShape(217U, 315U), 1, DataType::F32),
+                                             TensorInfo(TensorShape(217U, 315U), 1, DataType::F32),
+                                             TensorInfo(TensorShape(192U, 192U), 1, DataType::F32),
+                                           })),
+    framework::dataset::make("BiasInfo",{ TensorInfo(TensorShape(271U), 1, DataType::F32),
+                                          TensorInfo(TensorShape(271U), 1, DataType::QS8, 2),
+                                          TensorInfo(TensorShape(192U), 1, DataType::F32),
+                                          TensorInfo(TensorShape(192U), 1, DataType::F32),
+                                          TensorInfo(TensorShape(271U), 1, DataType::F32),
+                                          TensorInfo(TensorShape(271U), 1, DataType::F32),
+                                          TensorInfo(TensorShape(192U), 1, DataType::F32),
+                                        })),
+    framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(271U, 3U), 1, DataType::F32),
+                                            TensorInfo(TensorShape(271U, 3U), 1, DataType::QS8, 3),
+                                            TensorInfo(TensorShape(192U, 4U), 1, DataType::F32),
+                                            TensorInfo(TensorShape(192U, 4U), 1, DataType::F32),
+                                            TensorInfo(TensorShape(271U, 3U), 1, DataType::F32),
+                                            TensorInfo(TensorShape(271U, 3U), 1, DataType::F32),
+                                            TensorInfo(TensorShape(192U, 4U), 1, DataType::F32),
+                                          })),
+    framework::dataset::make("TransposeWeights",{ true, true, true, false, true, true, true })),
+    framework::dataset::make("ReshapedWeights",{ false, false, false, false, false, false, false })),
+    framework::dataset::make("Expected", { false, false, true, true, false, false, true })),
+    input_info, weights_info, bias_info, output_info, transpose_weights, reshaped_weights, expected)
+{
+    Status status = NEFullyConnectedLayer::validate(&input_info.clone()->set_is_resizable(false), &weights_info.clone()->set_is_resizable(false), &bias_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), transpose_weights, reshaped_weights);
+    ARM_COMPUTE_EXPECT(bool(status) == expected, framework::LogLevel::ERRORS);
+}
+// clang-format on
+// *INDENT-ON*
+
 template <typename T>
 using NEFullyConnectedLayerFixture = FullyConnectedLayerValidationFixture<Tensor, Accessor, NEFullyConnectedLayer, T, true>;
diff --git a/tests/validation/NEON/Im2Col.cpp b/tests/validation/NEON/Im2Col.cpp
index f8e474b6c3..96dd6f86ab 100644
--- a/tests/validation/NEON/Im2Col.cpp
+++ b/tests/validation/NEON/Im2Col.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -44,6 +44,7 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
                                              TensorInfo(TensorShape(10U, 12U, 2U), 1, DataType::F32),     // Mismatching data type
                                              TensorInfo(TensorShape(10U, 12U, 2U), 1, DataType::QS8, 2),  // Mismatching fixed point
                                              TensorInfo(TensorShape(10U, 12U, 2U), 1, DataType::QASYMM8), // Bias not supported with QASYMM8
+                                             TensorInfo(TensorShape(10U, 12U, 2U), 1, DataType::QASYMM8), // Mismatching shapes
                                              TensorInfo(TensorShape(10U, 12U, 2U), 1, DataType::QASYMM8),
                                            }),
     framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(3U, 4U, 10U, 2U), 1, DataType::F16),
@@ -51,12 +52,13 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
                                             TensorInfo(TensorShape(3U, 4U, 10U, 2U), 1, DataType::QS8, 3),
                                             TensorInfo(TensorShape(3U, 3U, 10U, 2U), 1, DataType::QASYMM8),
                                             TensorInfo(TensorShape(3U, 4U, 10U, 2U), 1, DataType::QASYMM8),
+                                            TensorInfo(TensorShape(18U, 80U, 1U, 2U), 1, DataType::QASYMM8),
                                           })),
-    framework::dataset::make("HasBias", { true, true, true, true, false })),
-    framework::dataset::make("Expected", { false, false, false, false, true })),
+    framework::dataset::make("HasBias", { true, true, true, true, false, false })),
+    framework::dataset::make("Expected", { false, false, false, false, false, true })),
     input_info, output_info, has_bias, expected)
 {
-    bool status = bool(NEIm2Col::validate(&input_info, &output_info, Size2D(3U, 3U), PadStrideInfo(), has_bias));
+    bool status = bool(NEIm2Col::validate(&input_info, &output_info, Size2D(3U, 3U), PadStrideInfo(), has_bias, false));
     ARM_COMPUTE_EXPECT(status == expected, framework::LogLevel::ERRORS);
 }
 // clang-format on
--
cgit v1.2.1
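
Usage sketch (not part of the patch): a minimal example of how the NEFullyConnectedLayer::validate() entry point added above can be used to pre-check a configuration before calling configure(). The tensor shapes are copied from one of the passing cases in the new Validate dataset; the main() scaffolding and return-code handling are illustrative assumptions, not code from this change.

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/TensorShape.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h"

    using namespace arm_compute;

    int main()
    {
        // Shapes mirror the third (valid) entry of the Validate dataset above:
        // an 8x4x6 input volume (192 elements) over 4 batches, producing 192 outputs.
        const TensorInfo input(TensorShape(8U, 4U, 6U, 4U), 1, DataType::F32);
        const TensorInfo weights(TensorShape(192U, 192U), 1, DataType::F32);
        const TensorInfo bias(TensorShape(192U), 1, DataType::F32);
        const TensorInfo output(TensorShape(192U, 4U), 1, DataType::F32);

        // Ask the operator up front whether this configuration is supported
        // (transpose_weights = true, are_weights_reshaped = false) instead of
        // hitting an assertion inside configure() at run time.
        const Status status = NEFullyConnectedLayer::validate(&input, &weights, &bias, &output, true, false);

        return bool(status) ? 0 : 1;
    }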