From 368e63507ad62dc1607f752302d8db6b7d603f71 Mon Sep 17 00:00:00 2001 From: Giorgio Arena Date: Mon, 20 Aug 2018 15:06:07 +0100 Subject: COMPMID-1047 Extract Flatten function from Im2Col for NEON Change-Id: I80f3aaadc8cae8c9ca1a5a239e79bda302b89bd8 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/144813 Tested-by: Jenkins Reviewed-by: Gian Marco Iodice --- .../runtime/NEON/functions/NEFlattenLayer.h | 17 ++++++--- .../runtime/NEON/functions/NEFullyConnectedLayer.h | 6 +-- arm_compute/runtime/NEON/functions/NEIm2Col.h | 44 ++++++++++------------ 3 files changed, 35 insertions(+), 32 deletions(-) (limited to 'arm_compute/runtime/NEON/functions') diff --git a/arm_compute/runtime/NEON/functions/NEFlattenLayer.h b/arm_compute/runtime/NEON/functions/NEFlattenLayer.h index 2c259fa178..26d7c7f636 100644 --- a/arm_compute/runtime/NEON/functions/NEFlattenLayer.h +++ b/arm_compute/runtime/NEON/functions/NEFlattenLayer.h @@ -31,11 +31,7 @@ namespace arm_compute { class ITensor; -/** Basic function to execute flatten. This function calls the following NEON kernel: -* -* -# @ref NEIm2ColKernel -* -*/ +/** Basic function to execute flatten layer kernel. */ class NEFlattenLayer : public INESimpleFunction { public: @@ -46,6 +42,17 @@ public: * w = width input tensor, h = height input tensor and d = depth input tensor. Data type supported: same as @p input */ void configure(const ITensor *input, ITensor *output); + + /** Static function to check if given info will lead to a valid configuration of @ref NEFlattenLayer + * + * @param[in] input First input tensor to flatten with at least 3 dimensions. + * The dimensions above the third will be interpreted as batches. Data types supported: U8/S8/QASYMM8/U16/S16/F16/U32/S32/F32 + * @param[out] output Output tensor with shape [w*h*d, input_batches] where: + * w = width input tensor, h = height input tensor and d = depth input tensor. Data type supported: same as @p input + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output); }; } // namespace arm_compute diff --git a/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h b/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h index fe0f2f03f7..9c9074ceec 100644 --- a/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h +++ b/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h @@ -26,8 +26,8 @@ #include "arm_compute/runtime/IFunction.h" +#include "arm_compute/core/NEON/kernels/NEFlattenLayerKernel.h" #include "arm_compute/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.h" -#include "arm_compute/core/NEON/kernels/NEIm2ColKernel.h" #include "arm_compute/core/NEON/kernels/NETransposeKernel.h" #include "arm_compute/runtime/MemoryGroup.h" #include "arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h" @@ -129,14 +129,14 @@ private: void configure_mm(const ITensor *input, const ITensor *weights, ITensor *output); MemoryGroup _memory_group; - NEIm2ColKernel _im2col_kernel; + NEFlattenLayerKernel _flatten_kernel; NEConvertFullyConnectedWeights _convert_weights; NEFullyConnectedLayerReshapeWeights _reshape_weights_function; NEGEMM _mm_gemm; NEGEMMLowpMatrixMultiplyCore _mm_gemmlowp; NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint _gemmlowp_output_stage; NEGEMMMatrixAccumulateBiasesKernel _accumulate_biases_kernel; - Tensor _im2col_output; + Tensor _flatten_output; Tensor _gemmlowp_output; Tensor _converted_weights_output; Tensor _reshape_weights_output; diff --git a/arm_compute/runtime/NEON/functions/NEIm2Col.h b/arm_compute/runtime/NEON/functions/NEIm2Col.h index 9df4f070d8..de4780f8f0 100644 --- a/arm_compute/runtime/NEON/functions/NEIm2Col.h +++ b/arm_compute/runtime/NEON/functions/NEIm2Col.h @@ -42,38 +42,34 @@ public: NEIm2Col(); /** Configure the im2col NEON kernel * - * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM], - * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8/F16/F32 - * Note: QASYMM8 works only for has_bias = false - * @param[out] output The output tensor. Data types supported: Same as @p input - * @param[in] kernel_dims The kernel dimensions (width and height). - * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. - * @param[in] has_bias In case biases are provided expands the matrix with 1. - * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). - * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution - * @param[in] is_fully_connected (Optional) Determines whether this function will be called by @ref NEFullyConnectedLayer in order to validate the arguments - * @param[in] is_flatten (Optional) Determines whether this function will be called by @ref NEFlattenLayer in order to validate the arguments + * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM], + * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8/F16/F32 + * Note: QASYMM8 works only for has_bias = false + * @param[out] output The output tensor. Data types supported: Same as @p input + * @param[in] kernel_dims The kernel dimensions (width and height). + * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. + * @param[in] has_bias In case biases are provided expands the matrix with 1. + * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). + * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution */ void configure(const ITensor *input, ITensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation = Size2D(1U, 1U), - unsigned int num_groups = 1, bool is_fully_connected = false, bool is_flatten = false); + unsigned int num_groups = 1); /** Static function to check if given info will lead to a valid configuration of @ref NEIm2Col * - * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM], - * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8/F16/F32 - * Note: QASYMM8 works only for has_bias = false - * @param[in] output The output tensor. Data types supported: Same as @p input - * @param[in] kernel_dims The kernel dimensions (width and height). - * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. - * @param[in] has_bias In case biases are provided expands the matrix with 1. - * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). - * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution - * @param[in] is_fully_connected (Optional) Determines whether this function will be called by @ref NEFullyConnectedLayer in order to validate the arguments - * @param[in] is_flatten (Optional) Determines whether this function will be called by @ref NEFlattenLayer in order to validate the arguments + * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM], + * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8/F16/F32 + * Note: QASYMM8 works only for has_bias = false + * @param[in] output The output tensor. Data types supported: Same as @p input + * @param[in] kernel_dims The kernel dimensions (width and height). + * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. + * @param[in] has_bias In case biases are provided expands the matrix with 1. + * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). + * @param[in] num_groups (Optional) Number of groups when performing a grouped convolution * * @return a status */ static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation = Size2D(1U, 1U), - unsigned int num_groups = 1, bool is_fully_connected = false, bool is_flatten = false); + unsigned int num_groups = 1); // Inherited methods overridden: void run() override; -- cgit v1.2.1